author    Stephen Hines <srhines@google.com>  2014-07-21 00:45:20 -0700
committer Stephen Hines <srhines@google.com>  2014-07-21 00:45:20 -0700
commit    c6a4f5e819217e1e12c458aed8e7b122e23a3a58
tree      81b7dd2bb4370a392f31d332a566c903b5744764 /lib
parent    19c6fbb3e8aaf74093afa08013134b61fa08f245
Update LLVM for rebase to r212749.
Includes a cherry-pick of:
  r212948 - fixes a small issue with atomic calls

Change-Id: Ib97bd980b59f18142a69506400911a6009d9df18
Diffstat (limited to 'lib')
-rw-r--r--lib/Analysis/AliasAnalysis.cpp40
-rw-r--r--lib/Analysis/Analysis.cpp1
-rw-r--r--lib/Analysis/Android.mk1
-rw-r--r--lib/Analysis/BasicAliasAnalysis.cpp220
-rw-r--r--lib/Analysis/BlockFrequencyInfoImpl.cpp337
-rw-r--r--lib/Analysis/CMakeLists.txt1
-rw-r--r--lib/Analysis/ConstantFolding.cpp43
-rw-r--r--lib/Analysis/CostModel.cpp37
-rw-r--r--lib/Analysis/IPA/CallGraphSCCPass.cpp8
-rw-r--r--lib/Analysis/IPA/InlineCost.cpp19
-rw-r--r--lib/Analysis/IVUsers.cpp5
-rw-r--r--lib/Analysis/InstructionSimplify.cpp170
-rw-r--r--lib/Analysis/JumpInstrTableInfo.cpp40
-rw-r--r--lib/Analysis/LoopPass.cpp5
-rw-r--r--lib/Analysis/NoAliasAnalysis.cpp8
-rw-r--r--lib/Analysis/RegionPass.cpp8
-rw-r--r--lib/Analysis/ScalarEvolution.cpp13
-rw-r--r--lib/Analysis/ScalarEvolutionExpander.cpp3
-rw-r--r--lib/Analysis/ScalarEvolutionNormalization.cpp2
-rw-r--r--lib/Analysis/ValueTracking.cpp23
-rw-r--r--lib/AsmParser/LLLexer.cpp57
-rw-r--r--lib/AsmParser/LLLexer.h1
-rw-r--r--lib/AsmParser/LLParser.cpp271
-rw-r--r--lib/AsmParser/LLParser.h19
-rw-r--r--lib/AsmParser/LLToken.h14
-rw-r--r--lib/AsmParser/Parser.cpp14
-rw-r--r--lib/Bitcode/Reader/BitReader.cpp4
-rw-r--r--lib/Bitcode/Reader/BitcodeReader.cpp301
-rw-r--r--lib/Bitcode/Reader/BitcodeReader.h90
-rw-r--r--lib/Bitcode/Reader/BitstreamReader.cpp4
-rw-r--r--lib/Bitcode/Writer/BitcodeWriter.cpp43
-rw-r--r--lib/Bitcode/Writer/ValueEnumerator.cpp43
-rw-r--r--lib/Bitcode/Writer/ValueEnumerator.h9
-rw-r--r--lib/CodeGen/Analysis.cpp11
-rw-r--r--lib/CodeGen/Android.mk2
-rw-r--r--lib/CodeGen/AsmPrinter/ARMException.cpp7
-rw-r--r--lib/CodeGen/AsmPrinter/Android.mk66
-rw-r--r--lib/CodeGen/AsmPrinter/AsmPrinter.cpp102
-rw-r--r--lib/CodeGen/AsmPrinter/CMakeLists.txt2
-rw-r--r--lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp79
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfCFIException.cpp33
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfDebug.cpp161
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfDebug.h33
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfException.h134
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfUnit.cpp62
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfUnit.h2
-rw-r--r--lib/CodeGen/AsmPrinter/EHStreamer.cpp (renamed from lib/CodeGen/AsmPrinter/DwarfException.cpp)99
-rw-r--r--lib/CodeGen/AsmPrinter/EHStreamer.h138
-rw-r--r--lib/CodeGen/AsmPrinter/Win64Exception.cpp26
-rw-r--r--lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp2
-rw-r--r--lib/CodeGen/AtomicExpandLoadLinkedPass.cpp133
-rw-r--r--lib/CodeGen/BasicTargetTransformInfo.cpp23
-rw-r--r--lib/CodeGen/BranchFolding.cpp15
-rw-r--r--lib/CodeGen/CMakeLists.txt2
-rw-r--r--lib/CodeGen/CodeGenPrepare.cpp22
-rw-r--r--lib/CodeGen/CriticalAntiDepBreaker.cpp51
-rw-r--r--lib/CodeGen/CriticalAntiDepBreaker.h6
-rw-r--r--lib/CodeGen/GlobalMerge.cpp (renamed from lib/Transforms/Scalar/GlobalMerge.cpp)76
-rw-r--r--lib/CodeGen/JumpInstrTables.cpp301
-rw-r--r--lib/CodeGen/LLVMTargetMachine.cpp12
-rw-r--r--lib/CodeGen/LiveDebugVariables.cpp15
-rw-r--r--lib/CodeGen/LiveDebugVariables.h3
-rw-r--r--lib/CodeGen/LiveIntervalAnalysis.cpp27
-rw-r--r--lib/CodeGen/MachineBasicBlock.cpp2
-rw-r--r--lib/CodeGen/MachineFunction.cpp45
-rw-r--r--lib/CodeGen/MachineScheduler.cpp336
-rw-r--r--lib/CodeGen/Passes.cpp16
-rw-r--r--lib/CodeGen/PeepholeOptimizer.cpp381
-rw-r--r--lib/CodeGen/PrologEpilogInserter.cpp87
-rw-r--r--lib/CodeGen/RegAllocGreedy.cpp22
-rw-r--r--lib/CodeGen/RegisterPressure.cpp5
-rw-r--r--lib/CodeGen/ScheduleDAGInstrs.cpp5
-rw-r--r--lib/CodeGen/SelectionDAG/DAGCombiner.cpp529
-rw-r--r--lib/CodeGen/SelectionDAG/FastISel.cpp158
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeDAG.cpp154
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp91
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeTypes.cpp11
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeTypes.h6
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp22
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp278
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp135
-rw-r--r--lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp2
-rw-r--r--lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp3
-rw-r--r--lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp3
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAG.cpp389
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp318
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h4
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp4
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp51
-rw-r--r--lib/CodeGen/SelectionDAG/TargetLowering.cpp41
-rw-r--r--lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp4
-rw-r--r--lib/CodeGen/StackMapLivenessAnalysis.cpp20
-rw-r--r--lib/CodeGen/TargetInstrInfo.cpp2
-rw-r--r--lib/CodeGen/TargetLoweringBase.cpp110
-rw-r--r--lib/CodeGen/TargetLoweringObjectFileImpl.cpp170
-rw-r--r--lib/DebugInfo/DWARFContext.cpp2
-rw-r--r--lib/DebugInfo/DWARFDebugAranges.cpp84
-rw-r--r--lib/DebugInfo/DWARFDebugAranges.h32
-rw-r--r--lib/DebugInfo/DWARFDebugInfoEntry.cpp10
-rw-r--r--lib/DebugInfo/DWARFDebugInfoEntry.h2
-rw-r--r--lib/DebugInfo/DWARFUnit.cpp7
-rw-r--r--lib/ExecutionEngine/ExecutionEngine.cpp85
-rw-r--r--lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp4
-rw-r--r--lib/ExecutionEngine/Interpreter/Interpreter.cpp2
-rw-r--r--lib/ExecutionEngine/JIT/JIT.cpp44
-rw-r--r--lib/ExecutionEngine/JIT/JIT.h12
-rw-r--r--lib/ExecutionEngine/JIT/JITEmitter.cpp42
-rw-r--r--lib/ExecutionEngine/MCJIT/MCJIT.cpp8
-rw-r--r--lib/ExecutionEngine/MCJIT/SectionMemoryManager.cpp23
-rw-r--r--lib/ExecutionEngine/RuntimeDyld/Android.mk1
-rw-r--r--lib/ExecutionEngine/RuntimeDyld/CMakeLists.txt1
-rw-r--r--lib/ExecutionEngine/RuntimeDyld/LLVMBuild.txt2
-rw-r--r--lib/ExecutionEngine/RuntimeDyld/ObjectImageCommon.h3
-rw-r--r--lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp8
-rw-r--r--lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp641
-rw-r--r--lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp217
-rw-r--r--lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h6
-rw-r--r--lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h23
-rw-r--r--lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp127
-rw-r--r--lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h9
-rw-r--r--lib/IR/Android.mk1
-rw-r--r--lib/IR/AsmWriter.cpp87
-rw-r--r--lib/IR/AsmWriter.h4
-rw-r--r--lib/IR/Attributes.cpp5
-rw-r--r--lib/IR/AutoUpgrade.cpp23
-rw-r--r--lib/IR/CMakeLists.txt1
-rw-r--r--lib/IR/Comdat.cpp25
-rw-r--r--lib/IR/ConstantFold.cpp34
-rw-r--r--lib/IR/Constants.cpp88
-rw-r--r--lib/IR/Core.cpp49
-rw-r--r--lib/IR/DIBuilder.cpp38
-rw-r--r--lib/IR/DebugInfo.cpp38
-rw-r--r--lib/IR/DebugLoc.cpp2
-rw-r--r--lib/IR/DiagnosticInfo.cpp2
-rw-r--r--lib/IR/Function.cpp5
-rw-r--r--lib/IR/GCOV.cpp10
-rw-r--r--lib/IR/Globals.cpp56
-rw-r--r--lib/IR/Instruction.cpp17
-rw-r--r--lib/IR/Instructions.cpp41
-rw-r--r--lib/IR/Metadata.cpp2
-rw-r--r--lib/IR/Module.cpp50
-rw-r--r--lib/IR/Pass.cpp30
-rw-r--r--lib/IR/PassRegistry.cpp114
-rw-r--r--lib/IR/Value.cpp66
-rw-r--r--lib/IR/Verifier.cpp120
-rw-r--r--lib/IRReader/IRReader.cpp39
-rw-r--r--lib/LTO/LLVMBuild.txt2
-rw-r--r--lib/LTO/LTOCodeGenerator.cpp34
-rw-r--r--lib/LTO/LTOModule.cpp486
-rw-r--r--lib/Linker/LinkModules.cpp337
-rw-r--r--lib/MC/Android.mk9
-rw-r--r--lib/MC/CMakeLists.txt10
-rw-r--r--lib/MC/ConstantPools.cpp95
-rw-r--r--lib/MC/ELFObjectWriter.cpp8
-rw-r--r--lib/MC/LLVMBuild.txt4
-rw-r--r--lib/MC/MCAnalysis/Android.mk37
-rw-r--r--lib/MC/MCAnalysis/CMakeLists.txt8
-rw-r--r--lib/MC/MCAnalysis/LLVMBuild.txt5
-rw-r--r--lib/MC/MCAnalysis/MCAtom.cpp (renamed from lib/MC/MCAtom.cpp)4
-rw-r--r--lib/MC/MCAnalysis/MCFunction.cpp (renamed from lib/MC/MCFunction.cpp)6
-rw-r--r--lib/MC/MCAnalysis/MCModule.cpp (renamed from lib/MC/MCModule.cpp)6
-rw-r--r--lib/MC/MCAnalysis/MCModuleYAML.cpp (renamed from lib/MC/MCModuleYAML.cpp)12
-rw-r--r--lib/MC/MCAnalysis/MCObjectDisassembler.cpp (renamed from lib/MC/MCObjectDisassembler.cpp)6
-rw-r--r--lib/MC/MCAnalysis/MCObjectSymbolizer.cpp (renamed from lib/MC/MCObjectSymbolizer.cpp)0
-rw-r--r--lib/MC/MCAnalysis/Makefile14
-rw-r--r--lib/MC/MCAsmInfo.cpp5
-rw-r--r--lib/MC/MCAsmStreamer.cpp111
-rw-r--r--lib/MC/MCAssembler.cpp29
-rw-r--r--lib/MC/MCContext.cpp53
-rw-r--r--lib/MC/MCDwarf.cpp221
-rw-r--r--lib/MC/MCELFStreamer.cpp4
-rw-r--r--lib/MC/MCMachOStreamer.cpp4
-rw-r--r--lib/MC/MCNullStreamer.cpp66
-rw-r--r--lib/MC/MCObjectFileInfo.cpp136
-rw-r--r--lib/MC/MCObjectStreamer.cpp51
-rw-r--r--lib/MC/MCParser/AsmLexer.cpp23
-rw-r--r--lib/MC/MCParser/AsmParser.cpp203
-rw-r--r--lib/MC/MCParser/COFFAsmParser.cpp92
-rw-r--r--lib/MC/MCParser/DarwinAsmParser.cpp2
-rw-r--r--lib/MC/MCParser/ELFAsmParser.cpp104
-rw-r--r--lib/MC/MCSectionCOFF.cpp9
-rw-r--r--lib/MC/MCStreamer.cpp137
-rw-r--r--lib/MC/MCTargetOptions.cpp3
-rw-r--r--lib/MC/MCWin64EH.cpp50
-rw-r--r--lib/MC/MachObjectWriter.cpp50
-rw-r--r--lib/MC/Makefile2
-rw-r--r--lib/MC/StringTableBuilder.cpp (renamed from lib/Object/StringTableBuilder.cpp)2
-rw-r--r--lib/MC/WinCOFFObjectWriter.cpp42
-rw-r--r--lib/MC/WinCOFFStreamer.cpp6
-rw-r--r--lib/MC/YAML.cpp (renamed from lib/Object/YAML.cpp)17
-rw-r--r--lib/Object/Android.mk8
-rw-r--r--lib/Object/Archive.cpp182
-rw-r--r--lib/Object/Binary.cpp28
-rw-r--r--lib/Object/CMakeLists.txt3
-rw-r--r--lib/Object/COFFObjectFile.cpp287
-rw-r--r--lib/Object/ELFObjectFile.cpp51
-rw-r--r--lib/Object/ELFYAML.cpp13
-rw-r--r--lib/Object/Error.cpp17
-rw-r--r--lib/Object/IRObjectFile.cpp208
-rw-r--r--lib/Object/LLVMBuild.txt2
-rw-r--r--lib/Object/MachOObjectFile.cpp696
-rw-r--r--lib/Object/MachOUniversal.cpp75
-rw-r--r--lib/Object/Object.cpp32
-rw-r--r--lib/Object/ObjectFile.cpp38
-rw-r--r--lib/Object/RecordStreamer.cpp100
-rw-r--r--lib/Object/RecordStreamer.h42
-rw-r--r--lib/Object/SymbolicFile.cpp14
-rw-r--r--lib/Option/ArgList.cpp56
-rw-r--r--lib/ProfileData/InstrProf.cpp13
-rw-r--r--lib/ProfileData/InstrProfReader.cpp45
-rw-r--r--lib/ProfileData/InstrProfWriter.cpp7
-rw-r--r--lib/Support/APFloat.cpp4
-rw-r--r--lib/Support/ARMWinEH.cpp38
-rw-r--r--lib/Support/Android.mk7
-rw-r--r--lib/Support/Atomic.cpp2
-rw-r--r--lib/Support/CMakeLists.txt7
-rw-r--r--lib/Support/CommandLine.cpp26
-rw-r--r--lib/Support/ConvertUTF.c153
-rw-r--r--lib/Support/CrashRecoveryContext.cpp29
-rw-r--r--lib/Support/DataExtractor.cpp2
-rw-r--r--lib/Support/DataStream.cpp8
-rw-r--r--lib/Support/DynamicLibrary.cpp2
-rw-r--r--lib/Support/ErrorHandling.cpp92
-rw-r--r--lib/Support/FileOutputBuffer.cpp19
-rw-r--r--lib/Support/FileUtilities.cpp17
-rw-r--r--lib/Support/GraphWriter.cpp257
-rw-r--r--lib/Support/Host.cpp6
-rw-r--r--lib/Support/LockFileManager.cpp19
-rw-r--r--lib/Support/Makefile4
-rw-r--r--lib/Support/ManagedStatic.cpp18
-rw-r--r--lib/Support/MemoryBuffer.cpp134
-rw-r--r--lib/Support/Path.cpp170
-rw-r--r--lib/Support/Process.cpp32
-rw-r--r--lib/Support/Program.cpp7
-rw-r--r--lib/Support/RandomNumberGenerator.cpp61
-rw-r--r--lib/Support/ScaledNumber.cpp319
-rw-r--r--lib/Support/SourceMgr.cpp86
-rw-r--r--lib/Support/SpecialCaseList.cpp (renamed from lib/Transforms/Utils/SpecialCaseList.cpp)81
-rw-r--r--lib/Support/StringMap.cpp10
-rw-r--r--lib/Support/StringPool.cpp2
-rw-r--r--lib/Support/TargetRegistry.cpp11
-rw-r--r--lib/Support/Threading.cpp41
-rw-r--r--lib/Support/TimeValue.cpp2
-rw-r--r--lib/Support/Timer.cpp4
-rw-r--r--lib/Support/Triple.cpp16
-rw-r--r--lib/Support/Unix/Memory.inc26
-rw-r--r--lib/Support/Unix/Path.inc200
-rw-r--r--lib/Support/Unix/Process.inc10
-rw-r--r--lib/Support/Unix/Program.inc15
-rw-r--r--lib/Support/Unix/system_error.inc34
-rw-r--r--lib/Support/Windows/DynamicLibrary.inc2
-rw-r--r--lib/Support/Windows/Memory.inc31
-rw-r--r--lib/Support/Windows/Path.inc299
-rw-r--r--lib/Support/Windows/Process.inc17
-rw-r--r--lib/Support/Windows/Program.inc18
-rw-r--r--lib/Support/Windows/WindowsSupport.h9
-rw-r--r--lib/Support/Windows/system_error.inc142
-rw-r--r--lib/Support/YAMLTraits.cpp9
-rw-r--r--lib/Support/raw_ostream.cpp4
-rw-r--r--lib/Support/regcclass.h5
-rw-r--r--lib/Support/regcname.h5
-rw-r--r--lib/Support/regex2.h5
-rw-r--r--lib/Support/regutils.h5
-rw-r--r--lib/Support/system_error.cpp130
-rw-r--r--lib/TableGen/Android.mk1
-rw-r--r--lib/TableGen/CMakeLists.txt1
-rw-r--r--lib/TableGen/Main.cpp14
-rw-r--r--lib/TableGen/Record.cpp10
-rw-r--r--lib/TableGen/SetTheory.cpp323
-rw-r--r--lib/TableGen/TGLexer.cpp22
-rw-r--r--lib/TableGen/TGLexer.h6
-rw-r--r--lib/TableGen/TGParser.cpp29
-rw-r--r--lib/TableGen/TGParser.h2
-rw-r--r--lib/Target/AArch64/AArch64.td3
-rw-r--r--lib/Target/AArch64/AArch64AddressTypePromotion.cpp21
-rw-r--r--lib/Target/AArch64/AArch64AsmPrinter.cpp2
-rw-r--r--lib/Target/AArch64/AArch64BranchRelaxation.cpp6
-rw-r--r--lib/Target/AArch64/AArch64CallingConvention.td14
-rw-r--r--lib/Target/AArch64/AArch64FastISel.cpp51
-rw-r--r--lib/Target/AArch64/AArch64FrameLowering.cpp7
-rw-r--r--lib/Target/AArch64/AArch64FrameLowering.h11
-rw-r--r--lib/Target/AArch64/AArch64ISelDAGToDAG.cpp10
-rw-r--r--lib/Target/AArch64/AArch64ISelLowering.cpp279
-rw-r--r--lib/Target/AArch64/AArch64ISelLowering.h5
-rw-r--r--lib/Target/AArch64/AArch64InstrFormats.td65
-rw-r--r--lib/Target/AArch64/AArch64InstrInfo.cpp48
-rw-r--r--lib/Target/AArch64/AArch64InstrInfo.h4
-rw-r--r--lib/Target/AArch64/AArch64InstrInfo.td89
-rw-r--r--lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp81
-rw-r--r--lib/Target/AArch64/AArch64MCInstLower.cpp4
-rw-r--r--lib/Target/AArch64/AArch64RegisterInfo.td2
-rw-r--r--lib/Target/AArch64/AArch64SchedA53.td4
-rw-r--r--lib/Target/AArch64/AArch64SchedA57.td304
-rw-r--r--lib/Target/AArch64/AArch64SchedA57WriteRes.td512
-rw-r--r--lib/Target/AArch64/AArch64SelectionDAGInfo.cpp11
-rw-r--r--lib/Target/AArch64/AArch64SelectionDAGInfo.h6
-rw-r--r--lib/Target/AArch64/AArch64Subtarget.cpp28
-rw-r--r--lib/Target/AArch64/AArch64Subtarget.h34
-rw-r--r--lib/Target/AArch64/AArch64TargetMachine.cpp36
-rw-r--r--lib/Target/AArch64/AArch64TargetMachine.h27
-rw-r--r--lib/Target/AArch64/AArch64TargetTransformInfo.cpp64
-rw-r--r--lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp461
-rw-r--r--lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.cpp3
-rw-r--r--lib/Target/AArch64/Disassembler/CMakeLists.txt8
-rw-r--r--lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp4
-rw-r--r--lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp2
-rw-r--r--lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp2
-rw-r--r--lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp8
-rw-r--r--lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp33
-rw-r--r--lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h2
-rw-r--r--lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp2
-rw-r--r--lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp40
-rw-r--r--lib/Target/AArch64/MCTargetDesc/Android.mk3
-rw-r--r--lib/Target/AArch64/MCTargetDesc/CMakeLists.txt1
-rw-r--r--lib/Target/AArch64/Utils/AArch64BaseInfo.h20
-rw-r--r--lib/Target/ARM/A15SDOptimizer.cpp3
-rw-r--r--lib/Target/ARM/ARMAsmPrinter.cpp89
-rw-r--r--lib/Target/ARM/ARMBaseInstrInfo.cpp38
-rw-r--r--lib/Target/ARM/ARMBaseInstrInfo.h9
-rw-r--r--lib/Target/ARM/ARMBaseRegisterInfo.cpp9
-rw-r--r--lib/Target/ARM/ARMCodeEmitter.cpp1
-rw-r--r--lib/Target/ARM/ARMExpandPseudoInsts.cpp14
-rw-r--r--lib/Target/ARM/ARMFastISel.cpp2
-rw-r--r--lib/Target/ARM/ARMFrameLowering.cpp18
-rw-r--r--lib/Target/ARM/ARMFrameLowering.h6
-rw-r--r--lib/Target/ARM/ARMISelDAGToDAG.cpp17
-rw-r--r--lib/Target/ARM/ARMISelLowering.cpp205
-rw-r--r--lib/Target/ARM/ARMISelLowering.h6
-rw-r--r--lib/Target/ARM/ARMInstrInfo.td35
-rw-r--r--lib/Target/ARM/ARMInstrNEON.td165
-rw-r--r--lib/Target/ARM/ARMInstrThumb2.td9
-rw-r--r--lib/Target/ARM/ARMJITInfo.cpp8
-rw-r--r--lib/Target/ARM/ARMJITInfo.h8
-rw-r--r--lib/Target/ARM/ARMLoadStoreOptimizer.cpp20
-rw-r--r--lib/Target/ARM/ARMMCInstLower.cpp2
-rw-r--r--lib/Target/ARM/ARMMachineFunctionInfo.cpp10
-rw-r--r--lib/Target/ARM/ARMMachineFunctionInfo.h13
-rw-r--r--lib/Target/ARM/ARMSelectionDAGInfo.cpp18
-rw-r--r--lib/Target/ARM/ARMSelectionDAGInfo.h6
-rw-r--r--lib/Target/ARM/ARMSubtarget.cpp126
-rw-r--r--lib/Target/ARM/ARMSubtarget.h52
-rw-r--r--lib/Target/ARM/ARMTargetMachine.cpp164
-rw-r--r--lib/Target/ARM/ARMTargetMachine.h119
-rw-r--r--lib/Target/ARM/ARMTargetTransformInfo.cpp67
-rw-r--r--lib/Target/ARM/AsmParser/ARMAsmParser.cpp773
-rw-r--r--lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp4
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h7
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp11
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp3
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp31
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMMCExpr.h2
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp6
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h2
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp10
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp235
-rw-r--r--lib/Target/ARM/Thumb1FrameLowering.cpp3
-rw-r--r--lib/Target/ARM/Thumb1FrameLowering.h9
-rw-r--r--lib/Target/ARM/Thumb2SizeReduction.cpp3
-rw-r--r--lib/Target/CppBackend/CPPBackend.cpp6
-rw-r--r--lib/Target/Hexagon/HexagonFrameLowering.cpp4
-rw-r--r--lib/Target/Hexagon/HexagonFrameLowering.h6
-rw-r--r--lib/Target/Hexagon/HexagonISelLowering.cpp767
-rw-r--r--lib/Target/Hexagon/HexagonISelLowering.h5
-rw-r--r--lib/Target/Hexagon/HexagonInstrInfo.cpp7
-rw-r--r--lib/Target/Hexagon/HexagonMachineScheduler.cpp4
-rw-r--r--lib/Target/Hexagon/HexagonMachineScheduler.h2
-rw-r--r--lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp6
-rw-r--r--lib/Target/Hexagon/HexagonSelectionDAGInfo.h4
-rw-r--r--lib/Target/Hexagon/HexagonSubtarget.cpp15
-rw-r--r--lib/Target/Hexagon/HexagonSubtarget.h27
-rw-r--r--lib/Target/Hexagon/HexagonTargetMachine.cpp11
-rw-r--r--lib/Target/Hexagon/HexagonTargetMachine.h32
-rw-r--r--lib/Target/MSP430/MSP430FrameLowering.h9
-rw-r--r--lib/Target/MSP430/MSP430ISelLowering.cpp11
-rw-r--r--lib/Target/MSP430/MSP430ISelLowering.h8
-rw-r--r--lib/Target/MSP430/MSP430InstrInfo.cpp4
-rw-r--r--lib/Target/MSP430/MSP430InstrInfo.h4
-rw-r--r--lib/Target/MSP430/MSP430RegisterInfo.cpp6
-rw-r--r--lib/Target/MSP430/MSP430RegisterInfo.h11
-rw-r--r--lib/Target/MSP430/MSP430SelectionDAGInfo.cpp5
-rw-r--r--lib/Target/MSP430/MSP430SelectionDAGInfo.h2
-rw-r--r--lib/Target/MSP430/MSP430Subtarget.cpp19
-rw-r--r--lib/Target/MSP430/MSP430Subtarget.h25
-rw-r--r--lib/Target/MSP430/MSP430TargetMachine.cpp14
-rw-r--r--lib/Target/MSP430/MSP430TargetMachine.h36
-rw-r--r--lib/Target/Mips/Android.mk1
-rw-r--r--lib/Target/Mips/AsmParser/MipsAsmParser.cpp781
-rw-r--r--lib/Target/Mips/Disassembler/MipsDisassembler.cpp172
-rw-r--r--lib/Target/Mips/MCTargetDesc/Android.mk1
-rw-r--r--lib/Target/Mips/MCTargetDesc/CMakeLists.txt1
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.cpp60
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.h237
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp18
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h2
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp6
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h6
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp2
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp41
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.h4
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsMCExpr.cpp32
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsMCExpr.h2
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp12
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp36
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp171
-rw-r--r--lib/Target/Mips/MicroMipsInstrFPU.td12
-rw-r--r--lib/Target/Mips/MicroMipsInstrInfo.td1
-rw-r--r--lib/Target/Mips/Mips.td5
-rw-r--r--lib/Target/Mips/Mips16FrameLowering.cpp4
-rw-r--r--lib/Target/Mips/Mips16FrameLowering.h3
-rw-r--r--lib/Target/Mips/Mips16ISelDAGToDAG.cpp6
-rw-r--r--lib/Target/Mips/Mips16ISelLowering.cpp7
-rw-r--r--lib/Target/Mips/Mips16ISelLowering.h4
-rw-r--r--lib/Target/Mips/Mips16InstrInfo.td8
-rw-r--r--lib/Target/Mips/Mips32r6InstrFormats.td177
-rw-r--r--lib/Target/Mips/Mips32r6InstrInfo.td441
-rw-r--r--lib/Target/Mips/Mips64InstrInfo.td97
-rw-r--r--lib/Target/Mips/Mips64r6InstrInfo.td163
-rw-r--r--lib/Target/Mips/MipsAsmPrinter.cpp56
-rw-r--r--lib/Target/Mips/MipsAsmPrinter.h6
-rw-r--r--lib/Target/Mips/MipsCallingConv.td5
-rw-r--r--lib/Target/Mips/MipsCodeEmitter.cpp7
-rw-r--r--lib/Target/Mips/MipsCondMov.td146
-rw-r--r--lib/Target/Mips/MipsDSPInstrFormats.td4
-rw-r--r--lib/Target/Mips/MipsDelaySlotFiller.cpp8
-rw-r--r--lib/Target/Mips/MipsFastISel.cpp175
-rw-r--r--lib/Target/Mips/MipsFrameLowering.h1
-rw-r--r--lib/Target/Mips/MipsISelDAGToDAG.cpp3
-rw-r--r--lib/Target/Mips/MipsISelDAGToDAG.h4
-rw-r--r--lib/Target/Mips/MipsISelLowering.cpp247
-rw-r--r--lib/Target/Mips/MipsISelLowering.h15
-rw-r--r--lib/Target/Mips/MipsInstrFPU.td116
-rw-r--r--lib/Target/Mips/MipsInstrFormats.td52
-rw-r--r--lib/Target/Mips/MipsInstrInfo.td313
-rw-r--r--lib/Target/Mips/MipsLongBranch.cpp62
-rw-r--r--lib/Target/Mips/MipsMSAInstrFormats.td2
-rw-r--r--lib/Target/Mips/MipsMachineFunction.h1
-rw-r--r--lib/Target/Mips/MipsRegisterInfo.cpp11
-rw-r--r--lib/Target/Mips/MipsRegisterInfo.td16
-rw-r--r--lib/Target/Mips/MipsSEFrameLowering.cpp4
-rw-r--r--lib/Target/Mips/MipsSEFrameLowering.h3
-rw-r--r--lib/Target/Mips/MipsSEISelDAGToDAG.cpp24
-rw-r--r--lib/Target/Mips/MipsSEISelLowering.cpp99
-rw-r--r--lib/Target/Mips/MipsSEISelLowering.h12
-rw-r--r--lib/Target/Mips/MipsSEInstrInfo.cpp57
-rw-r--r--lib/Target/Mips/MipsSEInstrInfo.h3
-rw-r--r--lib/Target/Mips/MipsSelectionDAGInfo.cpp5
-rw-r--r--lib/Target/Mips/MipsSelectionDAGInfo.h2
-rw-r--r--lib/Target/Mips/MipsSubtarget.cpp145
-rw-r--r--lib/Target/Mips/MipsSubtarget.h72
-rw-r--r--lib/Target/Mips/MipsTargetMachine.cpp86
-rw-r--r--lib/Target/Mips/MipsTargetMachine.h71
-rw-r--r--lib/Target/Mips/MipsTargetStreamer.h108
-rw-r--r--lib/Target/NVPTX/NVPTX.td7
-rw-r--r--lib/Target/NVPTX/NVPTXAsmPrinter.cpp72
-rw-r--r--lib/Target/NVPTX/NVPTXFrameLowering.cpp24
-rw-r--r--lib/Target/NVPTX/NVPTXFrameLowering.h8
-rw-r--r--lib/Target/NVPTX/NVPTXGenericToNVVM.cpp2
-rw-r--r--lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp532
-rw-r--r--lib/Target/NVPTX/NVPTXISelDAGToDAG.h4
-rw-r--r--lib/Target/NVPTX/NVPTXISelLowering.cpp797
-rw-r--r--lib/Target/NVPTX/NVPTXISelLowering.h19
-rw-r--r--lib/Target/NVPTX/NVPTXImageOptimizer.cpp2
-rw-r--r--lib/Target/NVPTX/NVPTXInstrInfo.cpp4
-rw-r--r--lib/Target/NVPTX/NVPTXInstrInfo.h3
-rw-r--r--lib/Target/NVPTX/NVPTXInstrInfo.td348
-rw-r--r--lib/Target/NVPTX/NVPTXIntrinsics.td418
-rw-r--r--lib/Target/NVPTX/NVPTXMCExpr.h2
-rw-r--r--lib/Target/NVPTX/NVPTXRegisterInfo.td7
-rw-r--r--lib/Target/NVPTX/NVPTXSubtarget.cpp57
-rw-r--r--lib/Target/NVPTX/NVPTXSubtarget.h35
-rw-r--r--lib/Target/NVPTX/NVPTXTargetMachine.cpp66
-rw-r--r--lib/Target/NVPTX/NVPTXTargetMachine.h42
-rw-r--r--lib/Target/NVPTX/NVVMReflect.cpp69
-rw-r--r--lib/Target/NVPTX/cl_common_defines.h6
-rw-r--r--lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp103
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp46
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp32
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h2
-rw-r--r--lib/Target/PowerPC/PPC.td10
-rw-r--r--lib/Target/PowerPC/PPCAsmPrinter.cpp24
-rw-r--r--lib/Target/PowerPC/PPCFastISel.cpp38
-rw-r--r--lib/Target/PowerPC/PPCFrameLowering.cpp168
-rw-r--r--lib/Target/PowerPC/PPCFrameLowering.h198
-rw-r--r--lib/Target/PowerPC/PPCHazardRecognizers.cpp11
-rw-r--r--lib/Target/PowerPC/PPCHazardRecognizers.h4
-rw-r--r--lib/Target/PowerPC/PPCISelDAGToDAG.cpp14
-rw-r--r--lib/Target/PowerPC/PPCISelLowering.cpp1002
-rw-r--r--lib/Target/PowerPC/PPCISelLowering.h34
-rw-r--r--lib/Target/PowerPC/PPCInstr64Bit.td16
-rw-r--r--lib/Target/PowerPC/PPCInstrAltivec.td62
-rw-r--r--lib/Target/PowerPC/PPCInstrFormats.td14
-rw-r--r--lib/Target/PowerPC/PPCInstrInfo.cpp58
-rw-r--r--lib/Target/PowerPC/PPCInstrInfo.h6
-rw-r--r--lib/Target/PowerPC/PPCInstrInfo.td3
-rw-r--r--lib/Target/PowerPC/PPCJITInfo.cpp9
-rw-r--r--lib/Target/PowerPC/PPCJITInfo.h43
-rw-r--r--lib/Target/PowerPC/PPCRegisterInfo.cpp8
-rw-r--r--lib/Target/PowerPC/PPCSelectionDAGInfo.cpp8
-rw-r--r--lib/Target/PowerPC/PPCSelectionDAGInfo.h2
-rw-r--r--lib/Target/PowerPC/PPCSubtarget.cpp58
-rw-r--r--lib/Target/PowerPC/PPCSubtarget.h29
-rw-r--r--lib/Target/PowerPC/PPCTargetMachine.cpp51
-rw-r--r--lib/Target/PowerPC/PPCTargetMachine.h35
-rw-r--r--lib/Target/R600/AMDGPU.h8
-rw-r--r--lib/Target/R600/AMDGPU.td46
-rw-r--r--lib/Target/R600/AMDGPUAsmPrinter.cpp70
-rw-r--r--lib/Target/R600/AMDGPUAsmPrinter.h23
-rw-r--r--lib/Target/R600/AMDGPUConvertToISA.cpp62
-rw-r--r--lib/Target/R600/AMDGPUFrameLowering.cpp2
-rw-r--r--lib/Target/R600/AMDGPUISelDAGToDAG.cpp209
-rw-r--r--lib/Target/R600/AMDGPUISelLowering.cpp987
-rw-r--r--lib/Target/R600/AMDGPUISelLowering.h95
-rw-r--r--lib/Target/R600/AMDGPUInstrInfo.cpp28
-rw-r--r--lib/Target/R600/AMDGPUInstrInfo.h19
-rw-r--r--lib/Target/R600/AMDGPUInstrInfo.td69
-rw-r--r--lib/Target/R600/AMDGPUInstructions.td97
-rw-r--r--lib/Target/R600/AMDGPUIntrinsicInfo.cpp (renamed from lib/Target/R600/AMDILIntrinsicInfo.cpp)40
-rw-r--r--lib/Target/R600/AMDGPUIntrinsicInfo.h (renamed from lib/Target/R600/AMDILIntrinsicInfo.h)21
-rw-r--r--lib/Target/R600/AMDGPUIntrinsics.td29
-rw-r--r--lib/Target/R600/AMDGPUMCInstLower.cpp1
-rw-r--r--lib/Target/R600/AMDGPUMCInstLower.h4
-rw-r--r--lib/Target/R600/AMDGPUPromoteAlloca.cpp387
-rw-r--r--lib/Target/R600/AMDGPURegisterInfo.cpp4
-rw-r--r--lib/Target/R600/AMDGPURegisterInfo.h14
-rw-r--r--lib/Target/R600/AMDGPUSubtarget.cpp100
-rw-r--r--lib/Target/R600/AMDGPUSubtarget.h100
-rw-r--r--lib/Target/R600/AMDGPUTargetMachine.cpp13
-rw-r--r--lib/Target/R600/AMDGPUTargetMachine.h7
-rw-r--r--lib/Target/R600/AMDILBase.td25
-rw-r--r--lib/Target/R600/AMDILISelLowering.cpp560
-rw-r--r--lib/Target/R600/AMDILInstrInfo.td150
-rw-r--r--lib/Target/R600/AMDILIntrinsics.td224
-rw-r--r--lib/Target/R600/AMDILRegisterInfo.td107
-rw-r--r--lib/Target/R600/CMakeLists.txt6
-rw-r--r--lib/Target/R600/EvergreenInstructions.td11
-rw-r--r--lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp15
-rw-r--r--lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp14
-rw-r--r--lib/Target/R600/R600ControlFlowFinalizer.cpp1
-rw-r--r--lib/Target/R600/R600ISelLowering.cpp314
-rw-r--r--lib/Target/R600/R600ISelLowering.h9
-rw-r--r--lib/Target/R600/R600InstrInfo.cpp90
-rw-r--r--lib/Target/R600/R600InstrInfo.h18
-rw-r--r--lib/Target/R600/R600Instructions.td158
-rw-r--r--lib/Target/R600/R600MachineScheduler.cpp1
-rw-r--r--lib/Target/R600/R600Packetizer.cpp1
-rw-r--r--lib/Target/R600/R600RegisterInfo.cpp17
-rw-r--r--lib/Target/R600/R600RegisterInfo.h12
-rw-r--r--lib/Target/R600/R600RegisterInfo.td48
-rw-r--r--lib/Target/R600/SIAnnotateControlFlow.cpp46
-rw-r--r--lib/Target/R600/SIDefines.h50
-rw-r--r--lib/Target/R600/SIFixSGPRLiveRanges.cpp110
-rw-r--r--lib/Target/R600/SIISelLowering.cpp303
-rw-r--r--lib/Target/R600/SIISelLowering.h10
-rw-r--r--lib/Target/R600/SIInsertWaits.cpp2
-rw-r--r--lib/Target/R600/SIInstrFormats.td26
-rw-r--r--lib/Target/R600/SIInstrInfo.cpp223
-rw-r--r--lib/Target/R600/SIInstrInfo.h12
-rw-r--r--lib/Target/R600/SIInstrInfo.td152
-rw-r--r--lib/Target/R600/SIInstructions.td868
-rw-r--r--lib/Target/R600/SIIntrinsics.td50
-rw-r--r--lib/Target/R600/SILowerControlFlow.cpp87
-rw-r--r--lib/Target/R600/SIMachineFunctionInfo.cpp6
-rw-r--r--lib/Target/R600/SIRegisterInfo.cpp34
-rw-r--r--lib/Target/R600/SIRegisterInfo.h19
-rw-r--r--lib/Target/R600/SIRegisterInfo.td2
-rw-r--r--lib/Target/R600/SITypeRewriter.cpp3
-rw-r--r--lib/Target/Sparc/AsmParser/SparcAsmParser.cpp153
-rw-r--r--lib/Target/Sparc/InstPrinter/SparcInstPrinter.cpp2
-rw-r--r--lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp4
-rw-r--r--lib/Target/Sparc/MCTargetDesc/SparcMCCodeEmitter.cpp2
-rw-r--r--lib/Target/Sparc/MCTargetDesc/SparcMCExpr.cpp36
-rw-r--r--lib/Target/Sparc/MCTargetDesc/SparcMCExpr.h2
-rw-r--r--lib/Target/Sparc/SparcFrameLowering.cpp11
-rw-r--r--lib/Target/Sparc/SparcFrameLowering.h9
-rw-r--r--lib/Target/Sparc/SparcISelLowering.cpp4
-rw-r--r--lib/Target/Sparc/SparcJITInfo.cpp3
-rw-r--r--lib/Target/Sparc/SparcSelectionDAGInfo.cpp6
-rw-r--r--lib/Target/Sparc/SparcSelectionDAGInfo.h2
-rw-r--r--lib/Target/Sparc/SparcSubtarget.cpp52
-rw-r--r--lib/Target/Sparc/SparcSubtarget.h26
-rw-r--r--lib/Target/Sparc/SparcTargetMachine.cpp32
-rw-r--r--lib/Target/Sparc/SparcTargetMachine.h36
-rw-r--r--lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp172
-rw-r--r--lib/Target/SystemZ/SystemZCallingConv.td10
-rw-r--r--lib/Target/SystemZ/SystemZFrameLowering.cpp22
-rw-r--r--lib/Target/SystemZ/SystemZFrameLowering.h8
-rw-r--r--lib/Target/SystemZ/SystemZISelLowering.cpp63
-rw-r--r--lib/Target/SystemZ/SystemZISelLowering.h3
-rw-r--r--lib/Target/SystemZ/SystemZInstrFP.td7
-rw-r--r--lib/Target/SystemZ/SystemZInstrFormats.td119
-rw-r--r--lib/Target/SystemZ/SystemZInstrInfo.cpp12
-rw-r--r--lib/Target/SystemZ/SystemZInstrInfo.h5
-rw-r--r--lib/Target/SystemZ/SystemZInstrInfo.td44
-rw-r--r--lib/Target/SystemZ/SystemZOperands.td29
-rw-r--r--lib/Target/SystemZ/SystemZOperators.td12
-rw-r--r--lib/Target/SystemZ/SystemZPatterns.td8
-rw-r--r--lib/Target/SystemZ/SystemZRegisterInfo.cpp29
-rw-r--r--lib/Target/SystemZ/SystemZRegisterInfo.h9
-rw-r--r--lib/Target/SystemZ/SystemZRegisterInfo.td25
-rw-r--r--lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp6
-rw-r--r--lib/Target/SystemZ/SystemZSelectionDAGInfo.h2
-rw-r--r--lib/Target/SystemZ/SystemZSubtarget.cpp28
-rw-r--r--lib/Target/SystemZ/SystemZSubtarget.h26
-rw-r--r--lib/Target/SystemZ/SystemZTargetMachine.cpp16
-rw-r--r--lib/Target/SystemZ/SystemZTargetMachine.h26
-rw-r--r--lib/Target/TargetMachine.cpp15
-rw-r--r--lib/Target/TargetSubtargetInfo.cpp13
-rw-r--r--lib/Target/X86/Android.mk1
-rw-r--r--lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp371
-rw-r--r--lib/Target/X86/AsmParser/X86AsmInstrumentation.h9
-rw-r--r--lib/Target/X86/AsmParser/X86AsmParser.cpp287
-rw-r--r--lib/Target/X86/AsmParser/X86Operand.h42
-rw-r--r--lib/Target/X86/CMakeLists.txt1
-rw-r--r--lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp3
-rw-r--r--lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp46
-rw-r--r--lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp17
-rw-r--r--lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp13
-rw-r--r--lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h3
-rw-r--r--lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp6
-rw-r--r--lib/Target/X86/X86.h4
-rw-r--r--lib/Target/X86/X86.td7
-rw-r--r--lib/Target/X86/X86AtomicExpandPass.cpp287
-rw-r--r--lib/Target/X86/X86CodeEmitter.cpp17
-rw-r--r--lib/Target/X86/X86FastISel.cpp1265
-rw-r--r--lib/Target/X86/X86FixupLEAs.cpp175
-rw-r--r--lib/Target/X86/X86FrameLowering.cpp407
-rw-r--r--lib/Target/X86/X86FrameLowering.h20
-rw-r--r--lib/Target/X86/X86ISelDAGToDAG.cpp32
-rw-r--r--lib/Target/X86/X86ISelLowering.cpp3517
-rw-r--r--lib/Target/X86/X86ISelLowering.h31
-rw-r--r--lib/Target/X86/X86InstrAVX512.td301
-rw-r--r--lib/Target/X86/X86InstrArithmetic.td6
-rw-r--r--lib/Target/X86/X86InstrCompiler.td139
-rw-r--r--lib/Target/X86/X86InstrFragmentsSIMD.td4
-rw-r--r--lib/Target/X86/X86InstrInfo.cpp136
-rw-r--r--lib/Target/X86/X86InstrInfo.h24
-rw-r--r--lib/Target/X86/X86InstrInfo.td24
-rw-r--r--lib/Target/X86/X86InstrSSE.td252
-rw-r--r--lib/Target/X86/X86InstrSystem.td5
-rw-r--r--lib/Target/X86/X86JITInfo.cpp6
-rw-r--r--lib/Target/X86/X86JITInfo.h8
-rw-r--r--lib/Target/X86/X86MCInstLower.cpp35
-rw-r--r--lib/Target/X86/X86RegisterInfo.cpp54
-rw-r--r--lib/Target/X86/X86RegisterInfo.h10
-rw-r--r--lib/Target/X86/X86SelectionDAGInfo.cpp57
-rw-r--r--lib/Target/X86/X86SelectionDAGInfo.h8
-rw-r--r--lib/Target/X86/X86Subtarget.cpp59
-rw-r--r--lib/Target/X86/X86Subtarget.h32
-rw-r--r--lib/Target/X86/X86TargetMachine.cpp62
-rw-r--r--lib/Target/X86/X86TargetMachine.h30
-rw-r--r--lib/Target/X86/X86TargetTransformInfo.cpp167
-rw-r--r--lib/Target/XCore/XCoreFrameLowering.cpp6
-rw-r--r--lib/Target/XCore/XCoreISelLowering.cpp49
-rw-r--r--lib/Target/XCore/XCoreISelLowering.h5
-rw-r--r--lib/Target/XCore/XCoreInstrInfo.cpp9
-rw-r--r--lib/Target/XCore/XCoreSelectionDAGInfo.cpp7
-rw-r--r--lib/Target/XCore/XCoreSelectionDAGInfo.h2
-rw-r--r--lib/Target/XCore/XCoreSubtarget.cpp10
-rw-r--r--lib/Target/XCore/XCoreSubtarget.h21
-rw-r--r--lib/Target/XCore/XCoreTargetMachine.cpp9
-rw-r--r--lib/Target/XCore/XCoreTargetMachine.h28
-rw-r--r--lib/Transforms/IPO/ArgumentPromotion.cpp27
-rw-r--r--lib/Transforms/IPO/DeadArgumentElimination.cpp39
-rw-r--r--lib/Transforms/IPO/FunctionAttrs.cpp19
-rw-r--r--lib/Transforms/IPO/GlobalDCE.cpp43
-rw-r--r--lib/Transforms/IPO/GlobalOpt.cpp45
-rw-r--r--lib/Transforms/IPO/MergeFunctions.cpp530
-rw-r--r--lib/Transforms/IPO/PassManagerBuilder.cpp13
-rw-r--r--lib/Transforms/InstCombine/InstCombine.h6
-rw-r--r--lib/Transforms/InstCombine/InstCombineAddSub.cpp266
-rw-r--r--lib/Transforms/InstCombine/InstCombineAndOrXor.cpp23
-rw-r--r--lib/Transforms/InstCombine/InstCombineCalls.cpp40
-rw-r--r--lib/Transforms/InstCombine/InstCombineCasts.cpp26
-rw-r--r--lib/Transforms/InstCombine/InstCombineCompares.cpp82
-rw-r--r--lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp153
-rw-r--r--lib/Transforms/InstCombine/InstCombineMulDivRem.cpp11
-rw-r--r--lib/Transforms/InstCombine/InstCombineSelect.cpp102
-rw-r--r--lib/Transforms/InstCombine/InstCombineShifts.cpp5
-rw-r--r--lib/Transforms/InstCombine/InstCombineVectorOps.cpp8
-rw-r--r--lib/Transforms/InstCombine/InstructionCombining.cpp310
-rw-r--r--lib/Transforms/Instrumentation/AddressSanitizer.cpp397
-rw-r--r--lib/Transforms/Instrumentation/DataFlowSanitizer.cpp65
-rw-r--r--lib/Transforms/Instrumentation/DebugIR.cpp5
-rw-r--r--lib/Transforms/Instrumentation/GCOVProfiling.cpp20
-rw-r--r--lib/Transforms/Instrumentation/MemorySanitizer.cpp332
-rw-r--r--lib/Transforms/Instrumentation/ThreadSanitizer.cpp78
-rw-r--r--lib/Transforms/Scalar/Android.mk2
-rw-r--r--lib/Transforms/Scalar/CMakeLists.txt2
-rw-r--r--lib/Transforms/Scalar/GVN.cpp20
-rw-r--r--lib/Transforms/Scalar/JumpThreading.cpp9
-rw-r--r--lib/Transforms/Scalar/LICM.cpp73
-rw-r--r--lib/Transforms/Scalar/LoadCombine.cpp268
-rw-r--r--lib/Transforms/Scalar/LoopIdiomRecognize.cpp4
-rw-r--r--lib/Transforms/Scalar/LoopRerollPass.cpp6
-rw-r--r--lib/Transforms/Scalar/LoopUnrollPass.cpp360
-rw-r--r--lib/Transforms/Scalar/LowerAtomic.cpp5
-rw-r--r--lib/Transforms/Scalar/Reassociate.cpp31
-rw-r--r--lib/Transforms/Scalar/SCCP.cpp4
-rw-r--r--lib/Transforms/Scalar/SROA.cpp18
-rw-r--r--lib/Transforms/Scalar/SampleProfile.cpp7
-rw-r--r--lib/Transforms/Scalar/Scalar.cpp1
-rw-r--r--lib/Transforms/Scalar/ScalarReplAggregates.cpp6
-rw-r--r--lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp633
-rw-r--r--lib/Transforms/Scalar/Sink.cpp6
-rw-r--r--lib/Transforms/Utils/Android.mk1
-rw-r--r--lib/Transforms/Utils/CMakeLists.txt1
-rw-r--r--lib/Transforms/Utils/CloneModule.cpp2
-rw-r--r--lib/Transforms/Utils/InlineFunction.cpp9
-rw-r--r--lib/Transforms/Utils/LoopSimplify.cpp17
-rw-r--r--lib/Transforms/Utils/LoopUnroll.cpp28
-rw-r--r--lib/Transforms/Utils/LoopUnrollRuntime.cpp23
-rw-r--r--lib/Transforms/Utils/LowerSwitch.cpp129
-rw-r--r--lib/Transforms/Utils/SimplifyCFG.cpp94
-rw-r--r--lib/Transforms/Vectorize/LoopVectorize.cpp373
-rw-r--r--lib/Transforms/Vectorize/SLPVectorizer.cpp297
715 files changed, 34625 insertions, 17381 deletions
diff --git a/lib/Analysis/AliasAnalysis.cpp b/lib/Analysis/AliasAnalysis.cpp
index 57237e5..5cde979 100644
--- a/lib/Analysis/AliasAnalysis.cpp
+++ b/lib/Analysis/AliasAnalysis.cpp
@@ -60,6 +60,13 @@ bool AliasAnalysis::pointsToConstantMemory(const Location &Loc,
return AA->pointsToConstantMemory(Loc, OrLocal);
}
+AliasAnalysis::Location
+AliasAnalysis::getArgLocation(ImmutableCallSite CS, unsigned ArgIdx,
+ AliasAnalysis::ModRefResult &Mask) {
+ assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!");
+ return AA->getArgLocation(CS, ArgIdx, Mask);
+}
+
void AliasAnalysis::deleteValue(Value *V) {
assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!");
AA->deleteValue(V);
@@ -91,22 +98,26 @@ AliasAnalysis::getModRefInfo(ImmutableCallSite CS,
if (onlyAccessesArgPointees(MRB)) {
bool doesAlias = false;
+ ModRefResult AllArgsMask = NoModRef;
if (doesAccessArgPointees(MRB)) {
- MDNode *CSTag = CS.getInstruction()->getMetadata(LLVMContext::MD_tbaa);
for (ImmutableCallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end();
AI != AE; ++AI) {
const Value *Arg = *AI;
if (!Arg->getType()->isPointerTy())
continue;
- Location CSLoc(Arg, UnknownSize, CSTag);
+ ModRefResult ArgMask;
+ Location CSLoc =
+ getArgLocation(CS, (unsigned) std::distance(CS.arg_begin(), AI),
+ ArgMask);
if (!isNoAlias(CSLoc, Loc)) {
doesAlias = true;
- break;
+ AllArgsMask = ModRefResult(AllArgsMask | ArgMask);
}
}
}
if (!doesAlias)
return NoModRef;
+ Mask = ModRefResult(Mask & AllArgsMask);
}
// If Loc is a constant memory location, the call definitely could not
@@ -150,14 +161,23 @@ AliasAnalysis::getModRefInfo(ImmutableCallSite CS1, ImmutableCallSite CS2) {
if (onlyAccessesArgPointees(CS2B)) {
AliasAnalysis::ModRefResult R = NoModRef;
if (doesAccessArgPointees(CS2B)) {
- MDNode *CS2Tag = CS2.getInstruction()->getMetadata(LLVMContext::MD_tbaa);
for (ImmutableCallSite::arg_iterator
I = CS2.arg_begin(), E = CS2.arg_end(); I != E; ++I) {
const Value *Arg = *I;
if (!Arg->getType()->isPointerTy())
continue;
- Location CS2Loc(Arg, UnknownSize, CS2Tag);
- R = ModRefResult((R | getModRefInfo(CS1, CS2Loc)) & Mask);
+ ModRefResult ArgMask;
+ Location CS2Loc =
+ getArgLocation(CS2, (unsigned) std::distance(CS2.arg_begin(), I),
+ ArgMask);
+ // ArgMask indicates what CS2 might do to CS2Loc, and the dependence of
+ // CS1 on that location is the inverse.
+ if (ArgMask == Mod)
+ ArgMask = ModRef;
+ else if (ArgMask == Ref)
+ ArgMask = Mod;
+
+ R = ModRefResult((R | (getModRefInfo(CS1, CS2Loc) & ArgMask)) & Mask);
if (R == Mask)
break;
}
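The inversion in the hunk above is a two-row truth table: what CS2 may do to an argument's location determines which of CS1's accesses to that location can conflict. A standalone sketch of just that step, mirroring the committed logic but with a local ModRefResult enum rather than LLVM's headers:

enum ModRefResult { NoModRef = 0, Ref = 1, Mod = 2, ModRef = Ref | Mod };

// CS2ArgMask is what CS2 may do to the argument's memory; the return
// value is which of CS1's accesses to that memory can still matter.
static ModRefResult invertForCS1(ModRefResult CS2ArgMask) {
  if (CS2ArgMask == Mod)  // CS2 may write: CS1's reads and writes conflict.
    return ModRef;
  if (CS2ArgMask == Ref)  // CS2 only reads: only CS1's writes conflict.
    return Mod;
  return CS2ArgMask;      // NoModRef and ModRef are already symmetric.
}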
@@ -170,14 +190,16 @@ AliasAnalysis::getModRefInfo(ImmutableCallSite CS1, ImmutableCallSite CS2) {
if (onlyAccessesArgPointees(CS1B)) {
AliasAnalysis::ModRefResult R = NoModRef;
if (doesAccessArgPointees(CS1B)) {
- MDNode *CS1Tag = CS1.getInstruction()->getMetadata(LLVMContext::MD_tbaa);
for (ImmutableCallSite::arg_iterator
I = CS1.arg_begin(), E = CS1.arg_end(); I != E; ++I) {
const Value *Arg = *I;
if (!Arg->getType()->isPointerTy())
continue;
- Location CS1Loc(Arg, UnknownSize, CS1Tag);
- if (getModRefInfo(CS2, CS1Loc) != NoModRef) {
+ ModRefResult ArgMask;
+ Location CS1Loc =
+ getArgLocation(CS1, (unsigned) std::distance(CS1.arg_begin(), I),
+ ArgMask);
+ if ((getModRefInfo(CS2, CS1Loc) & ArgMask) != NoModRef) {
R = Mask;
break;
}
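Taken together, getModRefInfo(CS, Loc) now folds a per-argument mask into the overall answer, and the old early break is gone, so every aliasing argument contributes its mask. A minimal self-contained sketch of that folding, using simplified stand-ins for LLVM's call-site and location types rather than the committed code:

// Stand-in types; the real code walks ImmutableCallSite arguments and
// queries getArgLocation/isNoAlias instead.
#include <cstdio>
#include <vector>

enum ModRefResult { NoModRef = 0, Ref = 1, Mod = 2, ModRef = Ref | Mod };

struct ArgInfo {
  bool IsPointer;        // non-pointer arguments are skipped
  bool AliasesLoc;       // stand-in for !isNoAlias(CSLoc, Loc)
  ModRefResult ArgMask;  // what the callee may do through this argument
};

// Every aliasing pointer argument contributes its mask (no early break),
// and the final answer intersects the caller's mask with that union.
ModRefResult foldArgMasks(const std::vector<ArgInfo> &Args, ModRefResult Mask) {
  bool DoesAlias = false;
  ModRefResult AllArgsMask = NoModRef;
  for (const ArgInfo &A : Args) {
    if (!A.IsPointer || !A.AliasesLoc)
      continue;
    DoesAlias = true;
    AllArgsMask = ModRefResult(AllArgsMask | A.ArgMask);
  }
  return DoesAlias ? ModRefResult(Mask & AllArgsMask) : NoModRef;
}

int main() {
  // One aliasing read-only argument, one non-aliasing writable one:
  // the call can at worst read Loc, so the folded mask is Ref (1).
  std::vector<ArgInfo> Args = {{true, true, Ref}, {true, false, Mod}};
  std::printf("%d\n", foldArgMasks(Args, ModRef));
}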
diff --git a/lib/Analysis/Analysis.cpp b/lib/Analysis/Analysis.cpp
index 01c1c7e..ade940a 100644
--- a/lib/Analysis/Analysis.cpp
+++ b/lib/Analysis/Analysis.cpp
@@ -48,6 +48,7 @@ void llvm::initializeAnalysis(PassRegistry &Registry) {
initializeIVUsersPass(Registry);
initializeInstCountPass(Registry);
initializeIntervalPartitionPass(Registry);
+ initializeJumpInstrTableInfoPass(Registry);
initializeLazyValueInfoPass(Registry);
initializeLibCallAliasAnalysisPass(Registry);
initializeLintPass(Registry);
diff --git a/lib/Analysis/Android.mk b/lib/Analysis/Android.mk
index bca673e..4e435a1 100644
--- a/lib/Analysis/Android.mk
+++ b/lib/Analysis/Android.mk
@@ -27,6 +27,7 @@ analysis_SRC_FILES := \
InstructionSimplify.cpp \
Interval.cpp \
IntervalPartition.cpp \
+ JumpInstrTableInfo.cpp \
LazyCallGraph.cpp \
LazyValueInfo.cpp \
LibCallAliasAnalysis.cpp \
diff --git a/lib/Analysis/BasicAliasAnalysis.cpp b/lib/Analysis/BasicAliasAnalysis.cpp
index fe90b84..c50dd4a 100644
--- a/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/lib/Analysis/BasicAliasAnalysis.cpp
@@ -490,6 +490,10 @@ namespace {
/// global) or not.
bool pointsToConstantMemory(const Location &Loc, bool OrLocal) override;
+ /// Get the location associated with a pointer argument of a callsite.
+ Location getArgLocation(ImmutableCallSite CS, unsigned ArgIdx,
+ ModRefResult &Mask) override;
+
/// getModRefBehavior - Return the behavior when calling the given
/// call site.
ModRefBehavior getModRefBehavior(ImmutableCallSite CS) override;
@@ -653,6 +657,21 @@ BasicAliasAnalysis::pointsToConstantMemory(const Location &Loc, bool OrLocal) {
return Worklist.empty();
}
+static bool isMemsetPattern16(const Function *MS,
+ const TargetLibraryInfo &TLI) {
+ if (TLI.has(LibFunc::memset_pattern16) &&
+ MS->getName() == "memset_pattern16") {
+ FunctionType *MemsetType = MS->getFunctionType();
+ if (!MemsetType->isVarArg() && MemsetType->getNumParams() == 3 &&
+ isa<PointerType>(MemsetType->getParamType(0)) &&
+ isa<PointerType>(MemsetType->getParamType(1)) &&
+ isa<IntegerType>(MemsetType->getParamType(2)))
+ return true;
+ }
+
+ return false;
+}
+
/// getModRefBehavior - Return the behavior when calling the given call site.
AliasAnalysis::ModRefBehavior
BasicAliasAnalysis::getModRefBehavior(ImmutableCallSite CS) {
@@ -692,10 +711,93 @@ BasicAliasAnalysis::getModRefBehavior(const Function *F) {
if (F->onlyReadsMemory())
Min = OnlyReadsMemory;
+ const TargetLibraryInfo &TLI = getAnalysis<TargetLibraryInfo>();
+ if (isMemsetPattern16(F, TLI))
+ Min = OnlyAccessesArgumentPointees;
+
// Otherwise be conservative.
return ModRefBehavior(AliasAnalysis::getModRefBehavior(F) & Min);
}
+AliasAnalysis::Location
+BasicAliasAnalysis::getArgLocation(ImmutableCallSite CS, unsigned ArgIdx,
+ ModRefResult &Mask) {
+ Location Loc = AliasAnalysis::getArgLocation(CS, ArgIdx, Mask);
+ const TargetLibraryInfo &TLI = getAnalysis<TargetLibraryInfo>();
+ const IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction());
+ if (II != nullptr)
+ switch (II->getIntrinsicID()) {
+ default: break;
+ case Intrinsic::memset:
+ case Intrinsic::memcpy:
+ case Intrinsic::memmove: {
+ assert((ArgIdx == 0 || ArgIdx == 1) &&
+ "Invalid argument index for memory intrinsic");
+ if (ConstantInt *LenCI = dyn_cast<ConstantInt>(II->getArgOperand(2)))
+ Loc.Size = LenCI->getZExtValue();
+ assert(Loc.Ptr == II->getArgOperand(ArgIdx) &&
+ "Memory intrinsic location pointer not argument?");
+ Mask = ArgIdx ? Ref : Mod;
+ break;
+ }
+ case Intrinsic::lifetime_start:
+ case Intrinsic::lifetime_end:
+ case Intrinsic::invariant_start: {
+ assert(ArgIdx == 1 && "Invalid argument index");
+ assert(Loc.Ptr == II->getArgOperand(ArgIdx) &&
+ "Intrinsic location pointer not argument?");
+ Loc.Size = cast<ConstantInt>(II->getArgOperand(0))->getZExtValue();
+ break;
+ }
+ case Intrinsic::invariant_end: {
+ assert(ArgIdx == 2 && "Invalid argument index");
+ assert(Loc.Ptr == II->getArgOperand(ArgIdx) &&
+ "Intrinsic location pointer not argument?");
+ Loc.Size = cast<ConstantInt>(II->getArgOperand(1))->getZExtValue();
+ break;
+ }
+ case Intrinsic::arm_neon_vld1: {
+ assert(ArgIdx == 0 && "Invalid argument index");
+ assert(Loc.Ptr == II->getArgOperand(ArgIdx) &&
+ "Intrinsic location pointer not argument?");
+ // LLVM's vld1 and vst1 intrinsics currently only support a single
+ // vector register.
+ if (DL)
+ Loc.Size = DL->getTypeStoreSize(II->getType());
+ break;
+ }
+ case Intrinsic::arm_neon_vst1: {
+ assert(ArgIdx == 0 && "Invalid argument index");
+ assert(Loc.Ptr == II->getArgOperand(ArgIdx) &&
+ "Intrinsic location pointer not argument?");
+ if (DL)
+ Loc.Size = DL->getTypeStoreSize(II->getArgOperand(1)->getType());
+ break;
+ }
+ }
+
+ // We can bound the aliasing properties of memset_pattern16 just as we can
+ // for memcpy/memset. This is particularly important because the
+ // LoopIdiomRecognizer likes to turn loops into calls to memset_pattern16
+ // whenever possible.
+ else if (CS.getCalledFunction() &&
+ isMemsetPattern16(CS.getCalledFunction(), TLI)) {
+ assert((ArgIdx == 0 || ArgIdx == 1) &&
+ "Invalid argument index for memset_pattern16");
+ if (ArgIdx == 1)
+ Loc.Size = 16;
+ else if (const ConstantInt *LenCI =
+ dyn_cast<ConstantInt>(CS.getArgument(2)))
+ Loc.Size = LenCI->getZExtValue();
+ assert(Loc.Ptr == CS.getArgument(ArgIdx) &&
+ "memset_pattern16 location pointer not argument?");
+ Mask = ArgIdx ? Ref : Mod;
+ }
+ // FIXME: Handle memset_pattern4 and memset_pattern8 also.
+
+ return Loc;
+}
+
/// getModRefInfo - Check to see if the specified callsite can clobber the
/// specified memory object. Since we only look at local properties of this
/// function, we really can't say much about this query. We do, however, use
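With isMemsetPattern16 factored out above, getModRefBehavior can mark a well-typed memset_pattern16 as touching only its argument pointees. A hedged sketch of the same signature test, with plain data in place of llvm::FunctionType and TargetLibraryInfo:

#include <string>
#include <vector>

enum ParamKind { PointerParam, IntegerParam, OtherParam };

struct FuncSig {
  std::string Name;
  bool IsVarArg;
  std::vector<ParamKind> Params;
};

// memset_pattern16(void *dst, const void *pattern, size_t len):
// exactly three fixed parameters, pointer/pointer/integer, and the
// platform's TargetLibraryInfo must actually provide the routine.
bool looksLikeMemsetPattern16(const FuncSig &F, bool TLIHasIt) {
  return TLIHasIt && F.Name == "memset_pattern16" && !F.IsVarArg &&
         F.Params.size() == 3 && F.Params[0] == PointerParam &&
         F.Params[1] == PointerParam && F.Params[2] == IntegerParam;
}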
@@ -748,124 +850,8 @@ BasicAliasAnalysis::getModRefInfo(ImmutableCallSite CS,
return NoModRef;
}
- const TargetLibraryInfo &TLI = getAnalysis<TargetLibraryInfo>();
- ModRefResult Min = ModRef;
-
- // Finally, handle specific knowledge of intrinsics.
- const IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction());
- if (II != nullptr)
- switch (II->getIntrinsicID()) {
- default: break;
- case Intrinsic::memcpy:
- case Intrinsic::memmove: {
- uint64_t Len = UnknownSize;
- if (ConstantInt *LenCI = dyn_cast<ConstantInt>(II->getArgOperand(2)))
- Len = LenCI->getZExtValue();
- Value *Dest = II->getArgOperand(0);
- Value *Src = II->getArgOperand(1);
- // If it can't overlap the source dest, then it doesn't modref the loc.
- if (isNoAlias(Location(Dest, Len), Loc)) {
- if (isNoAlias(Location(Src, Len), Loc))
- return NoModRef;
- // If it can't overlap the dest, then worst case it reads the loc.
- Min = Ref;
- } else if (isNoAlias(Location(Src, Len), Loc)) {
- // If it can't overlap the source, then worst case it mutates the loc.
- Min = Mod;
- }
- break;
- }
- case Intrinsic::memset:
- // Since memset is 'accesses arguments' only, the AliasAnalysis base class
- // will handle it for the variable length case.
- if (ConstantInt *LenCI = dyn_cast<ConstantInt>(II->getArgOperand(2))) {
- uint64_t Len = LenCI->getZExtValue();
- Value *Dest = II->getArgOperand(0);
- if (isNoAlias(Location(Dest, Len), Loc))
- return NoModRef;
- }
- // We know that memset doesn't load anything.
- Min = Mod;
- break;
- case Intrinsic::lifetime_start:
- case Intrinsic::lifetime_end:
- case Intrinsic::invariant_start: {
- uint64_t PtrSize =
- cast<ConstantInt>(II->getArgOperand(0))->getZExtValue();
- if (isNoAlias(Location(II->getArgOperand(1),
- PtrSize,
- II->getMetadata(LLVMContext::MD_tbaa)),
- Loc))
- return NoModRef;
- break;
- }
- case Intrinsic::invariant_end: {
- uint64_t PtrSize =
- cast<ConstantInt>(II->getArgOperand(1))->getZExtValue();
- if (isNoAlias(Location(II->getArgOperand(2),
- PtrSize,
- II->getMetadata(LLVMContext::MD_tbaa)),
- Loc))
- return NoModRef;
- break;
- }
- case Intrinsic::arm_neon_vld1: {
- // LLVM's vld1 and vst1 intrinsics currently only support a single
- // vector register.
- uint64_t Size =
- DL ? DL->getTypeStoreSize(II->getType()) : UnknownSize;
- if (isNoAlias(Location(II->getArgOperand(0), Size,
- II->getMetadata(LLVMContext::MD_tbaa)),
- Loc))
- return NoModRef;
- break;
- }
- case Intrinsic::arm_neon_vst1: {
- uint64_t Size =
- DL ? DL->getTypeStoreSize(II->getArgOperand(1)->getType()) : UnknownSize;
- if (isNoAlias(Location(II->getArgOperand(0), Size,
- II->getMetadata(LLVMContext::MD_tbaa)),
- Loc))
- return NoModRef;
- break;
- }
- }
-
- // We can bound the aliasing properties of memset_pattern16 just as we can
- // for memcpy/memset. This is particularly important because the
- // LoopIdiomRecognizer likes to turn loops into calls to memset_pattern16
- // whenever possible.
- else if (TLI.has(LibFunc::memset_pattern16) &&
- CS.getCalledFunction() &&
- CS.getCalledFunction()->getName() == "memset_pattern16") {
- const Function *MS = CS.getCalledFunction();
- FunctionType *MemsetType = MS->getFunctionType();
- if (!MemsetType->isVarArg() && MemsetType->getNumParams() == 3 &&
- isa<PointerType>(MemsetType->getParamType(0)) &&
- isa<PointerType>(MemsetType->getParamType(1)) &&
- isa<IntegerType>(MemsetType->getParamType(2))) {
- uint64_t Len = UnknownSize;
- if (const ConstantInt *LenCI = dyn_cast<ConstantInt>(CS.getArgument(2)))
- Len = LenCI->getZExtValue();
- const Value *Dest = CS.getArgument(0);
- const Value *Src = CS.getArgument(1);
- // If it can't overlap the source dest, then it doesn't modref the loc.
- if (isNoAlias(Location(Dest, Len), Loc)) {
- // Always reads 16 bytes of the source.
- if (isNoAlias(Location(Src, 16), Loc))
- return NoModRef;
- // If it can't overlap the dest, then worst case it reads the loc.
- Min = Ref;
- // Always reads 16 bytes of the source.
- } else if (isNoAlias(Location(Src, 16), Loc)) {
- // If it can't overlap the source, then worst case it mutates the loc.
- Min = Mod;
- }
- }
- }
-
// The AliasAnalysis base class has some smarts, lets use them.
- return ModRefResult(AliasAnalysis::getModRefInfo(CS, Loc) & Min);
+ return AliasAnalysis::getModRefInfo(CS, Loc);
}
/// aliasGEP - Provide a bunch of ad-hoc rules to disambiguate a GEP instruction
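The net effect of this file's change is that the per-intrinsic special cases move out of getModRefInfo and into getArgLocation, where each pointer argument gets a size bound and a mod/ref mask; for the mem-transfer intrinsics the destination is Mod and the source is Ref, both bounded by a constant length when one is known. A small sketch of that contract under simplified stand-in types:

#include <cassert>
#include <cstdint>

enum ModRefResult { NoModRef, Ref, Mod, ModRef };

struct Location {
  uint64_t Size;  // byte bound on the access when a constant length is known
};

// For memcpy/memmove: argument 0 is the destination (written), argument 1
// the source (read); a constant length bounds both locations.
Location getMemTransferArgLocation(unsigned ArgIdx, uint64_t ConstLen,
                                   ModRefResult &Mask) {
  assert(ArgIdx == 0 || ArgIdx == 1);
  Mask = ArgIdx ? Ref : Mod;
  return Location{ConstLen};
}

int main() {
  ModRefResult Mask;
  Location Dst = getMemTransferArgLocation(0, 16, Mask);
  assert(Mask == Mod && Dst.Size == 16);  // dest of a 16-byte copy: Mod
  Location Src = getMemTransferArgLocation(1, 16, Mask);
  assert(Mask == Ref && Src.Size == 16);  // source: only referenced
}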
diff --git a/lib/Analysis/BlockFrequencyInfoImpl.cpp b/lib/Analysis/BlockFrequencyInfoImpl.cpp
index 87d93a4..4fd2c11 100644
--- a/lib/Analysis/BlockFrequencyInfoImpl.cpp
+++ b/lib/Analysis/BlockFrequencyInfoImpl.cpp
@@ -12,7 +12,6 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/BlockFrequencyInfoImpl.h"
-#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/SCCIterator.h"
#include "llvm/Support/raw_ostream.h"
#include <deque>
@@ -24,298 +23,13 @@ using namespace llvm::bfi_detail;
//===----------------------------------------------------------------------===//
//
-// UnsignedFloat implementation.
-//
-//===----------------------------------------------------------------------===//
-#ifndef _MSC_VER
-const int32_t UnsignedFloatBase::MaxExponent;
-const int32_t UnsignedFloatBase::MinExponent;
-#endif
-
-static void appendDigit(std::string &Str, unsigned D) {
- assert(D < 10);
- Str += '0' + D % 10;
-}
-
-static void appendNumber(std::string &Str, uint64_t N) {
- while (N) {
- appendDigit(Str, N % 10);
- N /= 10;
- }
-}
-
-static bool doesRoundUp(char Digit) {
- switch (Digit) {
- case '5':
- case '6':
- case '7':
- case '8':
- case '9':
- return true;
- default:
- return false;
- }
-}
-
-static std::string toStringAPFloat(uint64_t D, int E, unsigned Precision) {
- assert(E >= UnsignedFloatBase::MinExponent);
- assert(E <= UnsignedFloatBase::MaxExponent);
-
- // Find a new E, but don't let it increase past MaxExponent.
- int LeadingZeros = UnsignedFloatBase::countLeadingZeros64(D);
- int NewE = std::min(UnsignedFloatBase::MaxExponent, E + 63 - LeadingZeros);
- int Shift = 63 - (NewE - E);
- assert(Shift <= LeadingZeros);
- assert(Shift == LeadingZeros || NewE == UnsignedFloatBase::MaxExponent);
- D <<= Shift;
- E = NewE;
-
- // Check for a denormal.
- unsigned AdjustedE = E + 16383;
- if (!(D >> 63)) {
- assert(E == UnsignedFloatBase::MaxExponent);
- AdjustedE = 0;
- }
-
- // Build the float and print it.
- uint64_t RawBits[2] = {D, AdjustedE};
- APFloat Float(APFloat::x87DoubleExtended, APInt(80, RawBits));
- SmallVector<char, 24> Chars;
- Float.toString(Chars, Precision, 0);
- return std::string(Chars.begin(), Chars.end());
-}
-
-static std::string stripTrailingZeros(const std::string &Float) {
- size_t NonZero = Float.find_last_not_of('0');
- assert(NonZero != std::string::npos && "no . in floating point string");
-
- if (Float[NonZero] == '.')
- ++NonZero;
-
- return Float.substr(0, NonZero + 1);
-}
-
-std::string UnsignedFloatBase::toString(uint64_t D, int16_t E, int Width,
- unsigned Precision) {
- if (!D)
- return "0.0";
-
- // Canonicalize exponent and digits.
- uint64_t Above0 = 0;
- uint64_t Below0 = 0;
- uint64_t Extra = 0;
- int ExtraShift = 0;
- if (E == 0) {
- Above0 = D;
- } else if (E > 0) {
- if (int Shift = std::min(int16_t(countLeadingZeros64(D)), E)) {
- D <<= Shift;
- E -= Shift;
-
- if (!E)
- Above0 = D;
- }
- } else if (E > -64) {
- Above0 = D >> -E;
- Below0 = D << (64 + E);
- } else if (E > -120) {
- Below0 = D >> (-E - 64);
- Extra = D << (128 + E);
- ExtraShift = -64 - E;
- }
-
- // Fall back on APFloat for very small and very large numbers.
- if (!Above0 && !Below0)
- return toStringAPFloat(D, E, Precision);
-
- // Append the digits before the decimal.
- std::string Str;
- size_t DigitsOut = 0;
- if (Above0) {
- appendNumber(Str, Above0);
- DigitsOut = Str.size();
- } else
- appendDigit(Str, 0);
- std::reverse(Str.begin(), Str.end());
-
- // Return early if there's nothing after the decimal.
- if (!Below0)
- return Str + ".0";
-
- // Append the decimal and beyond.
- Str += '.';
- uint64_t Error = UINT64_C(1) << (64 - Width);
-
- // We need to shift Below0 to the right to make space for calculating
- // digits. Save the precision we're losing in Extra.
- Extra = (Below0 & 0xf) << 56 | (Extra >> 8);
- Below0 >>= 4;
- size_t SinceDot = 0;
- size_t AfterDot = Str.size();
- do {
- if (ExtraShift) {
- --ExtraShift;
- Error *= 5;
- } else
- Error *= 10;
-
- Below0 *= 10;
- Extra *= 10;
- Below0 += (Extra >> 60);
- Extra = Extra & (UINT64_MAX >> 4);
- appendDigit(Str, Below0 >> 60);
- Below0 = Below0 & (UINT64_MAX >> 4);
- if (DigitsOut || Str.back() != '0')
- ++DigitsOut;
- ++SinceDot;
- } while (Error && (Below0 << 4 | Extra >> 60) >= Error / 2 &&
- (!Precision || DigitsOut <= Precision || SinceDot < 2));
-
- // Return early for maximum precision.
- if (!Precision || DigitsOut <= Precision)
- return stripTrailingZeros(Str);
-
- // Find where to truncate.
- size_t Truncate =
- std::max(Str.size() - (DigitsOut - Precision), AfterDot + 1);
-
- // Check if there's anything to truncate.
- if (Truncate >= Str.size())
- return stripTrailingZeros(Str);
-
- bool Carry = doesRoundUp(Str[Truncate]);
- if (!Carry)
- return stripTrailingZeros(Str.substr(0, Truncate));
-
- // Round with the first truncated digit.
- for (std::string::reverse_iterator I(Str.begin() + Truncate), E = Str.rend();
- I != E; ++I) {
- if (*I == '.')
- continue;
- if (*I == '9') {
- *I = '0';
- continue;
- }
-
- ++*I;
- Carry = false;
- break;
- }
-
- // Add "1" in front if we still need to carry.
- return stripTrailingZeros(std::string(Carry, '1') + Str.substr(0, Truncate));
-}
-
-raw_ostream &UnsignedFloatBase::print(raw_ostream &OS, uint64_t D, int16_t E,
- int Width, unsigned Precision) {
- return OS << toString(D, E, Width, Precision);
-}
-
-void UnsignedFloatBase::dump(uint64_t D, int16_t E, int Width) {
- print(dbgs(), D, E, Width, 0) << "[" << Width << ":" << D << "*2^" << E
- << "]";
-}
-
-static std::pair<uint64_t, int16_t>
-getRoundedFloat(uint64_t N, bool ShouldRound, int64_t Shift) {
- if (ShouldRound)
- if (!++N)
- // Rounding caused an overflow.
- return std::make_pair(UINT64_C(1), Shift + 64);
- return std::make_pair(N, Shift);
-}
-
-std::pair<uint64_t, int16_t> UnsignedFloatBase::divide64(uint64_t Dividend,
- uint64_t Divisor) {
- // Input should be sanitized.
- assert(Divisor);
- assert(Dividend);
-
- // Minimize size of divisor.
- int16_t Shift = 0;
- if (int Zeros = countTrailingZeros(Divisor)) {
- Shift -= Zeros;
- Divisor >>= Zeros;
- }
-
- // Check for powers of two.
- if (Divisor == 1)
- return std::make_pair(Dividend, Shift);
-
- // Maximize size of dividend.
- if (int Zeros = countLeadingZeros64(Dividend)) {
- Shift -= Zeros;
- Dividend <<= Zeros;
- }
-
- // Start with the result of a divide.
- uint64_t Quotient = Dividend / Divisor;
- Dividend %= Divisor;
-
- // Continue building the quotient with long division.
- //
- // TODO: continue with largers digits.
- while (!(Quotient >> 63) && Dividend) {
- // Shift Dividend, and check for overflow.
- bool IsOverflow = Dividend >> 63;
- Dividend <<= 1;
- --Shift;
-
- // Divide.
- bool DoesDivide = IsOverflow || Divisor <= Dividend;
- Quotient = (Quotient << 1) | uint64_t(DoesDivide);
- Dividend -= DoesDivide ? Divisor : 0;
- }
-
- // Round.
- if (Dividend >= getHalf(Divisor))
- if (!++Quotient)
- // Rounding caused an overflow in Quotient.
- return std::make_pair(UINT64_C(1), Shift + 64);
-
- return getRoundedFloat(Quotient, Dividend >= getHalf(Divisor), Shift);
-}
-
-std::pair<uint64_t, int16_t> UnsignedFloatBase::multiply64(uint64_t L,
- uint64_t R) {
- // Separate into two 32-bit digits (U.L).
- uint64_t UL = L >> 32, LL = L & UINT32_MAX, UR = R >> 32, LR = R & UINT32_MAX;
-
- // Compute cross products.
- uint64_t P1 = UL * UR, P2 = UL * LR, P3 = LL * UR, P4 = LL * LR;
-
- // Sum into two 64-bit digits.
- uint64_t Upper = P1, Lower = P4;
- auto addWithCarry = [&](uint64_t N) {
- uint64_t NewLower = Lower + (N << 32);
- Upper += (N >> 32) + (NewLower < Lower);
- Lower = NewLower;
- };
- addWithCarry(P2);
- addWithCarry(P3);
-
- // Check whether the upper digit is empty.
- if (!Upper)
- return std::make_pair(Lower, 0);
-
- // Shift as little as possible to maximize precision.
- unsigned LeadingZeros = countLeadingZeros64(Upper);
- int16_t Shift = 64 - LeadingZeros;
- if (LeadingZeros)
- Upper = Upper << LeadingZeros | Lower >> Shift;
- bool ShouldRound = Shift && (Lower & UINT64_C(1) << (Shift - 1));
- return getRoundedFloat(Upper, ShouldRound, Shift);
-}
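
The removed multiply64 is a schoolbook 64x64 -> 128-bit multiply over 32-bit digits, followed by normalization to (digits, exponent) form. A hedged sketch of just the 128-bit product step, with hypothetical names:

#include <cstdint>
#include <utility>

// Sketch: L * R = UL*UR*2^64 + (UL*LR + LL*UR)*2^32 + LL*LR.
static std::pair<uint64_t, uint64_t> mul64x64(uint64_t L, uint64_t R) {
  uint64_t UL = L >> 32, LL = L & UINT32_MAX;
  uint64_t UR = R >> 32, LR = R & UINT32_MAX;
  uint64_t Upper = UL * UR, Lower = LL * LR;
  auto addMiddle = [&](uint64_t P) {
    uint64_t NewLower = Lower + (P << 32);   // low half of P * 2^32
    Upper += (P >> 32) + (NewLower < Lower); // high half plus carry out
    Lower = NewLower;
  };
  addMiddle(UL * LR);
  addMiddle(LL * UR);
  return std::make_pair(Upper, Lower);       // full 128-bit result
}
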
-
-//===----------------------------------------------------------------------===//
-//
// BlockMass implementation.
//
//===----------------------------------------------------------------------===//
-UnsignedFloat<uint64_t> BlockMass::toFloat() const {
+ScaledNumber<uint64_t> BlockMass::toScaled() const {
if (isFull())
- return UnsignedFloat<uint64_t>(1, 0);
- return UnsignedFloat<uint64_t>(getMass() + 1, -64);
+ return ScaledNumber<uint64_t>(1, 0);
+ return ScaledNumber<uint64_t>(getMass() + 1, -64);
}
void BlockMass::dump() const { print(dbgs()); }
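
For context on the rename: BlockMass is a fixed-point fraction in [0, 1] stored in a uint64_t, where UINT64_MAX means "full" mass, and toScaled() (formerly toFloat()) maps a mass M to (M + 1) * 2^-64 so that full mass becomes exactly 1.0. A hedged sketch of the mapping with plain doubles:

#include <cmath>
#include <cstdint>

// Sketch only; the real code returns ScaledNumber<uint64_t>(M + 1, -64).
double massToDouble(uint64_t M) {
  if (M == UINT64_MAX)                   // "full" mass is exactly 1.0
    return 1.0;
  return std::ldexp(double(M + 1), -64); // (M + 1) * 2^-64
}
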
@@ -342,7 +56,7 @@ namespace {
typedef BlockFrequencyInfoImplBase::BlockNode BlockNode;
typedef BlockFrequencyInfoImplBase::Distribution Distribution;
typedef BlockFrequencyInfoImplBase::Distribution::WeightList WeightList;
-typedef BlockFrequencyInfoImplBase::Float Float;
+typedef BlockFrequencyInfoImplBase::Scaled64 Scaled64;
typedef BlockFrequencyInfoImplBase::LoopData LoopData;
typedef BlockFrequencyInfoImplBase::Weight Weight;
typedef BlockFrequencyInfoImplBase::FrequencyData FrequencyData;
@@ -622,7 +336,7 @@ bool BlockFrequencyInfoImplBase::addLoopSuccessorsToDist(
///
/// Gives the maximum number of estimated iterations allowed for a loop. Very
/// large numbers cause problems downstream (even within 64-bits).
-static Float getMaxLoopScale() { return Float(1, 12); }
+static Scaled64 getMaxLoopScale() { return Scaled64(1, 12); }
/// \brief Compute the loop scale for a loop.
void BlockFrequencyInfoImplBase::computeLoopScale(LoopData &Loop) {
@@ -634,7 +348,7 @@ void BlockFrequencyInfoImplBase::computeLoopScale(LoopData &Loop) {
BlockMass ExitMass = BlockMass::getFull() - Loop.BackedgeMass;
// Block scale stores the inverse of the scale.
- Loop.Scale = ExitMass.toFloat().inverse();
+ Loop.Scale = ExitMass.toScaled().inverse();
DEBUG(dbgs() << " - exit-mass = " << ExitMass << " (" << BlockMass::getFull()
<< " - " << Loop.BackedgeMass << ")\n"
@@ -708,15 +422,16 @@ void BlockFrequencyInfoImplBase::distributeMass(const BlockNode &Source,
}
static void convertFloatingToInteger(BlockFrequencyInfoImplBase &BFI,
- const Float &Min, const Float &Max) {
+ const Scaled64 &Min, const Scaled64 &Max) {
// Scale the Factor to a size that creates integers. Ideally, integers would
// be scaled so that Max == UINT64_MAX so that they can be best
// differentiated. However, the register allocator currently deals poorly
// with large numbers. Instead, push Min up a little from 1 to give some
// room to differentiate small, unequal numbers.
//
- // TODO: fix issues downstream so that ScalingFactor can be Float(1,64)/Max.
- Float ScalingFactor = Min.inverse();
+ // TODO: fix issues downstream so that ScalingFactor can be
+ // Scaled64(1,64)/Max.
+ Scaled64 ScalingFactor = Min.inverse();
if ((Max / Min).lg() < 60)
ScalingFactor <<= 3;
@@ -724,10 +439,10 @@ static void convertFloatingToInteger(BlockFrequencyInfoImplBase &BFI,
DEBUG(dbgs() << "float-to-int: min = " << Min << ", max = " << Max
<< ", factor = " << ScalingFactor << "\n");
for (size_t Index = 0; Index < BFI.Freqs.size(); ++Index) {
- Float Scaled = BFI.Freqs[Index].Floating * ScalingFactor;
+ Scaled64 Scaled = BFI.Freqs[Index].Scaled * ScalingFactor;
BFI.Freqs[Index].Integer = std::max(UINT64_C(1), Scaled.toInt<uint64_t>());
DEBUG(dbgs() << " - " << BFI.getBlockName(Index) << ": float = "
- << BFI.Freqs[Index].Floating << ", scaled = " << Scaled
+ << BFI.Freqs[Index].Scaled << ", scaled = " << Scaled
<< ", int = " << BFI.Freqs[Index].Integer << "\n");
}
}
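
convertFloatingToInteger picks ScalingFactor = 1/Min so the smallest frequency maps to roughly 1, and when the dynamic range lg(Max/Min) is below 60 it shifts the factor up by 3 bits so that small, unequal frequencies still land on distinct integers while the largest stays comfortably inside 64 bits. A rough double-precision sketch of the rule (the real code works in Scaled64):

#include <algorithm>
#include <cmath>
#include <cstdint>

uint64_t toIntegerFreq(double F, double Min, double Max) {
  double ScalingFactor = 1.0 / Min;  // smallest frequency -> ~1
  if (std::log2(Max / Min) < 60)
    ScalingFactor *= 8;              // the "ScalingFactor <<= 3" above
  return std::max<uint64_t>(1, (uint64_t)(F * ScalingFactor));
}
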
@@ -740,7 +455,7 @@ static void unwrapLoop(BlockFrequencyInfoImplBase &BFI, LoopData &Loop) {
DEBUG(dbgs() << "unwrap-loop-package: " << BFI.getLoopName(Loop)
<< ": mass = " << Loop.Mass << ", scale = " << Loop.Scale
<< "\n");
- Loop.Scale *= Loop.Mass.toFloat();
+ Loop.Scale *= Loop.Mass.toScaled();
Loop.IsPackaged = false;
DEBUG(dbgs() << " => combined-scale = " << Loop.Scale << "\n");
@@ -749,9 +464,9 @@ static void unwrapLoop(BlockFrequencyInfoImplBase &BFI, LoopData &Loop) {
// final head scale will be used for updating the rest of the members.
for (const BlockNode &N : Loop.Nodes) {
const auto &Working = BFI.Working[N.Index];
- Float &F = Working.isAPackage() ? Working.getPackagedLoop()->Scale
- : BFI.Freqs[N.Index].Floating;
- Float New = Loop.Scale * F;
+ Scaled64 &F = Working.isAPackage() ? Working.getPackagedLoop()->Scale
+ : BFI.Freqs[N.Index].Scaled;
+ Scaled64 New = Loop.Scale * F;
DEBUG(dbgs() << " - " << BFI.getBlockName(N) << ": " << F << " => " << New
<< "\n");
F = New;
@@ -761,7 +476,7 @@ static void unwrapLoop(BlockFrequencyInfoImplBase &BFI, LoopData &Loop) {
void BlockFrequencyInfoImplBase::unwrapLoops() {
// Set initial frequencies from loop-local masses.
for (size_t Index = 0; Index < Working.size(); ++Index)
- Freqs[Index].Floating = Working[Index].Mass.toFloat();
+ Freqs[Index].Scaled = Working[Index].Mass.toScaled();
for (LoopData &Loop : Loops)
unwrapLoop(*this, Loop);
@@ -770,12 +485,12 @@ void BlockFrequencyInfoImplBase::unwrapLoops() {
void BlockFrequencyInfoImplBase::finalizeMetrics() {
// Unwrap loop packages in reverse post-order, tracking min and max
// frequencies.
- auto Min = Float::getLargest();
- auto Max = Float::getZero();
+ auto Min = Scaled64::getLargest();
+ auto Max = Scaled64::getZero();
for (size_t Index = 0; Index < Working.size(); ++Index) {
// Update min/max scale.
- Min = std::min(Min, Freqs[Index].Floating);
- Max = std::max(Max, Freqs[Index].Floating);
+ Min = std::min(Min, Freqs[Index].Scaled);
+ Max = std::max(Max, Freqs[Index].Scaled);
}
// Convert to integers.
@@ -794,11 +509,11 @@ BlockFrequencyInfoImplBase::getBlockFreq(const BlockNode &Node) const {
return 0;
return Freqs[Node.Index].Integer;
}
-Float
+Scaled64
BlockFrequencyInfoImplBase::getFloatingBlockFreq(const BlockNode &Node) const {
if (!Node.isValid())
- return Float::getZero();
- return Freqs[Node.Index].Floating;
+ return Scaled64::getZero();
+ return Freqs[Node.Index].Scaled;
}
std::string
@@ -819,8 +534,8 @@ BlockFrequencyInfoImplBase::printBlockFreq(raw_ostream &OS,
raw_ostream &
BlockFrequencyInfoImplBase::printBlockFreq(raw_ostream &OS,
const BlockFrequency &Freq) const {
- Float Block(Freq.getFrequency(), 0);
- Float Entry(getEntryFreq(), 0);
+ Scaled64 Block(Freq.getFrequency(), 0);
+ Scaled64 Entry(getEntryFreq(), 0);
return OS << Block / Entry;
}
diff --git a/lib/Analysis/CMakeLists.txt b/lib/Analysis/CMakeLists.txt
index b546789..d1632fd 100644
--- a/lib/Analysis/CMakeLists.txt
+++ b/lib/Analysis/CMakeLists.txt
@@ -25,6 +25,7 @@ add_llvm_library(LLVMAnalysis
InstructionSimplify.cpp
Interval.cpp
IntervalPartition.cpp
+ JumpInstrTableInfo.cpp
LazyCallGraph.cpp
LazyValueInfo.cpp
LibCallAliasAnalysis.cpp
diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp
index 0ac1cb5..eb3e2c6 100644
--- a/lib/Analysis/ConstantFolding.cpp
+++ b/lib/Analysis/ConstantFolding.cpp
@@ -21,6 +21,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Config/config.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
@@ -31,11 +32,15 @@
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Operator.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/FEnv.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetLibraryInfo.h"
#include <cerrno>
#include <cmath>
+
+#ifdef HAVE_FENV_H
+#include <fenv.h>
+#endif
+
using namespace llvm;
//===----------------------------------------------------------------------===//
@@ -706,7 +711,7 @@ static Constant *CastGEPIndices(ArrayRef<Constant *> Ops,
static Constant* StripPtrCastKeepAS(Constant* Ptr) {
assert(Ptr->getType()->isPointerTy() && "Not a pointer type");
PointerType *OldPtrTy = cast<PointerType>(Ptr->getType());
- Ptr = cast<Constant>(Ptr->stripPointerCasts());
+ Ptr = Ptr->stripPointerCasts();
PointerType *NewPtrTy = cast<PointerType>(Ptr->getType());
// Preserve the address space number of the pointer.
@@ -1314,12 +1319,34 @@ static Constant *GetConstantFoldFPValue(double V, Type *Ty) {
}
+namespace {
+/// llvm_fenv_clearexcept - Clear the floating-point exception state.
+static inline void llvm_fenv_clearexcept() {
+#if defined(HAVE_FENV_H) && HAVE_DECL_FE_ALL_EXCEPT
+ feclearexcept(FE_ALL_EXCEPT);
+#endif
+ errno = 0;
+}
+
+/// llvm_fenv_testexcept - Test if a floating-point exception was raised.
+static inline bool llvm_fenv_testexcept() {
+ int errno_val = errno;
+ if (errno_val == ERANGE || errno_val == EDOM)
+ return true;
+#if defined(HAVE_FENV_H) && HAVE_DECL_FE_ALL_EXCEPT && HAVE_DECL_FE_INEXACT
+ if (fetestexcept(FE_ALL_EXCEPT & ~FE_INEXACT))
+ return true;
+#endif
+ return false;
+}
+} // End namespace
+
static Constant *ConstantFoldFP(double (*NativeFP)(double), double V,
Type *Ty) {
- sys::llvm_fenv_clearexcept();
+ llvm_fenv_clearexcept();
V = NativeFP(V);
- if (sys::llvm_fenv_testexcept()) {
- sys::llvm_fenv_clearexcept();
+ if (llvm_fenv_testexcept()) {
+ llvm_fenv_clearexcept();
return nullptr;
}
@@ -1328,10 +1355,10 @@ static Constant *ConstantFoldFP(double (*NativeFP)(double), double V,
static Constant *ConstantFoldBinaryFP(double (*NativeFP)(double, double),
double V, double W, Type *Ty) {
- sys::llvm_fenv_clearexcept();
+ llvm_fenv_clearexcept();
V = NativeFP(V, W);
- if (sys::llvm_fenv_testexcept()) {
- sys::llvm_fenv_clearexcept();
+ if (llvm_fenv_testexcept()) {
+ llvm_fenv_clearexcept();
return nullptr;
}
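
The new file-local helpers replace sys::llvm_fenv_clearexcept/testexcept: clear errno and the floating-point exception flags, evaluate the host libm call, and refuse to fold if a domain/range error or any exception other than FE_INEXACT was raised. A hedged standalone sketch of the same pattern, assuming a hosted <cfenv>:

#include <cerrno>
#include <cfenv>
#include <cmath>

// Sketch: fold NativeFP(V) only if the host evaluation looks trustworthy.
static bool tryFoldUnary(double (*NativeFP)(double), double V, double &Out) {
  errno = 0;
  std::feclearexcept(FE_ALL_EXCEPT);
  Out = NativeFP(V);
  if (errno == ERANGE || errno == EDOM)
    return false;                                 // domain or range error
  if (std::fetestexcept(FE_ALL_EXCEPT & ~FE_INEXACT))
    return false;                                 // anything beyond inexact
  return true;                                    // safe to constant-fold
}
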
diff --git a/lib/Analysis/CostModel.cpp b/lib/Analysis/CostModel.cpp
index 780b1aa..1b74f8c 100644
--- a/lib/Analysis/CostModel.cpp
+++ b/lib/Analysis/CostModel.cpp
@@ -95,6 +95,31 @@ static bool isReverseVectorMask(SmallVectorImpl<int> &Mask) {
return true;
}
+static bool isAlternateVectorMask(SmallVectorImpl<int> &Mask) {
+ bool isAlternate = true;
+ unsigned MaskSize = Mask.size();
+
+ // Example: shufflevector A, B, <0,5,2,7>
+ for (unsigned i = 0; i < MaskSize && isAlternate; ++i) {
+ if (Mask[i] < 0)
+ continue;
+ isAlternate = Mask[i] == (int)((i & 1) ? MaskSize + i : i);
+ }
+
+ if (isAlternate)
+ return true;
+
+ isAlternate = true;
+ // Example: shufflevector A, B, <4,1,6,3>
+ for (unsigned i = 0; i < MaskSize && isAlternate; ++i) {
+ if (Mask[i] < 0)
+ continue;
+ isAlternate = Mask[i] == (int)((i & 1) ? i : MaskSize + i);
+ }
+
+ return isAlternate;
+}
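
isAlternateVectorMask accepts the two interleaving shapes (first vector on even lanes, second on odd, or vice versa), treating negative entries as don't-cares, so the cost model below can price such shuffles as SK_Alternate. For MaskSize == 4 that means <0,5,2,7> and <4,1,6,3> qualify while the identity <0,1,2,3> does not; a small illustrative check (hypothetical driver code):

#include <cassert>

// Illustrative only: the two mask shapes the helper accepts for 4 lanes.
static void checkAlternateShapes() {
  int M1[4] = {0, 5, 2, 7}; // even lanes from A (i), odd lanes from B (4+i)
  int M2[4] = {4, 1, 6, 3}; // even lanes from B (4+i), odd lanes from A (i)
  for (int i = 0; i < 4; ++i) {
    assert(M1[i] == ((i & 1) ? 4 + i : i));
    assert(M2[i] == ((i & 1) ? i : 4 + i));
  }
}
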
+
static TargetTransformInfo::OperandValueKind getOperandInfo(Value *V) {
TargetTransformInfo::OperandValueKind OpInfo =
TargetTransformInfo::OK_AnyValue;
@@ -466,9 +491,15 @@ unsigned CostModelAnalysis::getInstructionCost(const Instruction *I) const {
unsigned NumVecElems = VecTypOp0->getVectorNumElements();
SmallVector<int, 16> Mask = Shuffle->getShuffleMask();
- if (NumVecElems == Mask.size() && isReverseVectorMask(Mask))
- return TTI->getShuffleCost(TargetTransformInfo::SK_Reverse, VecTypOp0, 0,
- nullptr);
+ if (NumVecElems == Mask.size()) {
+ if (isReverseVectorMask(Mask))
+ return TTI->getShuffleCost(TargetTransformInfo::SK_Reverse, VecTypOp0,
+ 0, nullptr);
+ if (isAlternateVectorMask(Mask))
+ return TTI->getShuffleCost(TargetTransformInfo::SK_Alternate,
+ VecTypOp0, 0, nullptr);
+ }
+
return -1;
}
case Instruction::Call:
diff --git a/lib/Analysis/IPA/CallGraphSCCPass.cpp b/lib/Analysis/IPA/CallGraphSCCPass.cpp
index bfab744..c27edbf 100644
--- a/lib/Analysis/IPA/CallGraphSCCPass.cpp
+++ b/lib/Analysis/IPA/CallGraphSCCPass.cpp
@@ -602,8 +602,12 @@ namespace {
bool runOnSCC(CallGraphSCC &SCC) override {
Out << Banner;
- for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I)
- (*I)->getFunction()->print(Out);
+ for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
+ if ((*I)->getFunction())
+ (*I)->getFunction()->print(Out);
+ else
+ Out << "\nPrinting <null> Function\n";
+ }
return false;
}
};
diff --git a/lib/Analysis/IPA/InlineCost.cpp b/lib/Analysis/IPA/InlineCost.cpp
index 66f3f8e..8807529 100644
--- a/lib/Analysis/IPA/InlineCost.cpp
+++ b/lib/Analysis/IPA/InlineCost.cpp
@@ -841,10 +841,7 @@ bool CallAnalyzer::visitIndirectBrInst(IndirectBrInst &IBI) {
// original function which is extremely undefined behavior.
// FIXME: This logic isn't really right; we can safely inline functions with
// indirectbr's as long as no other function or global references the
- // blockaddress of a block within the current function. And as a QOI issue,
- // if someone is using a blockaddress without an indirectbr, and that
- // reference somehow ends up in another function or global, we probably don't
- // want to inline this function.
+ // blockaddress of a block within the current function.
HasIndirectBr = true;
return false;
}
@@ -1121,6 +1118,15 @@ bool CallAnalyzer::analyzeCall(CallSite CS) {
if (BB->empty())
continue;
+ // Disallow inlining a blockaddress. A blockaddress only has defined
+ // behavior for an indirect branch in the same function, and we do not
+ // currently support inlining indirect branches. But, the inliner may not
+ // see an indirect branch that ends up being dead code at a particular call
+ // site. If the blockaddress escapes the function, e.g., via a global
+ // variable, inlining may lead to an invalid cross-function reference.
+ if (BB->hasAddressTaken())
+ return false;
+
// Analyze the cost of this block. If we blow through the threshold, this
// returns false, and we can bail out.
if (!analyzeBlock(BB)) {
@@ -1303,8 +1309,9 @@ bool InlineCostAnalysis::isInlineViable(Function &F) {
F.getAttributes().hasAttribute(AttributeSet::FunctionIndex,
Attribute::ReturnsTwice);
for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) {
- // Disallow inlining of functions which contain an indirect branch.
- if (isa<IndirectBrInst>(BI->getTerminator()))
+ // Disallow inlining of functions which contain indirect branches or
+ // blockaddresses.
+ if (isa<IndirectBrInst>(BI->getTerminator()) || BI->hasAddressTaken())
return false;
for (BasicBlock::iterator II = BI->begin(), IE = BI->end(); II != IE;
diff --git a/lib/Analysis/IVUsers.cpp b/lib/Analysis/IVUsers.cpp
index c819bd3..24655aa 100644
--- a/lib/Analysis/IVUsers.cpp
+++ b/lib/Analysis/IVUsers.cpp
@@ -287,7 +287,10 @@ void IVUsers::print(raw_ostream &OS, const Module *M) const {
OS << ")";
}
OS << " in ";
- UI->getUser()->print(OS);
+ if (UI->getUser())
+ UI->getUser()->print(OS);
+ else
+ OS << "Printing <null> User";
OS << '\n';
}
}
diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp
index 3684fda..bd42af1 100644
--- a/lib/Analysis/InstructionSimplify.cpp
+++ b/lib/Analysis/InstructionSimplify.cpp
@@ -39,7 +39,6 @@ using namespace llvm::PatternMatch;
enum { RecursionLimit = 3 };
STATISTIC(NumExpand, "Number of expansions");
-STATISTIC(NumFactor , "Number of factorizations");
STATISTIC(NumReassoc, "Number of reassociations");
struct Query {
@@ -183,78 +182,6 @@ static Value *ExpandBinOp(unsigned Opcode, Value *LHS, Value *RHS,
return nullptr;
}
-/// FactorizeBinOp - Simplify "LHS Opcode RHS" by factorizing out a common term
-/// using the operation OpCodeToExtract. For example, when Opcode is Add and
-/// OpCodeToExtract is Mul then this tries to turn "(A*B)+(A*C)" into "A*(B+C)".
-/// Returns the simplified value, or null if no simplification was performed.
-static Value *FactorizeBinOp(unsigned Opcode, Value *LHS, Value *RHS,
- unsigned OpcToExtract, const Query &Q,
- unsigned MaxRecurse) {
- Instruction::BinaryOps OpcodeToExtract = (Instruction::BinaryOps)OpcToExtract;
- // Recursion is always used, so bail out at once if we already hit the limit.
- if (!MaxRecurse--)
- return nullptr;
-
- BinaryOperator *Op0 = dyn_cast<BinaryOperator>(LHS);
- BinaryOperator *Op1 = dyn_cast<BinaryOperator>(RHS);
-
- if (!Op0 || Op0->getOpcode() != OpcodeToExtract ||
- !Op1 || Op1->getOpcode() != OpcodeToExtract)
- return nullptr;
-
- // The expression has the form "(A op' B) op (C op' D)".
- Value *A = Op0->getOperand(0), *B = Op0->getOperand(1);
- Value *C = Op1->getOperand(0), *D = Op1->getOperand(1);
-
- // Use left distributivity, i.e. "X op' (Y op Z) = (X op' Y) op (X op' Z)".
- // Does the instruction have the form "(A op' B) op (A op' D)" or, in the
- // commutative case, "(A op' B) op (C op' A)"?
- if (A == C || (Instruction::isCommutative(OpcodeToExtract) && A == D)) {
- Value *DD = A == C ? D : C;
- // Form "A op' (B op DD)" if it simplifies completely.
- // Does "B op DD" simplify?
- if (Value *V = SimplifyBinOp(Opcode, B, DD, Q, MaxRecurse)) {
- // It does! Return "A op' V" if it simplifies or is already available.
- // If V equals B then "A op' V" is just the LHS. If V equals DD then
- // "A op' V" is just the RHS.
- if (V == B || V == DD) {
- ++NumFactor;
- return V == B ? LHS : RHS;
- }
- // Otherwise return "A op' V" if it simplifies.
- if (Value *W = SimplifyBinOp(OpcodeToExtract, A, V, Q, MaxRecurse)) {
- ++NumFactor;
- return W;
- }
- }
- }
-
- // Use right distributivity, i.e. "(X op Y) op' Z = (X op' Z) op (Y op' Z)".
- // Does the instruction have the form "(A op' B) op (C op' B)" or, in the
- // commutative case, "(A op' B) op (B op' D)"?
- if (B == D || (Instruction::isCommutative(OpcodeToExtract) && B == C)) {
- Value *CC = B == D ? C : D;
- // Form "(A op CC) op' B" if it simplifies completely.
- // Does "A op CC" simplify?
- if (Value *V = SimplifyBinOp(Opcode, A, CC, Q, MaxRecurse)) {
- // It does! Return "V op' B" if it simplifies or is already available.
- // If V equals A then "V op' B" is just the LHS. If V equals CC then
- // "V op' B" is just the RHS.
- if (V == A || V == CC) {
- ++NumFactor;
- return V == A ? LHS : RHS;
- }
- // Otherwise return "V op' B" if it simplifies.
- if (Value *W = SimplifyBinOp(OpcodeToExtract, V, B, Q, MaxRecurse)) {
- ++NumFactor;
- return W;
- }
- }
- }
-
- return nullptr;
-}
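
The deleted FactorizeBinOp reversed distribution: for Opcode = Add and OpcToExtract = Mul it rewrote (A*B)+(A*C) into A*(B+C) whenever the inner operation, or the outer re-combination, simplified. The identity it relied on, shown on plain integers (illustrative only):

#include <cassert>

// E.g. with C == -B the factored form folds all the way to zero.
static long factorized(long A, long B, long C) {
  long Distributed = A * B + A * C; // the "(A op' B) op (A op' D)" shape
  long Factored = A * (B + C);      // the "A op' (B op D)" rewrite
  assert(Distributed == Factored);
  return Factored;
}
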
-
/// SimplifyAssociativeBinOp - Generic simplifications for associative binary
/// operations. Returns the simpler value, or null if none was found.
static Value *SimplifyAssociativeBinOp(unsigned Opc, Value *LHS, Value *RHS,
@@ -634,11 +561,6 @@ static Value *SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
MaxRecurse))
return V;
- // Mul distributes over Add. Try some generic simplifications based on this.
- if (Value *V = FactorizeBinOp(Instruction::Add, Op0, Op1, Instruction::Mul,
- Q, MaxRecurse))
- return V;
-
// Threading Add over selects and phi nodes is pointless, so don't bother.
// Threading over the select in "A + select(cond, B, C)" means evaluating
// "A+B" and "A+C" and seeing if they are equal; but they are equal if and
@@ -754,16 +676,9 @@ static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
if (Op0 == Op1)
return Constant::getNullValue(Op0->getType());
- // (X*2) - X -> X
- // (X<<1) - X -> X
- Value *X = nullptr;
- if (match(Op0, m_Mul(m_Specific(Op1), m_ConstantInt<2>())) ||
- match(Op0, m_Shl(m_Specific(Op1), m_One())))
- return Op1;
-
// (X + Y) - Z -> X + (Y - Z) or Y + (X - Z) if everything simplifies.
// For example, (X + Y) - Y -> X; (Y + X) - Y -> X
- Value *Y = nullptr, *Z = Op1;
+ Value *X = nullptr, *Y = nullptr, *Z = Op1;
if (MaxRecurse && match(Op0, m_Add(m_Value(X), m_Value(Y)))) { // (X + Y) - Z
// See if "V === Y - Z" simplifies.
if (Value *V = SimplifyBinOp(Instruction::Sub, Y, Z, Q, MaxRecurse-1))
@@ -835,11 +750,6 @@ static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
if (Constant *Result = computePointerDifference(Q.DL, X, Y))
return ConstantExpr::getIntegerCast(Result, Op0->getType(), true);
- // Mul distributes over Sub. Try some generic simplifications based on this.
- if (Value *V = FactorizeBinOp(Instruction::Sub, Op0, Op1, Instruction::Mul,
- Q, MaxRecurse))
- return V;
-
// i1 sub -> xor.
if (MaxRecurse && Op0->getType()->isIntegerTy(1))
if (Value *V = SimplifyXorInst(Op0, Op1, Q, MaxRecurse-1))
@@ -1518,11 +1428,6 @@ static Value *SimplifyAndInst(Value *Op0, Value *Op1, const Query &Q,
Q, MaxRecurse))
return V;
- // Or distributes over And. Try some generic simplifications based on this.
- if (Value *V = FactorizeBinOp(Instruction::And, Op0, Op1, Instruction::Or,
- Q, MaxRecurse))
- return V;
-
// If the operation is with the result of a select instruction, check whether
// operating on either branch of the select always yields the same value.
if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1))
@@ -1613,11 +1518,6 @@ static Value *SimplifyOrInst(Value *Op0, Value *Op1, const Query &Q,
MaxRecurse))
return V;
- // And distributes over Or. Try some generic simplifications based on this.
- if (Value *V = FactorizeBinOp(Instruction::Or, Op0, Op1, Instruction::And,
- Q, MaxRecurse))
- return V;
-
// If the operation is with the result of a select instruction, check whether
// operating on either branch of the select always yields the same value.
if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1))
@@ -1625,6 +1525,38 @@ static Value *SimplifyOrInst(Value *Op0, Value *Op1, const Query &Q,
MaxRecurse))
return V;
+ // (A & C)|(B & D)
+ Value *C = nullptr, *D = nullptr;
+ if (match(Op0, m_And(m_Value(A), m_Value(C))) &&
+ match(Op1, m_And(m_Value(B), m_Value(D)))) {
+ ConstantInt *C1 = dyn_cast<ConstantInt>(C);
+ ConstantInt *C2 = dyn_cast<ConstantInt>(D);
+ if (C1 && C2 && (C1->getValue() == ~C2->getValue())) {
+ // (A & C1)|(B & C2)
+ // If we have: ((V + N) & C1) | (V & C2)
+ // .. and C2 = ~C1 and C2 is 0+1+ and (N & C2) == 0
+ // replace with V+N.
+ Value *V1, *V2;
+ if ((C2->getValue() & (C2->getValue() + 1)) == 0 && // C2 == 0+1+
+ match(A, m_Add(m_Value(V1), m_Value(V2)))) {
+ // Add commutes, try both ways.
+ if (V1 == B && MaskedValueIsZero(V2, C2->getValue()))
+ return A;
+ if (V2 == B && MaskedValueIsZero(V1, C2->getValue()))
+ return A;
+ }
+ // Or commutes, try both ways.
+ if ((C1->getValue() & (C1->getValue() + 1)) == 0 &&
+ match(B, m_Add(m_Value(V1), m_Value(V2)))) {
+ // Add commutes, try both ways.
+ if (V1 == A && MaskedValueIsZero(V2, C1->getValue()))
+ return B;
+ if (V2 == A && MaskedValueIsZero(V1, C1->getValue()))
+ return B;
+ }
+ }
+ }
+
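
A worked instance of the new fold: take C1 = 0xFFFFFF00 and C2 = ~C1 = 0xFF (a "0+1+" low-bit mask) with N = 0x100. Since (N & C2) == 0, adding N never disturbs the low C2 bits of V, so the two AND halves reassemble exactly V + N. A hedged check with hypothetical names:

#include <cassert>
#include <cstdint>

static uint32_t checkOrOfAndsFold(uint32_t V) {
  const uint32_t C1 = 0xFFFFFF00u, C2 = 0x000000FFu, N = 0x100u;
  uint32_t Or = ((V + N) & C1) | (V & C2); // the ((V+N) & C1) | (V & C2) shape
  assert(Or == V + N);                     // the simplifier returns "A", i.e. V+N
  return Or;
}
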
// If the operation is with the result of a phi instruction, check whether
// operating on all incoming values of the phi always yields the same value.
if (isa<PHINode>(Op0) || isa<PHINode>(Op1))
@@ -1677,11 +1609,6 @@ static Value *SimplifyXorInst(Value *Op0, Value *Op1, const Query &Q,
MaxRecurse))
return V;
- // And distributes over Xor. Try some generic simplifications based on this.
- if (Value *V = FactorizeBinOp(Instruction::Xor, Op0, Op1, Instruction::And,
- Q, MaxRecurse))
- return V;
-
// Threading Xor over selects and phi nodes is pointless, so don't bother.
// Threading over the select in "A ^ select(cond, B, C)" means evaluating
// "A^B" and "A^C" and seeing if they are equal; but they are equal if and
@@ -2021,9 +1948,15 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
if (!CI2->isZero())
Upper = NegOne.udiv(CI2->getValue()) + 1;
} else if (match(LHS, m_SDiv(m_ConstantInt(CI2), m_Value()))) {
- // 'sdiv CI2, x' produces [-|CI2|, |CI2|].
- Upper = CI2->getValue().abs() + 1;
- Lower = (-Upper) + 1;
+ if (CI2->isMinSignedValue()) {
+ // 'sdiv INT_MIN, x' produces [INT_MIN, INT_MIN / -2].
+ Lower = CI2->getValue();
+ Upper = Lower.lshr(1) + 1;
+ } else {
+ // 'sdiv CI2, x' produces [-|CI2|, |CI2|].
+ Upper = CI2->getValue().abs() + 1;
+ Lower = (-Upper) + 1;
+ }
} else if (match(LHS, m_SDiv(m_Value(), m_ConstantInt(CI2)))) {
// 'sdiv x, CI2' produces [INT_MIN / CI2, INT_MAX / CI2].
APInt IntMin = APInt::getSignedMinValue(Width);
@@ -2241,6 +2174,25 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
}
}
+ // If a bit is known to be zero for A and known to be one for B,
+ // then A and B cannot be equal.
+ if (ICmpInst::isEquality(Pred)) {
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
+ uint32_t BitWidth = CI->getBitWidth();
+ APInt LHSKnownZero(BitWidth, 0);
+ APInt LHSKnownOne(BitWidth, 0);
+ computeKnownBits(LHS, LHSKnownZero, LHSKnownOne);
+ APInt RHSKnownZero(BitWidth, 0);
+ APInt RHSKnownOne(BitWidth, 0);
+ computeKnownBits(RHS, RHSKnownZero, RHSKnownOne);
+ if (((LHSKnownOne & RHSKnownZero) != 0) ||
+ ((LHSKnownZero & RHSKnownOne) != 0))
+ return (Pred == ICmpInst::ICMP_EQ)
+ ? ConstantInt::getFalse(CI->getContext())
+ : ConstantInt::getTrue(CI->getContext());
+ }
+ }
+
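
The known-bits disequality test above applies when the RHS is a constant (used here for its bit width): equality is impossible if some bit is known one on one side and known zero on the other. For example, LHS = X | 1 has bit 0 known one, so comparing it for equality against the constant 4 (bit 0 zero) folds to false. A sketch of the core test with plain masks (the real code uses APInt and computeKnownBits):

#include <cstdint>

static bool knownNotEqual(uint64_t LKnownZero, uint64_t LKnownOne,
                          uint64_t RKnownZero, uint64_t RKnownOne) {
  // A bit known 1 on one side and known 0 on the other rules out equality.
  return (LKnownOne & RKnownZero) != 0 || (LKnownZero & RKnownOne) != 0;
}
// For LHS = X | 1 vs RHS = 4: bit 0 is in LKnownOne and in RKnownZero, so
// "icmp eq" folds to false and "icmp ne" folds to true.
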
// Special logic for binary operators.
BinaryOperator *LBO = dyn_cast<BinaryOperator>(LHS);
BinaryOperator *RBO = dyn_cast<BinaryOperator>(RHS);
diff --git a/lib/Analysis/JumpInstrTableInfo.cpp b/lib/Analysis/JumpInstrTableInfo.cpp
new file mode 100644
index 0000000..b5b4265
--- /dev/null
+++ b/lib/Analysis/JumpInstrTableInfo.cpp
@@ -0,0 +1,40 @@
+//===-- JumpInstrTableInfo.cpp: Info for Jump-Instruction Tables ----------===//
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief Information about jump-instruction tables that have been created
+/// by the JumpInstrTables pass.
+///
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "jiti"
+
+#include "llvm/Analysis/JumpInstrTableInfo.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Type.h"
+
+using namespace llvm;
+
+INITIALIZE_PASS(JumpInstrTableInfo, "jump-instr-table-info",
+ "Jump-Instruction Table Info", true, true)
+char JumpInstrTableInfo::ID = 0;
+
+ImmutablePass *llvm::createJumpInstrTableInfoPass() {
+ return new JumpInstrTableInfo();
+}
+
+JumpInstrTableInfo::JumpInstrTableInfo() : ImmutablePass(ID), Tables() {
+ initializeJumpInstrTableInfoPass(*PassRegistry::getPassRegistry());
+}
+
+JumpInstrTableInfo::~JumpInstrTableInfo() {}
+
+void JumpInstrTableInfo::insertEntry(FunctionType *TableFunTy, Function *Target,
+ Function *Jump) {
+ Tables[TableFunTy].push_back(JumpPair(Target, Jump));
+}
diff --git a/lib/Analysis/LoopPass.cpp b/lib/Analysis/LoopPass.cpp
index 8df18e7..7bd866e 100644
--- a/lib/Analysis/LoopPass.cpp
+++ b/lib/Analysis/LoopPass.cpp
@@ -45,7 +45,10 @@ public:
for (Loop::block_iterator b = L->block_begin(), be = L->block_end();
b != be;
++b) {
- (*b)->print(Out);
+ if (*b)
+ (*b)->print(Out);
+ else
+ Out << "Printing <null> block";
}
return false;
}
diff --git a/lib/Analysis/NoAliasAnalysis.cpp b/lib/Analysis/NoAliasAnalysis.cpp
index 4e11e50..139fa38 100644
--- a/lib/Analysis/NoAliasAnalysis.cpp
+++ b/lib/Analysis/NoAliasAnalysis.cpp
@@ -15,6 +15,7 @@
#include "llvm/Analysis/Passes.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/LLVMContext.h"
#include "llvm/Pass.h"
using namespace llvm;
@@ -53,6 +54,13 @@ namespace {
bool pointsToConstantMemory(const Location &Loc, bool OrLocal) override {
return false;
}
+ Location getArgLocation(ImmutableCallSite CS, unsigned ArgIdx,
+ ModRefResult &Mask) override {
+ Mask = ModRef;
+ return Location(CS.getArgument(ArgIdx), UnknownSize,
+ CS.getInstruction()->getMetadata(LLVMContext::MD_tbaa));
+ }
+
ModRefResult getModRefInfo(ImmutableCallSite CS,
const Location &Loc) override {
return ModRef;
diff --git a/lib/Analysis/RegionPass.cpp b/lib/Analysis/RegionPass.cpp
index 3c7798f..71de144 100644
--- a/lib/Analysis/RegionPass.cpp
+++ b/lib/Analysis/RegionPass.cpp
@@ -195,8 +195,12 @@ public:
bool runOnRegion(Region *R, RGPassManager &RGM) override {
Out << Banner;
- for (const auto &BB : R->blocks())
- BB->print(Out);
+ for (const auto &BB : R->blocks()) {
+ if (BB)
+ BB->print(Out);
+ else
+ Out << "Printing <null> Block";
+ }
return false;
}
diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp
index 42a7aa2..06dbde5 100644
--- a/lib/Analysis/ScalarEvolution.cpp
+++ b/lib/Analysis/ScalarEvolution.cpp
@@ -7216,6 +7216,15 @@ public:
cast<SCEVConstant>(Zero)->getValue();
Remainder = SCEVParameterRewriter::rewrite(Numerator, SE, RewriteMap, true);
+ if (Remainder->isZero()) {
+ // The Quotient is obtained by replacing Denominator by 1 in Numerator.
+ RewriteMap[cast<SCEVUnknown>(Denominator)->getValue()] =
+ cast<SCEVConstant>(One)->getValue();
+ Quotient =
+ SCEVParameterRewriter::rewrite(Numerator, SE, RewriteMap, true);
+ return;
+ }
+
// Quotient is (Numerator - Remainder) divided by Denominator.
const SCEV *Q, *R;
const SCEV *Diff = SE.getMinusSCEV(Numerator, Remainder);
@@ -7356,7 +7365,7 @@ const SCEV *ScalarEvolution::getElementSize(Instruction *Inst) {
if (StoreInst *Store = dyn_cast<StoreInst>(Inst))
Ty = Store->getValueOperand()->getType();
else if (LoadInst *Load = dyn_cast<LoadInst>(Inst))
- Ty = Load->getPointerOperand()->getType();
+ Ty = Load->getType();
else
return nullptr;
@@ -7370,7 +7379,7 @@ void ScalarEvolution::findArrayDimensions(SmallVectorImpl<const SCEV *> &Terms,
SmallVectorImpl<const SCEV *> &Sizes,
const SCEV *ElementSize) const {
- if (Terms.size() < 1)
+ if (Terms.size() < 1 || !ElementSize)
return;
// Early return when Terms do not contain parameters: we do not delinearize
diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp
index b507043..8c75b0d 100644
--- a/lib/Analysis/ScalarEvolutionExpander.cpp
+++ b/lib/Analysis/ScalarEvolutionExpander.cpp
@@ -16,6 +16,7 @@
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
+#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/DataLayout.h"
@@ -1706,7 +1707,7 @@ unsigned SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT,
// Fold constant phis. They may be congruent to other constant phis and
// would confuse the logic below that expects proper IVs.
- if (Value *V = Phi->hasConstantValue()) {
+ if (Value *V = SimplifyInstruction(Phi, SE.DL, SE.TLI, SE.DT)) {
Phi->replaceAllUsesWith(V);
DeadInsts.push_back(Phi);
++NumElim;
diff --git a/lib/Analysis/ScalarEvolutionNormalization.cpp b/lib/Analysis/ScalarEvolutionNormalization.cpp
index e9db295..3ccefb0 100644
--- a/lib/Analysis/ScalarEvolutionNormalization.cpp
+++ b/lib/Analysis/ScalarEvolutionNormalization.cpp
@@ -241,7 +241,7 @@ TransformSubExpr(const SCEV *S, Instruction *User, Value *OperandValToReplace) {
}
/// Top level driver for transforming an expression DAG into its requested
-/// post-inc form (either "Normalized" or "Denormalized".
+/// post-inc form (either "Normalized" or "Denormalized").
const SCEV *llvm::TransformForPostIncUse(TransformKind Kind,
const SCEV *S,
Instruction *User,
diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp
index 4f48753..5264745 100644
--- a/lib/Analysis/ValueTracking.cpp
+++ b/lib/Analysis/ValueTracking.cpp
@@ -188,7 +188,8 @@ static void computeKnownBitsMul(Value *Op0, Value *Op1, bool NSW,
KnownOne.setBit(BitWidth - 1);
}
-void llvm::computeKnownBitsLoad(const MDNode &Ranges, APInt &KnownZero) {
+void llvm::computeKnownBitsFromRangeMetadata(const MDNode &Ranges,
+ APInt &KnownZero) {
unsigned BitWidth = KnownZero.getBitWidth();
unsigned NumRanges = Ranges.getNumOperands() / 2;
assert(NumRanges >= 1);
@@ -338,7 +339,7 @@ void llvm::computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
default: break;
case Instruction::Load:
if (MDNode *MD = cast<LoadInst>(I)->getMetadata(LLVMContext::MD_range))
- computeKnownBitsLoad(*MD, KnownZero);
+ computeKnownBitsFromRangeMetadata(*MD, KnownZero);
break;
case Instruction::And: {
// If either the LHS or the RHS are Zero, the result is zero.
@@ -733,6 +734,12 @@ void llvm::computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
break;
}
case Instruction::Call:
+ case Instruction::Invoke:
+ if (MDNode *MD = cast<Instruction>(I)->getMetadata(LLVMContext::MD_range))
+ computeKnownBitsFromRangeMetadata(*MD, KnownZero);
+ // If a range metadata is attached to this IntrinsicInst, intersect the
+ // explicit range specified by the metadata and the implicit range of
+ // the intrinsic.
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
switch (II->getIntrinsicID()) {
default: break;
@@ -742,16 +749,16 @@ void llvm::computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
// If this call is undefined for 0, the result will be less than 2^n.
if (II->getArgOperand(1) == ConstantInt::getTrue(II->getContext()))
LowBits -= 1;
- KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - LowBits);
+ KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - LowBits);
break;
}
case Intrinsic::ctpop: {
unsigned LowBits = Log2_32(BitWidth)+1;
- KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - LowBits);
+ KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - LowBits);
break;
}
case Intrinsic::x86_sse42_crc32_64_64:
- KnownZero = APInt::getHighBitsSet(64, 32);
+ KnownZero |= APInt::getHighBitsSet(64, 32);
break;
}
}
@@ -1977,7 +1984,7 @@ bool llvm::isSafeToSpeculativelyExecute(const Value *V,
return true;
case Instruction::UDiv:
case Instruction::URem:
- // x / y is undefined if y == 0, but calcuations like x / 3 are safe.
+ // x / y is undefined if y == 0, but calculations like x / 3 are safe.
return isKnownNonZero(Inst->getOperand(1), TD);
case Instruction::SDiv:
case Instruction::SRem: {
@@ -2000,12 +2007,12 @@ bool llvm::isSafeToSpeculativelyExecute(const Value *V,
// Speculative load may create a race that did not exist in the source.
LI->getParent()->getParent()->hasFnAttribute(Attribute::SanitizeThread))
return false;
- return LI->getPointerOperand()->isDereferenceablePointer();
+ return LI->getPointerOperand()->isDereferenceablePointer(TD);
}
case Instruction::Call: {
if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
switch (II->getIntrinsicID()) {
- // These synthetic intrinsics have no side-effects, and just mark
+ // These synthetic intrinsics have no side-effects and just mark
// information about their operands.
// FIXME: There are other no-op synthetic instructions that potentially
// should be considered at least *safe* to speculate...
diff --git a/lib/AsmParser/LLLexer.cpp b/lib/AsmParser/LLLexer.cpp
index 44a3412..1e5bcdd 100644
--- a/lib/AsmParser/LLLexer.cpp
+++ b/lib/AsmParser/LLLexer.cpp
@@ -209,6 +209,7 @@ lltok::Kind LLLexer::LexToken() {
return LexToken();
case '+': return LexPositive();
case '@': return LexAt();
+ case '$': return LexDollar();
case '%': return LexPercent();
case '"': return LexQuote();
case '.':
@@ -222,13 +223,6 @@ lltok::Kind LLLexer::LexToken() {
return lltok::dotdotdot;
}
return lltok::Error;
- case '$':
- if (const char *Ptr = isLabelTail(CurPtr)) {
- CurPtr = Ptr;
- StrVal.assign(TokStart, CurPtr-1);
- return lltok::LabelStr;
- }
- return lltok::Error;
case ';':
SkipLineComment();
return LexToken();
@@ -307,6 +301,43 @@ lltok::Kind LLLexer::LexAt() {
return lltok::Error;
}
+lltok::Kind LLLexer::LexDollar() {
+ if (const char *Ptr = isLabelTail(TokStart)) {
+ CurPtr = Ptr;
+ StrVal.assign(TokStart, CurPtr - 1);
+ return lltok::LabelStr;
+ }
+
+ // Handle DollarStringConstant: $\"[^\"]*\"
+ if (CurPtr[0] == '"') {
+ ++CurPtr;
+
+ while (1) {
+ int CurChar = getNextChar();
+
+ if (CurChar == EOF) {
+ Error("end of file in COMDAT variable name");
+ return lltok::Error;
+ }
+ if (CurChar == '"') {
+ StrVal.assign(TokStart + 2, CurPtr - 1);
+ UnEscapeLexed(StrVal);
+ if (StringRef(StrVal).find_first_of(0) != StringRef::npos) {
+ Error("Null bytes are not allowed in names");
+ return lltok::Error;
+ }
+ return lltok::ComdatVar;
+ }
+ }
+ }
+
+ // Handle ComdatVarName: $[-a-zA-Z$._][-a-zA-Z$._0-9]*
+ if (ReadVarName())
+ return lltok::ComdatVar;
+
+ return lltok::Error;
+}
+
/// ReadString - Read a string until the closing quote.
lltok::Kind LLLexer::ReadString(lltok::Kind kind) {
const char *Start = CurPtr;
@@ -490,7 +521,7 @@ lltok::Kind LLLexer::LexIdentifier() {
KEYWORD(available_externally);
KEYWORD(linkonce);
KEYWORD(linkonce_odr);
- KEYWORD(weak);
+ KEYWORD(weak); // Used as a linkage and a modifier for "cmpxchg".
KEYWORD(weak_odr);
KEYWORD(appending);
KEYWORD(dllimport);
@@ -583,6 +614,7 @@ lltok::Kind LLLexer::LexIdentifier() {
KEYWORD(cold);
KEYWORD(inlinehint);
KEYWORD(inreg);
+ KEYWORD(jumptable);
KEYWORD(minsize);
KEYWORD(naked);
KEYWORD(nest);
@@ -617,6 +649,15 @@ lltok::Kind LLLexer::LexIdentifier() {
KEYWORD(type);
KEYWORD(opaque);
+ KEYWORD(comdat);
+
+ // Comdat types
+ KEYWORD(any);
+ KEYWORD(exactmatch);
+ KEYWORD(largest);
+ KEYWORD(noduplicates);
+ KEYWORD(samesize);
+
KEYWORD(eq); KEYWORD(ne); KEYWORD(slt); KEYWORD(sgt); KEYWORD(sle);
KEYWORD(sge); KEYWORD(ult); KEYWORD(ugt); KEYWORD(ule); KEYWORD(uge);
KEYWORD(oeq); KEYWORD(one); KEYWORD(olt); KEYWORD(ogt); KEYWORD(ole);
diff --git a/lib/AsmParser/LLLexer.h b/lib/AsmParser/LLLexer.h
index ad11d49..d42de57 100644
--- a/lib/AsmParser/LLLexer.h
+++ b/lib/AsmParser/LLLexer.h
@@ -81,6 +81,7 @@ namespace llvm {
lltok::Kind LexDigitOrNegative();
lltok::Kind LexPositive();
lltok::Kind LexAt();
+ lltok::Kind LexDollar();
lltok::Kind LexExclaim();
lltok::Kind LexPercent();
lltok::Kind LexQuote();
diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp
index 3282e8a..be55ac6 100644
--- a/lib/AsmParser/LLParser.cpp
+++ b/lib/AsmParser/LLParser.cpp
@@ -163,6 +163,11 @@ bool LLParser::ValidateEndOfModule() {
return Error(I->second.second,
"use of undefined type named '" + I->getKey() + "'");
+ if (!ForwardRefComdats.empty())
+ return Error(ForwardRefComdats.begin()->second,
+ "use of undefined comdat '$" +
+ ForwardRefComdats.begin()->first + "'");
+
if (!ForwardRefVals.empty())
return Error(ForwardRefVals.begin()->second.second,
"use of undefined value '@" + ForwardRefVals.begin()->first +
@@ -238,6 +243,7 @@ bool LLParser::ParseTopLevelEntities() {
case lltok::LocalVar: if (ParseNamedType()) return true; break;
case lltok::GlobalID: if (ParseUnnamedGlobal()) return true; break;
case lltok::GlobalVar: if (ParseNamedGlobal()) return true; break;
+ case lltok::ComdatVar: if (parseComdat()) return true; break;
case lltok::exclaim: if (ParseStandaloneMetadata()) return true; break;
case lltok::MetadataVar:if (ParseNamedMetadata()) return true; break;
@@ -257,33 +263,31 @@ bool LLParser::ParseTopLevelEntities() {
case lltok::kw_appending: // OptionalLinkage
case lltok::kw_common: // OptionalLinkage
case lltok::kw_extern_weak: // OptionalLinkage
- case lltok::kw_external: { // OptionalLinkage
+ case lltok::kw_external: // OptionalLinkage
+ case lltok::kw_default: // OptionalVisibility
+ case lltok::kw_hidden: // OptionalVisibility
+ case lltok::kw_protected: // OptionalVisibility
+ case lltok::kw_dllimport: // OptionalDLLStorageClass
+ case lltok::kw_dllexport: // OptionalDLLStorageClass
+ case lltok::kw_thread_local: // OptionalThreadLocal
+ case lltok::kw_addrspace: // OptionalAddrSpace
+ case lltok::kw_constant: // GlobalType
+ case lltok::kw_global: { // GlobalType
unsigned Linkage, Visibility, DLLStorageClass;
- if (ParseOptionalLinkage(Linkage) ||
+ bool UnnamedAddr;
+ GlobalVariable::ThreadLocalMode TLM;
+ bool HasLinkage;
+ if (ParseOptionalLinkage(Linkage, HasLinkage) ||
ParseOptionalVisibility(Visibility) ||
ParseOptionalDLLStorageClass(DLLStorageClass) ||
- ParseGlobal("", SMLoc(), Linkage, true, Visibility, DLLStorageClass))
- return true;
- break;
- }
- case lltok::kw_default: // OptionalVisibility
- case lltok::kw_hidden: // OptionalVisibility
- case lltok::kw_protected: { // OptionalVisibility
- unsigned Visibility, DLLStorageClass;
- if (ParseOptionalVisibility(Visibility) ||
- ParseOptionalDLLStorageClass(DLLStorageClass) ||
- ParseGlobal("", SMLoc(), 0, false, Visibility, DLLStorageClass))
+ ParseOptionalThreadLocal(TLM) ||
+ parseOptionalUnnamedAddr(UnnamedAddr) ||
+ ParseGlobal("", SMLoc(), Linkage, HasLinkage, Visibility,
+ DLLStorageClass, TLM, UnnamedAddr))
return true;
break;
}
- case lltok::kw_thread_local: // OptionalThreadLocal
- case lltok::kw_addrspace: // OptionalAddrSpace
- case lltok::kw_constant: // GlobalType
- case lltok::kw_global: // GlobalType
- if (ParseGlobal("", SMLoc(), 0, false, 0, 0)) return true;
- break;
-
case lltok::kw_attributes: if (ParseUnnamedAttrGrp()) return true; break;
}
}
@@ -470,15 +474,20 @@ bool LLParser::ParseUnnamedGlobal() {
bool HasLinkage;
unsigned Linkage, Visibility, DLLStorageClass;
+ GlobalVariable::ThreadLocalMode TLM;
+ bool UnnamedAddr;
if (ParseOptionalLinkage(Linkage, HasLinkage) ||
ParseOptionalVisibility(Visibility) ||
- ParseOptionalDLLStorageClass(DLLStorageClass))
+ ParseOptionalDLLStorageClass(DLLStorageClass) ||
+ ParseOptionalThreadLocal(TLM) ||
+ parseOptionalUnnamedAddr(UnnamedAddr))
return true;
if (HasLinkage || Lex.getKind() != lltok::kw_alias)
return ParseGlobal(Name, NameLoc, Linkage, HasLinkage, Visibility,
- DLLStorageClass);
- return ParseAlias(Name, NameLoc, Visibility, DLLStorageClass);
+ DLLStorageClass, TLM, UnnamedAddr);
+ return ParseAlias(Name, NameLoc, Visibility, DLLStorageClass, TLM,
+ UnnamedAddr);
}
/// ParseNamedGlobal:
@@ -493,16 +502,71 @@ bool LLParser::ParseNamedGlobal() {
bool HasLinkage;
unsigned Linkage, Visibility, DLLStorageClass;
+ GlobalVariable::ThreadLocalMode TLM;
+ bool UnnamedAddr;
if (ParseToken(lltok::equal, "expected '=' in global variable") ||
ParseOptionalLinkage(Linkage, HasLinkage) ||
ParseOptionalVisibility(Visibility) ||
- ParseOptionalDLLStorageClass(DLLStorageClass))
+ ParseOptionalDLLStorageClass(DLLStorageClass) ||
+ ParseOptionalThreadLocal(TLM) ||
+ parseOptionalUnnamedAddr(UnnamedAddr))
return true;
if (HasLinkage || Lex.getKind() != lltok::kw_alias)
return ParseGlobal(Name, NameLoc, Linkage, HasLinkage, Visibility,
- DLLStorageClass);
- return ParseAlias(Name, NameLoc, Visibility, DLLStorageClass);
+ DLLStorageClass, TLM, UnnamedAddr);
+ return ParseAlias(Name, NameLoc, Visibility, DLLStorageClass, TLM,
+ UnnamedAddr);
+}
+
+bool LLParser::parseComdat() {
+ assert(Lex.getKind() == lltok::ComdatVar);
+ std::string Name = Lex.getStrVal();
+ LocTy NameLoc = Lex.getLoc();
+ Lex.Lex();
+
+ if (ParseToken(lltok::equal, "expected '=' here"))
+ return true;
+
+ if (ParseToken(lltok::kw_comdat, "expected comdat keyword"))
+ return TokError("expected comdat type");
+
+ Comdat::SelectionKind SK;
+ switch (Lex.getKind()) {
+ default:
+ return TokError("unknown selection kind");
+ case lltok::kw_any:
+ SK = Comdat::Any;
+ break;
+ case lltok::kw_exactmatch:
+ SK = Comdat::ExactMatch;
+ break;
+ case lltok::kw_largest:
+ SK = Comdat::Largest;
+ break;
+ case lltok::kw_noduplicates:
+ SK = Comdat::NoDuplicates;
+ break;
+ case lltok::kw_samesize:
+ SK = Comdat::SameSize;
+ break;
+ }
+ Lex.Lex();
+
+ // See if the comdat was forward referenced, if so, use the comdat.
+ Module::ComdatSymTabType &ComdatSymTab = M->getComdatSymbolTable();
+ Module::ComdatSymTabType::iterator I = ComdatSymTab.find(Name);
+ if (I != ComdatSymTab.end() && !ForwardRefComdats.erase(Name))
+ return Error(NameLoc, "redefinition of comdat '$" + Name + "'");
+
+ Comdat *C;
+ if (I != ComdatSymTab.end())
+ C = &I->second;
+ else
+ C = M->getOrInsertComdat(Name);
+ C->setSelectionKind(SK);
+
+ return false;
}
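
parseComdat handles top-level definitions such as "$foo = comdat any" (the other selection kinds being exactmatch, largest, noduplicates, and samesize), resolving any forward references recorded by getComdat below. A hedged sketch of driving the same Module API this parser uses, with M and GV as hypothetical in-scope Module and GlobalVariable pointers:

// Roughly what "$foo = comdat largest" followed by a global carrying
// "comdat $foo" ends up doing; illustrative only.
Comdat *C = M->getOrInsertComdat("foo"); // creates or finds the entry
C->setSelectionKind(Comdat::Largest);
GV->setComdat(C);                        // attach it, as ParseGlobal now allows
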
// MDString:
@@ -510,6 +574,7 @@ bool LLParser::ParseNamedGlobal() {
bool LLParser::ParseMDString(MDString *&Result) {
std::string Str;
if (ParseStringConstant(Str)) return true;
+ llvm::UpgradeMDStringConstant(Str);
Result = MDString::get(Context, Str);
return false;
}
@@ -628,18 +693,19 @@ static bool isValidVisibilityForLinkage(unsigned V, unsigned L) {
}
/// ParseAlias:
-/// ::= GlobalVar '=' OptionalVisibility OptionalDLLStorageClass 'alias'
+/// ::= GlobalVar '=' OptionalVisibility OptionalDLLStorageClass
+/// OptionalThreadLocal OptionalUnnamedAddr 'alias'
/// OptionalLinkage Aliasee
-/// ::= GlobalVar '=' OptionalVisibility OptionalDLLStorageClass 'alias'
-/// OptionalLinkage OptionalAddrSpace Type, Aliasee
///
/// Aliasee
/// ::= TypeAndValue
///
-/// Everything through DLL storage class has already been parsed.
+/// Everything through OptionalUnnamedAddr has already been parsed.
///
bool LLParser::ParseAlias(const std::string &Name, LocTy NameLoc,
- unsigned Visibility, unsigned DLLStorageClass) {
+ unsigned Visibility, unsigned DLLStorageClass,
+ GlobalVariable::ThreadLocalMode TLM,
+ bool UnnamedAddr) {
assert(Lex.getKind() == lltok::kw_alias);
Lex.Lex();
LocTy LinkageLoc = Lex.getLoc();
@@ -656,51 +722,39 @@ bool LLParser::ParseAlias(const std::string &Name, LocTy NameLoc,
return Error(LinkageLoc,
"symbol with local linkage must have default visibility");
- bool HasAddrSpace = Lex.getKind() == lltok::kw_addrspace;
- unsigned AddrSpace;
- LocTy AddrSpaceLoc = Lex.getLoc();
- if (ParseOptionalAddrSpace(AddrSpace))
- return true;
-
- LocTy TyLoc = Lex.getLoc();
- Type *Ty = nullptr;
- if (ParseType(Ty))
- return true;
-
- bool DifferentType = EatIfPresent(lltok::comma);
- if (HasAddrSpace && !DifferentType)
- return Error(AddrSpaceLoc, "A type is required if addrspace is given");
-
- Type *AliaseeType = nullptr;
- if (DifferentType) {
- if (ParseType(AliaseeType))
+ Constant *Aliasee;
+ LocTy AliaseeLoc = Lex.getLoc();
+ if (Lex.getKind() != lltok::kw_bitcast &&
+ Lex.getKind() != lltok::kw_getelementptr &&
+ Lex.getKind() != lltok::kw_addrspacecast &&
+ Lex.getKind() != lltok::kw_inttoptr) {
+ if (ParseGlobalTypeAndValue(Aliasee))
return true;
} else {
- AliaseeType = Ty;
- auto *PTy = dyn_cast<PointerType>(Ty);
- if (!PTy)
- return Error(TyLoc, "An alias must have pointer type");
- Ty = PTy->getElementType();
- AddrSpace = PTy->getAddressSpace();
+ // The bitcast dest type is not present, it is implied by the dest type.
+ ValID ID;
+ if (ParseValID(ID))
+ return true;
+ if (ID.Kind != ValID::t_Constant)
+ return Error(AliaseeLoc, "invalid aliasee");
+ Aliasee = ID.ConstantVal;
}
- LocTy AliaseeLoc = Lex.getLoc();
- Constant *C;
- if (ParseGlobalValue(AliaseeType, C))
- return true;
-
- auto *Aliasee = dyn_cast<GlobalObject>(C);
- if (!Aliasee)
- return Error(AliaseeLoc, "Alias must point to function or variable");
-
- assert(Aliasee->getType()->isPointerTy());
+ Type *AliaseeType = Aliasee->getType();
+ auto *PTy = dyn_cast<PointerType>(AliaseeType);
+ if (!PTy)
+ return Error(AliaseeLoc, "An alias must have pointer type");
+ Type *Ty = PTy->getElementType();
+ unsigned AddrSpace = PTy->getAddressSpace();
// Okay, create the alias but do not insert it into the module yet.
std::unique_ptr<GlobalAlias> GA(
GlobalAlias::create(Ty, AddrSpace, (GlobalValue::LinkageTypes)Linkage,
Name, Aliasee, /*Parent*/ nullptr));
+ GA->setThreadLocalMode(TLM);
GA->setVisibility((GlobalValue::VisibilityTypes)Visibility);
GA->setDLLStorageClass((GlobalValue::DLLStorageClassTypes)DLLStorageClass);
+ GA->setUnnamedAddr(UnnamedAddr);
// See if this value already exists in the symbol table. If so, it is either
// a redefinition or a definition of a forward reference.
@@ -720,11 +774,6 @@ bool LLParser::ParseAlias(const std::string &Name, LocTy NameLoc,
// If they agree, just RAUW the old value with the alias and remove the
// forward ref info.
- for (auto *User : Val->users()) {
- if (auto *GA = dyn_cast<GlobalAlias>(User))
- return Error(NameLoc, "Alias is pointed by alias " + GA->getName());
- }
-
Val->replaceAllUsesWith(GA.get());
Val->eraseFromParent();
ForwardRefVals.erase(I);
@@ -742,34 +791,31 @@ bool LLParser::ParseAlias(const std::string &Name, LocTy NameLoc,
/// ParseGlobal
/// ::= GlobalVar '=' OptionalLinkage OptionalVisibility OptionalDLLStorageClass
-/// OptionalThreadLocal OptionalAddrSpace OptionalUnNammedAddr
+/// OptionalThreadLocal OptionalUnnamedAddr OptionalAddrSpace
/// OptionalExternallyInitialized GlobalType Type Const
/// ::= OptionalLinkage OptionalVisibility OptionalDLLStorageClass
-/// OptionalThreadLocal OptionalAddrSpace OptionalUnNammedAddr
+/// OptionalThreadLocal OptionalUnnamedAddr OptionalAddrSpace
/// OptionalExternallyInitialized GlobalType Type Const
///
-/// Everything up to and including OptionalDLLStorageClass has been parsed
+/// Everything up to and including OptionalUnnamedAddr has been parsed
/// already.
///
bool LLParser::ParseGlobal(const std::string &Name, LocTy NameLoc,
unsigned Linkage, bool HasLinkage,
- unsigned Visibility, unsigned DLLStorageClass) {
+ unsigned Visibility, unsigned DLLStorageClass,
+ GlobalVariable::ThreadLocalMode TLM,
+ bool UnnamedAddr) {
if (!isValidVisibilityForLinkage(Visibility, Linkage))
return Error(NameLoc,
"symbol with local linkage must have default visibility");
unsigned AddrSpace;
- bool IsConstant, UnnamedAddr, IsExternallyInitialized;
- GlobalVariable::ThreadLocalMode TLM;
- LocTy UnnamedAddrLoc;
+ bool IsConstant, IsExternallyInitialized;
LocTy IsExternallyInitializedLoc;
LocTy TyLoc;
Type *Ty = nullptr;
- if (ParseOptionalThreadLocal(TLM) ||
- ParseOptionalAddrSpace(AddrSpace) ||
- ParseOptionalToken(lltok::kw_unnamed_addr, UnnamedAddr,
- &UnnamedAddrLoc) ||
+ if (ParseOptionalAddrSpace(AddrSpace) ||
ParseOptionalToken(lltok::kw_externally_initialized,
IsExternallyInitialized,
&IsExternallyInitializedLoc) ||
@@ -848,7 +894,13 @@ bool LLParser::ParseGlobal(const std::string &Name, LocTy NameLoc,
if (ParseOptionalAlignment(Alignment)) return true;
GV->setAlignment(Alignment);
} else {
- TokError("unknown global variable property!");
+ Comdat *C;
+ if (parseOptionalComdat(C))
+ return true;
+ if (C)
+ GV->setComdat(C);
+ else
+ return TokError("unknown global variable property!");
}
}
@@ -967,6 +1019,7 @@ bool LLParser::ParseFnAttributeValuePairs(AttrBuilder &B,
case lltok::kw_builtin: B.addAttribute(Attribute::Builtin); break;
case lltok::kw_cold: B.addAttribute(Attribute::Cold); break;
case lltok::kw_inlinehint: B.addAttribute(Attribute::InlineHint); break;
+ case lltok::kw_jumptable: B.addAttribute(Attribute::JumpTable); break;
case lltok::kw_minsize: B.addAttribute(Attribute::MinSize); break;
case lltok::kw_naked: B.addAttribute(Attribute::Naked); break;
case lltok::kw_nobuiltin: B.addAttribute(Attribute::NoBuiltin); break;
@@ -1106,6 +1159,24 @@ GlobalValue *LLParser::GetGlobalVal(unsigned ID, Type *Ty, LocTy Loc) {
//===----------------------------------------------------------------------===//
+// Comdat Reference/Resolution Routines.
+//===----------------------------------------------------------------------===//
+
+Comdat *LLParser::getComdat(const std::string &Name, LocTy Loc) {
+ // Look this name up in the comdat symbol table.
+ Module::ComdatSymTabType &ComdatSymTab = M->getComdatSymbolTable();
+ Module::ComdatSymTabType::iterator I = ComdatSymTab.find(Name);
+ if (I != ComdatSymTab.end())
+ return &I->second;
+
+ // Otherwise, create a new forward reference for this value and remember it.
+ Comdat *C = M->getOrInsertComdat(Name);
+ ForwardRefComdats[Name] = Loc;
+ return C;
+}
+
+
+//===----------------------------------------------------------------------===//
// Helper Routines.
//===----------------------------------------------------------------------===//
@@ -1230,6 +1301,7 @@ bool LLParser::ParseOptionalParamAttrs(AttrBuilder &B) {
case lltok::kw_alwaysinline:
case lltok::kw_builtin:
case lltok::kw_inlinehint:
+ case lltok::kw_jumptable:
case lltok::kw_minsize:
case lltok::kw_naked:
case lltok::kw_nobuiltin:
@@ -1291,6 +1363,7 @@ bool LLParser::ParseOptionalReturnAttrs(AttrBuilder &B) {
case lltok::kw_builtin:
case lltok::kw_cold:
case lltok::kw_inlinehint:
+ case lltok::kw_jumptable:
case lltok::kw_minsize:
case lltok::kw_naked:
case lltok::kw_nobuiltin:
@@ -2797,6 +2870,19 @@ bool LLParser::ParseGlobalTypeAndValue(Constant *&V) {
ParseGlobalValue(Ty, V);
}
+bool LLParser::parseOptionalComdat(Comdat *&C) {
+ C = nullptr;
+ if (!EatIfPresent(lltok::kw_comdat))
+ return false;
+ if (Lex.getKind() != lltok::ComdatVar)
+ return TokError("expected comdat variable");
+ LocTy Loc = Lex.getLoc();
+ StringRef Name = Lex.getStrVal();
+ C = getComdat(Name, Loc);
+ Lex.Lex();
+ return false;
+}
+
/// ParseGlobalValueVector
/// ::= /*empty*/
/// ::= TypeAndValue (',' TypeAndValue)*
@@ -3097,6 +3183,7 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) {
bool UnnamedAddr;
LocTy UnnamedAddrLoc;
Constant *Prefix = nullptr;
+ Comdat *C;
if (ParseArgumentList(ArgList, isVarArg) ||
ParseOptionalToken(lltok::kw_unnamed_addr, UnnamedAddr,
@@ -3105,6 +3192,7 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) {
BuiltinLoc) ||
(EatIfPresent(lltok::kw_section) &&
ParseStringConstant(Section)) ||
+ parseOptionalComdat(C) ||
ParseOptionalAlignment(Alignment) ||
(EatIfPresent(lltok::kw_gc) &&
ParseStringConstant(GC)) ||
@@ -3207,6 +3295,7 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) {
Fn->setUnnamedAddr(UnnamedAddr);
Fn->setAlignment(Alignment);
Fn->setSection(Section);
+ Fn->setComdat(C);
if (!GC.empty()) Fn->setGC(GC.c_str());
Fn->setPrefixData(Prefix);
ForwardRefAttrGroups[Fn] = FwdRefAttrGrps;
@@ -4011,7 +4100,8 @@ bool LLParser::ParseLandingPad(Instruction *&Inst, PerFunctionState &PFS) {
else
return TokError("expected 'catch' or 'filter' clause type");
- Value *V; LocTy VLoc;
+ Value *V;
+ LocTy VLoc;
if (ParseTypeAndValue(V, VLoc, PFS)) {
delete LP;
return true;
@@ -4027,7 +4117,7 @@ bool LLParser::ParseLandingPad(Instruction *&Inst, PerFunctionState &PFS) {
Error(VLoc, "'filter' clause has an invalid type");
}
- LP->addClause(V);
+ LP->addClause(cast<Constant>(V));
}
Inst = LP;
@@ -4263,8 +4353,8 @@ int LLParser::ParseStore(Instruction *&Inst, PerFunctionState &PFS) {
}
/// ParseCmpXchg
-/// ::= 'cmpxchg' 'volatile'? TypeAndValue ',' TypeAndValue ',' TypeAndValue
-/// 'singlethread'? AtomicOrdering AtomicOrdering
+/// ::= 'cmpxchg' 'weak'? 'volatile'? TypeAndValue ',' TypeAndValue ','
+/// TypeAndValue 'singlethread'? AtomicOrdering AtomicOrdering
int LLParser::ParseCmpXchg(Instruction *&Inst, PerFunctionState &PFS) {
Value *Ptr, *Cmp, *New; LocTy PtrLoc, CmpLoc, NewLoc;
bool AteExtraComma = false;
@@ -4272,6 +4362,10 @@ int LLParser::ParseCmpXchg(Instruction *&Inst, PerFunctionState &PFS) {
AtomicOrdering FailureOrdering = NotAtomic;
SynchronizationScope Scope = CrossThread;
bool isVolatile = false;
+ bool isWeak = false;
+
+ if (EatIfPresent(lltok::kw_weak))
+ isWeak = true;
if (EatIfPresent(lltok::kw_volatile))
isVolatile = true;
@@ -4304,9 +4398,10 @@ int LLParser::ParseCmpXchg(Instruction *&Inst, PerFunctionState &PFS) {
return Error(NewLoc, "cmpxchg operand must be power-of-two byte-sized"
" integer");
- AtomicCmpXchgInst *CXI = new AtomicCmpXchgInst(Ptr, Cmp, New, SuccessOrdering,
- FailureOrdering, Scope);
+ AtomicCmpXchgInst *CXI = new AtomicCmpXchgInst(
+ Ptr, Cmp, New, SuccessOrdering, FailureOrdering, Scope);
CXI->setVolatile(isVolatile);
+ CXI->setWeak(isWeak);
Inst = CXI;
return AteExtraComma ? InstExtraComma : InstNormal;
}
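
With this grammar change, assembly such as "cmpxchg weak volatile i32* %p, i32 %old, i32 %new seq_cst seq_cst" now round-trips, and the new flag lands on the instruction through setWeak. A hedged sketch of the construction the parser performs (orderings chosen purely for illustration):

AtomicCmpXchgInst *CXI = new AtomicCmpXchgInst(
    Ptr, Cmp, New, SequentiallyConsistent, SequentiallyConsistent,
    CrossThread);
CXI->setVolatile(true); // 'volatile'
CXI->setWeak(true);     // 'weak': the exchange may fail spuriously
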
diff --git a/lib/AsmParser/LLParser.h b/lib/AsmParser/LLParser.h
index e2bf462..2efb260 100644
--- a/lib/AsmParser/LLParser.h
+++ b/lib/AsmParser/LLParser.h
@@ -34,6 +34,7 @@ namespace llvm {
class Instruction;
class Constant;
class GlobalValue;
+ class Comdat;
class MDString;
class MDNode;
class StructType;
@@ -122,6 +123,9 @@ namespace llvm {
std::map<unsigned, std::pair<GlobalValue*, LocTy> > ForwardRefValIDs;
std::vector<GlobalValue*> NumberedVals;
+ // Comdat forward reference information.
+ std::map<std::string, LocTy> ForwardRefComdats;
+
// References to blockaddress. The key is the function ValID, the value is
// a list of references to blocks in that function.
std::map<ValID, std::vector<std::pair<ValID, GlobalValue*> > >
@@ -154,6 +158,10 @@ namespace llvm {
GlobalValue *GetGlobalVal(const std::string &N, Type *Ty, LocTy Loc);
GlobalValue *GetGlobalVal(unsigned ID, Type *Ty, LocTy Loc);
+ /// Get a Comdat with the specified name, creating a forward reference
+ /// record if needed.
+ Comdat *getComdat(const std::string &N, LocTy Loc);
+
// Helper Routines.
bool ParseToken(lltok::Kind T, const char *ErrMsg);
bool EatIfPresent(lltok::Kind T) {
@@ -197,6 +205,9 @@ namespace llvm {
bool ParseTLSModel(GlobalVariable::ThreadLocalMode &TLM);
bool ParseOptionalThreadLocal(GlobalVariable::ThreadLocalMode &TLM);
+ bool parseOptionalUnnamedAddr(bool &UnnamedAddr) {
+ return ParseOptionalToken(lltok::kw_unnamed_addr, UnnamedAddr);
+ }
bool ParseOptionalAddrSpace(unsigned &AddrSpace);
bool ParseOptionalParamAttrs(AttrBuilder &B);
bool ParseOptionalReturnAttrs(AttrBuilder &B);
@@ -239,9 +250,12 @@ namespace llvm {
bool ParseNamedGlobal();
bool ParseGlobal(const std::string &Name, LocTy Loc, unsigned Linkage,
bool HasLinkage, unsigned Visibility,
- unsigned DLLStorageClass);
+ unsigned DLLStorageClass,
+ GlobalVariable::ThreadLocalMode TLM, bool UnnamedAddr);
bool ParseAlias(const std::string &Name, LocTy Loc, unsigned Visibility,
- unsigned DLLStorageClass);
+ unsigned DLLStorageClass,
+ GlobalVariable::ThreadLocalMode TLM, bool UnnamedAddr);
+ bool parseComdat();
bool ParseStandaloneMetadata();
bool ParseNamedMetadata();
bool ParseMDString(MDString *&Result);
@@ -353,6 +367,7 @@ namespace llvm {
bool ParseGlobalValue(Type *Ty, Constant *&V);
bool ParseGlobalTypeAndValue(Constant *&V);
bool ParseGlobalValueVector(SmallVectorImpl<Constant*> &Elts);
+ bool parseOptionalComdat(Comdat *&C);
bool ParseMetadataListValue(ValID &ID, PerFunctionState *PFS);
bool ParseMetadataValue(ValID &ID, PerFunctionState *PFS);
bool ParseMDNodeVector(SmallVectorImpl<Value*> &, PerFunctionState *PFS);
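The header only declares getComdat(); its definition is outside this excerpt. A plausible shape, inferred from the ForwardRefComdats map above and the Module::getOrInsertComdat call used by the bitcode reader later in this patch, is:

    // Assumption: illustrative body, not the actual LLParser.cpp hunk.
    Comdat *LLParser::getComdat(const std::string &N, LocTy Loc) {
      // Creating a comdat by name is idempotent on the Module.
      Comdat *C = M->getOrInsertComdat(N);
      // Record the first reference to a not-yet-defined comdat so a
      // diagnostic can point at it if no '$N = comdat ...' ever appears.
      if (!ForwardRefComdats.count(N))
        ForwardRefComdats[N] = Loc;
      return C;
    }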
diff --git a/lib/AsmParser/LLToken.h b/lib/AsmParser/LLToken.h
index b6b7d82..534d824 100644
--- a/lib/AsmParser/LLToken.h
+++ b/lib/AsmParser/LLToken.h
@@ -42,7 +42,8 @@ namespace lltok {
kw_linker_private, // NOTE: deprecated, for parser compatibility
kw_linker_private_weak, // NOTE: deprecated, for parser compatibility
kw_linkonce, kw_linkonce_odr,
- kw_weak, kw_weak_odr, kw_appending,
+ kw_weak, // Used as a linkage, and a modifier for "cmpxchg".
+ kw_weak_odr, kw_appending,
kw_dllimport, kw_dllexport, kw_common, kw_available_externally,
kw_default, kw_hidden, kw_protected,
kw_unnamed_addr,
@@ -107,6 +108,7 @@ namespace lltok {
kw_cold,
kw_inlinehint,
kw_inreg,
+ kw_jumptable,
kw_minsize,
kw_naked,
kw_nest,
@@ -140,6 +142,15 @@ namespace lltok {
kw_type,
kw_opaque,
+ kw_comdat,
+
+ // Comdat types
+ kw_any,
+ kw_exactmatch,
+ kw_largest,
+ kw_noduplicates,
+ kw_samesize,
+
kw_eq, kw_ne, kw_slt, kw_sgt, kw_sle, kw_sge, kw_ult, kw_ugt, kw_ule,
kw_uge, kw_oeq, kw_one, kw_olt, kw_ogt, kw_ole, kw_oge, kw_ord, kw_uno,
kw_ueq, kw_une,
@@ -178,6 +189,7 @@ namespace lltok {
// String valued tokens (StrVal).
LabelStr, // foo:
GlobalVar, // @foo @"foo"
+ ComdatVar, // $foo
LocalVar, // %foo %"foo"
MetadataVar, // !foo
StringConstant, // "foo"
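These tokens give textual IR a '$name' namespace. A quick way to see them exercised is to feed a one-line module through the parser entry point shown in the next file (header path assumed from this tree; attaching the comdat to a global is omitted since only the definition is needed to hit the new tokens):

    #include "llvm/AsmParser/Parser.h"
    #include "llvm/IR/LLVMContext.h"
    #include "llvm/IR/Module.h"
    #include "llvm/Support/SourceMgr.h"

    int main() {
      llvm::LLVMContext Ctx;
      llvm::SMDiagnostic Err;
      // '$c' lexes as ComdatVar, 'comdat' as kw_comdat, 'any' as kw_any.
      llvm::Module *M =
          llvm::ParseAssemblyString("$c = comdat any\n", nullptr, Err, Ctx);
      return M ? 0 : 1;
    }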
diff --git a/lib/AsmParser/Parser.cpp b/lib/AsmParser/Parser.cpp
index 2606bc2..91bb51c 100644
--- a/lib/AsmParser/Parser.cpp
+++ b/lib/AsmParser/Parser.cpp
@@ -17,8 +17,8 @@
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Support/system_error.h"
#include <cstring>
+#include <system_error>
using namespace llvm;
Module *llvm::ParseAssembly(MemoryBuffer *F,
@@ -41,21 +41,21 @@ Module *llvm::ParseAssembly(MemoryBuffer *F,
Module *llvm::ParseAssemblyFile(const std::string &Filename, SMDiagnostic &Err,
LLVMContext &Context) {
- std::unique_ptr<MemoryBuffer> File;
- if (error_code ec = MemoryBuffer::getFileOrSTDIN(Filename, File)) {
+ ErrorOr<std::unique_ptr<MemoryBuffer>> FileOrErr =
+ MemoryBuffer::getFileOrSTDIN(Filename);
+ if (std::error_code EC = FileOrErr.getError()) {
Err = SMDiagnostic(Filename, SourceMgr::DK_Error,
- "Could not open input file: " + ec.message());
+ "Could not open input file: " + EC.message());
return nullptr;
}
- return ParseAssembly(File.release(), nullptr, Err, Context);
+ return ParseAssembly(FileOrErr.get().release(), nullptr, Err, Context);
}
Module *llvm::ParseAssemblyString(const char *AsmString, Module *M,
SMDiagnostic &Err, LLVMContext &Context) {
MemoryBuffer *F =
- MemoryBuffer::getMemBuffer(StringRef(AsmString, strlen(AsmString)),
- "<string>");
+ MemoryBuffer::getMemBuffer(StringRef(AsmString), "<string>");
return ParseAssembly(F, M, Err, Context);
}
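The pattern above, returning ErrorOr<T> instead of filling an out-parameter and returning an error code, recurs throughout this patch. A self-contained sketch of the idiom (the function names are illustrative):

    #include "llvm/Support/ErrorOr.h"
    #include <string>
    #include <system_error>

    // Producer: success carries a value, failure carries a std::error_code.
    static llvm::ErrorOr<std::string> readGreeting(bool Fail) {
      if (Fail)
        return std::make_error_code(std::errc::io_error);
      return std::string("hello");
    }

    // Consumer: the same getError()/get() dance as ParseAssemblyFile above.
    static int useGreeting() {
      llvm::ErrorOr<std::string> G = readGreeting(false);
      if (std::error_code EC = G.getError())
        return EC.value();
      return (int)G.get().size();
    }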
diff --git a/lib/Bitcode/Reader/BitReader.cpp b/lib/Bitcode/Reader/BitReader.cpp
index 716299f..b5886c1 100644
--- a/lib/Bitcode/Reader/BitReader.cpp
+++ b/lib/Bitcode/Reader/BitReader.cpp
@@ -32,7 +32,7 @@ LLVMBool LLVMParseBitcodeInContext(LLVMContextRef ContextRef,
char **OutMessage) {
ErrorOr<Module *> ModuleOrErr =
parseBitcodeFile(unwrap(MemBuf), *unwrap(ContextRef));
- if (error_code EC = ModuleOrErr.getError()) {
+ if (std::error_code EC = ModuleOrErr.getError()) {
if (OutMessage)
*OutMessage = strdup(EC.message().c_str());
*OutModule = wrap((Module*)nullptr);
@@ -54,7 +54,7 @@ LLVMBool LLVMGetBitcodeModuleInContext(LLVMContextRef ContextRef,
ErrorOr<Module *> ModuleOrErr =
getLazyBitcodeModule(unwrap(MemBuf), *unwrap(ContextRef));
- if (error_code EC = ModuleOrErr.getError()) {
+ if (std::error_code EC = ModuleOrErr.getError()) {
*OutM = wrap((Module *)nullptr);
if (OutMessage)
*OutMessage = strdup(EC.message().c_str());
diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp
index 4170f98..192f753 100644
--- a/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -39,12 +39,11 @@ void BitcodeReader::materializeForwardReferencedFunctions() {
}
void BitcodeReader::FreeState() {
- if (BufferOwned)
- delete Buffer;
Buffer = nullptr;
std::vector<Type*>().swap(TypeList);
ValueList.clear();
MDValueList.clear();
+ std::vector<Comdat *>().swap(ComdatList);
std::vector<AttributeSet>().swap(MAttributes);
std::vector<BasicBlock*>().swap(FunctionBBs);
@@ -205,6 +204,22 @@ static SynchronizationScope GetDecodedSynchScope(unsigned Val) {
}
}
+static Comdat::SelectionKind getDecodedComdatSelectionKind(unsigned Val) {
+ switch (Val) {
+ default: // Map unknown selection kinds to any.
+ case bitc::COMDAT_SELECTION_KIND_ANY:
+ return Comdat::Any;
+ case bitc::COMDAT_SELECTION_KIND_EXACT_MATCH:
+ return Comdat::ExactMatch;
+ case bitc::COMDAT_SELECTION_KIND_LARGEST:
+ return Comdat::Largest;
+ case bitc::COMDAT_SELECTION_KIND_NO_DUPLICATES:
+ return Comdat::NoDuplicates;
+ case bitc::COMDAT_SELECTION_KIND_SAME_SIZE:
+ return Comdat::SameSize;
+ }
+}
+
static void UpgradeDLLImportExportLinkage(llvm::GlobalValue *GV, unsigned Val) {
switch (Val) {
case 5: GV->setDLLStorageClass(GlobalValue::DLLImportStorageClass); break;
@@ -470,7 +485,7 @@ static void decodeLLVMAttributesForBitcode(AttrBuilder &B,
(EncodedAttrs & 0xffff));
}
-error_code BitcodeReader::ParseAttributeBlock() {
+std::error_code BitcodeReader::ParseAttributeBlock() {
if (Stream.EnterSubBlock(bitc::PARAMATTR_BLOCK_ID))
return Error(InvalidRecord);
@@ -490,7 +505,7 @@ error_code BitcodeReader::ParseAttributeBlock() {
case BitstreamEntry::Error:
return Error(MalformedBlock);
case BitstreamEntry::EndBlock:
- return error_code::success();
+ return std::error_code();
case BitstreamEntry::Record:
// The interesting case.
break;
@@ -549,6 +564,8 @@ static Attribute::AttrKind GetAttrFromCode(uint64_t Code) {
return Attribute::InlineHint;
case bitc::ATTR_KIND_IN_REG:
return Attribute::InReg;
+ case bitc::ATTR_KIND_JUMP_TABLE:
+ return Attribute::JumpTable;
case bitc::ATTR_KIND_MIN_SIZE:
return Attribute::MinSize;
case bitc::ATTR_KIND_NAKED:
@@ -614,15 +631,15 @@ static Attribute::AttrKind GetAttrFromCode(uint64_t Code) {
}
}
-error_code BitcodeReader::ParseAttrKind(uint64_t Code,
- Attribute::AttrKind *Kind) {
+std::error_code BitcodeReader::ParseAttrKind(uint64_t Code,
+ Attribute::AttrKind *Kind) {
*Kind = GetAttrFromCode(Code);
if (*Kind == Attribute::None)
return Error(InvalidValue);
- return error_code::success();
+ return std::error_code();
}
-error_code BitcodeReader::ParseAttributeGroupBlock() {
+std::error_code BitcodeReader::ParseAttributeGroupBlock() {
if (Stream.EnterSubBlock(bitc::PARAMATTR_GROUP_BLOCK_ID))
return Error(InvalidRecord);
@@ -640,7 +657,7 @@ error_code BitcodeReader::ParseAttributeGroupBlock() {
case BitstreamEntry::Error:
return Error(MalformedBlock);
case BitstreamEntry::EndBlock:
- return error_code::success();
+ return std::error_code();
case BitstreamEntry::Record:
// The interesting case.
break;
@@ -662,13 +679,13 @@ error_code BitcodeReader::ParseAttributeGroupBlock() {
for (unsigned i = 2, e = Record.size(); i != e; ++i) {
if (Record[i] == 0) { // Enum attribute
Attribute::AttrKind Kind;
- if (error_code EC = ParseAttrKind(Record[++i], &Kind))
+ if (std::error_code EC = ParseAttrKind(Record[++i], &Kind))
return EC;
B.addAttribute(Kind);
} else if (Record[i] == 1) { // Align attribute
Attribute::AttrKind Kind;
- if (error_code EC = ParseAttrKind(Record[++i], &Kind))
+ if (std::error_code EC = ParseAttrKind(Record[++i], &Kind))
return EC;
if (Kind == Attribute::Alignment)
B.addAlignmentAttr(Record[++i]);
@@ -704,14 +721,14 @@ error_code BitcodeReader::ParseAttributeGroupBlock() {
}
}
-error_code BitcodeReader::ParseTypeTable() {
+std::error_code BitcodeReader::ParseTypeTable() {
if (Stream.EnterSubBlock(bitc::TYPE_BLOCK_ID_NEW))
return Error(InvalidRecord);
return ParseTypeTableBody();
}
-error_code BitcodeReader::ParseTypeTableBody() {
+std::error_code BitcodeReader::ParseTypeTableBody() {
if (!TypeList.empty())
return Error(InvalidMultipleBlocks);
@@ -731,7 +748,7 @@ error_code BitcodeReader::ParseTypeTableBody() {
case BitstreamEntry::EndBlock:
if (NumRecords != TypeList.size())
return Error(MalformedBlock);
- return error_code::success();
+ return std::error_code();
case BitstreamEntry::Record:
// The interesting case.
break;
@@ -931,7 +948,7 @@ error_code BitcodeReader::ParseTypeTableBody() {
}
}
-error_code BitcodeReader::ParseValueSymbolTable() {
+std::error_code BitcodeReader::ParseValueSymbolTable() {
if (Stream.EnterSubBlock(bitc::VALUE_SYMTAB_BLOCK_ID))
return Error(InvalidRecord);
@@ -947,7 +964,7 @@ error_code BitcodeReader::ParseValueSymbolTable() {
case BitstreamEntry::Error:
return Error(MalformedBlock);
case BitstreamEntry::EndBlock:
- return error_code::success();
+ return std::error_code();
case BitstreamEntry::Record:
// The interesting case.
break;
@@ -985,7 +1002,7 @@ error_code BitcodeReader::ParseValueSymbolTable() {
}
}
-error_code BitcodeReader::ParseMetadata() {
+std::error_code BitcodeReader::ParseMetadata() {
unsigned NextMDValueNo = MDValueList.size();
if (Stream.EnterSubBlock(bitc::METADATA_BLOCK_ID))
@@ -1002,7 +1019,7 @@ error_code BitcodeReader::ParseMetadata() {
case BitstreamEntry::Error:
return Error(MalformedBlock);
case BitstreamEntry::EndBlock:
- return error_code::success();
+ return std::error_code();
case BitstreamEntry::Record:
// The interesting case.
break;
@@ -1062,7 +1079,8 @@ error_code BitcodeReader::ParseMetadata() {
break;
}
case bitc::METADATA_STRING: {
- SmallString<8> String(Record.begin(), Record.end());
+ std::string String(Record.begin(), Record.end());
+ llvm::UpgradeMDStringConstant(String);
Value *V = MDString::get(Context, String);
MDValueList.AssignValue(V, NextMDValueNo++);
break;
@@ -1094,31 +1112,9 @@ uint64_t BitcodeReader::decodeSignRotatedValue(uint64_t V) {
return 1ULL << 63;
}
-// FIXME: Delete this in LLVM 4.0 and just assert that the aliasee is a
-// GlobalObject.
-static GlobalObject &
-getGlobalObjectInExpr(const DenseMap<GlobalAlias *, Constant *> &Map,
- Constant &C) {
- auto *GO = dyn_cast<GlobalObject>(&C);
- if (GO)
- return *GO;
-
- auto *GA = dyn_cast<GlobalAlias>(&C);
- if (GA)
- return getGlobalObjectInExpr(Map, *Map.find(GA)->second);
-
- auto &CE = cast<ConstantExpr>(C);
- assert(CE.getOpcode() == Instruction::BitCast ||
- CE.getOpcode() == Instruction::GetElementPtr ||
- CE.getOpcode() == Instruction::AddrSpaceCast);
- if (CE.getOpcode() == Instruction::GetElementPtr)
- assert(cast<GEPOperator>(CE).hasAllZeroIndices());
- return getGlobalObjectInExpr(Map, *CE.getOperand(0));
-}
-
/// ResolveGlobalAndAliasInits - Resolve all of the initializers for global
/// values and aliases that we can.
-error_code BitcodeReader::ResolveGlobalAndAliasInits() {
+std::error_code BitcodeReader::ResolveGlobalAndAliasInits() {
std::vector<std::pair<GlobalVariable*, unsigned> > GlobalInitWorklist;
std::vector<std::pair<GlobalAlias*, unsigned> > AliasInitWorklist;
std::vector<std::pair<Function*, unsigned> > FunctionPrefixWorklist;
@@ -1141,30 +1137,19 @@ error_code BitcodeReader::ResolveGlobalAndAliasInits() {
GlobalInitWorklist.pop_back();
}
- // FIXME: Delete this in LLVM 4.0
- // Older versions of llvm could write an alias pointing to another. We cannot
- // construct those aliases, so we first collect an alias to aliasee expression
- // and then compute the actual aliasee.
- DenseMap<GlobalAlias *, Constant *> AliasInit;
-
while (!AliasInitWorklist.empty()) {
unsigned ValID = AliasInitWorklist.back().second;
if (ValID >= ValueList.size()) {
AliasInits.push_back(AliasInitWorklist.back());
} else {
if (Constant *C = dyn_cast_or_null<Constant>(ValueList[ValID]))
- AliasInit.insert(std::make_pair(AliasInitWorklist.back().first, C));
+ AliasInitWorklist.back().first->setAliasee(C);
else
return Error(ExpectedConstant);
}
AliasInitWorklist.pop_back();
}
- for (auto &Pair : AliasInit) {
- auto &GO = getGlobalObjectInExpr(AliasInit, *Pair.second);
- Pair.first->setAliasee(&GO);
- }
-
while (!FunctionPrefixWorklist.empty()) {
unsigned ValID = FunctionPrefixWorklist.back().second;
if (ValID >= ValueList.size()) {
@@ -1178,7 +1163,7 @@ error_code BitcodeReader::ResolveGlobalAndAliasInits() {
FunctionPrefixWorklist.pop_back();
}
- return error_code::success();
+ return std::error_code();
}
static APInt ReadWideAPInt(ArrayRef<uint64_t> Vals, unsigned TypeBits) {
@@ -1189,7 +1174,7 @@ static APInt ReadWideAPInt(ArrayRef<uint64_t> Vals, unsigned TypeBits) {
return APInt(TypeBits, Words);
}
-error_code BitcodeReader::ParseConstants() {
+std::error_code BitcodeReader::ParseConstants() {
if (Stream.EnterSubBlock(bitc::CONSTANTS_BLOCK_ID))
return Error(InvalidRecord);
@@ -1212,7 +1197,7 @@ error_code BitcodeReader::ParseConstants() {
// Once all the constants have been read, go through and resolve forward
// references.
ValueList.ResolveConstantForwardRefs();
- return error_code::success();
+ return std::error_code();
case BitstreamEntry::Record:
// The interesting case.
break;
@@ -1627,7 +1612,7 @@ error_code BitcodeReader::ParseConstants() {
}
}
-error_code BitcodeReader::ParseUseLists() {
+std::error_code BitcodeReader::ParseUseLists() {
if (Stream.EnterSubBlock(bitc::USELIST_BLOCK_ID))
return Error(InvalidRecord);
@@ -1642,7 +1627,7 @@ error_code BitcodeReader::ParseUseLists() {
case BitstreamEntry::Error:
return Error(MalformedBlock);
case BitstreamEntry::EndBlock:
- return error_code::success();
+ return std::error_code();
case BitstreamEntry::Record:
// The interesting case.
break;
@@ -1667,7 +1652,7 @@ error_code BitcodeReader::ParseUseLists() {
/// RememberAndSkipFunctionBody - When we see the block for a function body,
/// remember where it is and then skip it. This lets us lazily deserialize the
/// functions.
-error_code BitcodeReader::RememberAndSkipFunctionBody() {
+std::error_code BitcodeReader::RememberAndSkipFunctionBody() {
// Get the function we are talking about.
if (FunctionsWithBodies.empty())
return Error(InsufficientFunctionProtos);
@@ -1682,10 +1667,10 @@ error_code BitcodeReader::RememberAndSkipFunctionBody() {
// Skip over the function block for now.
if (Stream.SkipBlock())
return Error(InvalidRecord);
- return error_code::success();
+ return std::error_code();
}
-error_code BitcodeReader::GlobalCleanup() {
+std::error_code BitcodeReader::GlobalCleanup() {
// Patch the initializers for globals and aliases up.
ResolveGlobalAndAliasInits();
if (!GlobalInits.empty() || !AliasInits.empty())
@@ -1711,10 +1696,10 @@ error_code BitcodeReader::GlobalCleanup() {
// want lazy deserialization.
std::vector<std::pair<GlobalVariable*, unsigned> >().swap(GlobalInits);
std::vector<std::pair<GlobalAlias*, unsigned> >().swap(AliasInits);
- return error_code::success();
+ return std::error_code();
}
-error_code BitcodeReader::ParseModule(bool Resume) {
+std::error_code BitcodeReader::ParseModule(bool Resume) {
if (Resume)
Stream.JumpToBit(NextUnreadBit);
else if (Stream.EnterSubBlock(bitc::MODULE_BLOCK_ID))
@@ -1745,30 +1730,30 @@ error_code BitcodeReader::ParseModule(bool Resume) {
return Error(MalformedBlock);
break;
case bitc::PARAMATTR_BLOCK_ID:
- if (error_code EC = ParseAttributeBlock())
+ if (std::error_code EC = ParseAttributeBlock())
return EC;
break;
case bitc::PARAMATTR_GROUP_BLOCK_ID:
- if (error_code EC = ParseAttributeGroupBlock())
+ if (std::error_code EC = ParseAttributeGroupBlock())
return EC;
break;
case bitc::TYPE_BLOCK_ID_NEW:
- if (error_code EC = ParseTypeTable())
+ if (std::error_code EC = ParseTypeTable())
return EC;
break;
case bitc::VALUE_SYMTAB_BLOCK_ID:
- if (error_code EC = ParseValueSymbolTable())
+ if (std::error_code EC = ParseValueSymbolTable())
return EC;
SeenValueSymbolTable = true;
break;
case bitc::CONSTANTS_BLOCK_ID:
- if (error_code EC = ParseConstants())
+ if (std::error_code EC = ParseConstants())
return EC;
- if (error_code EC = ResolveGlobalAndAliasInits())
+ if (std::error_code EC = ResolveGlobalAndAliasInits())
return EC;
break;
case bitc::METADATA_BLOCK_ID:
- if (error_code EC = ParseMetadata())
+ if (std::error_code EC = ParseMetadata())
return EC;
break;
case bitc::FUNCTION_BLOCK_ID:
@@ -1776,12 +1761,12 @@ error_code BitcodeReader::ParseModule(bool Resume) {
// FunctionsWithBodies list.
if (!SeenFirstFunctionBody) {
std::reverse(FunctionsWithBodies.begin(), FunctionsWithBodies.end());
- if (error_code EC = GlobalCleanup())
+ if (std::error_code EC = GlobalCleanup())
return EC;
SeenFirstFunctionBody = true;
}
- if (error_code EC = RememberAndSkipFunctionBody())
+ if (std::error_code EC = RememberAndSkipFunctionBody())
return EC;
// For streaming bitcode, suspend parsing when we reach the function
// bodies. Subsequent materialization calls will resume it when
@@ -1791,11 +1776,11 @@ error_code BitcodeReader::ParseModule(bool Resume) {
// just finish the parse now.
if (LazyStreamer && SeenValueSymbolTable) {
NextUnreadBit = Stream.GetCurrentBitNo();
- return error_code::success();
+ return std::error_code();
}
break;
case bitc::USELIST_BLOCK_ID:
- if (error_code EC = ParseUseLists())
+ if (std::error_code EC = ParseUseLists())
return EC;
break;
}
@@ -1870,6 +1855,20 @@ error_code BitcodeReader::ParseModule(bool Resume) {
GCTable.push_back(S);
break;
}
+ case bitc::MODULE_CODE_COMDAT: { // COMDAT: [selection_kind, name]
+ if (Record.size() < 2)
+ return Error(InvalidRecord);
+ Comdat::SelectionKind SK = getDecodedComdatSelectionKind(Record[0]);
+ unsigned ComdatNameSize = Record[1];
+ std::string ComdatName;
+ ComdatName.reserve(ComdatNameSize);
+ for (unsigned i = 0; i != ComdatNameSize; ++i)
+ ComdatName += (char)Record[2 + i];
+ Comdat *C = TheModule->getOrInsertComdat(ComdatName);
+ C->setSelectionKind(SK);
+ ComdatList.push_back(C);
+ break;
+ }
// GLOBALVAR: [pointer type, isconst, initid,
// linkage, alignment, section, visibility, threadlocal,
// unnamed_addr, dllstorageclass]
@@ -1930,6 +1929,12 @@ error_code BitcodeReader::ParseModule(bool Resume) {
// Remember which value to use for the global initializer.
if (unsigned InitID = Record[2])
GlobalInits.push_back(std::make_pair(NewGV, InitID-1));
+
+ if (Record.size() > 11)
+ if (unsigned ComdatID = Record[11]) {
+ assert(ComdatID <= ComdatList.size());
+ NewGV->setComdat(ComdatList[ComdatID - 1]);
+ }
break;
}
// FUNCTION: [type, callingconv, isproto, linkage, paramattr,
@@ -1983,6 +1988,12 @@ error_code BitcodeReader::ParseModule(bool Resume) {
else
UpgradeDLLImportExportLinkage(Func, Record[3]);
+ if (Record.size() > 12)
+ if (unsigned ComdatID = Record[12]) {
+ assert(ComdatID <= ComdatList.size());
+ Func->setComdat(ComdatList[ComdatID - 1]);
+ }
+
ValueList.push_back(Func);
// If this is a function with a body, remember the prototype we are
@@ -2017,6 +2028,10 @@ error_code BitcodeReader::ParseModule(bool Resume) {
NewGA->setDLLStorageClass(GetDecodedDLLStorageClass(Record[4]));
else
UpgradeDLLImportExportLinkage(NewGA, Record[2]);
+ if (Record.size() > 5)
+ NewGA->setThreadLocalMode(GetDecodedThreadLocalMode(Record[5]));
+ if (Record.size() > 6)
+ NewGA->setUnnamedAddr(Record[6]);
ValueList.push_back(NewGA);
AliasInits.push_back(std::make_pair(NewGA, Record[1]));
break;
@@ -2033,10 +2048,10 @@ error_code BitcodeReader::ParseModule(bool Resume) {
}
}
-error_code BitcodeReader::ParseBitcodeInto(Module *M) {
+std::error_code BitcodeReader::ParseBitcodeInto(Module *M) {
TheModule = nullptr;
- if (error_code EC = InitStream())
+ if (std::error_code EC = InitStream())
return EC;
// Sniff for the signature.
@@ -2052,7 +2067,7 @@ error_code BitcodeReader::ParseBitcodeInto(Module *M) {
// need to understand them all.
while (1) {
if (Stream.AtEndOfStream())
- return error_code::success();
+ return std::error_code();
BitstreamEntry Entry =
Stream.advance(BitstreamCursor::AF_DontAutoprocessAbbrevs);
@@ -2061,7 +2076,7 @@ error_code BitcodeReader::ParseBitcodeInto(Module *M) {
case BitstreamEntry::Error:
return Error(MalformedBlock);
case BitstreamEntry::EndBlock:
- return error_code::success();
+ return std::error_code();
case BitstreamEntry::SubBlock:
switch (Entry.ID) {
@@ -2074,10 +2089,10 @@ error_code BitcodeReader::ParseBitcodeInto(Module *M) {
if (TheModule)
return Error(InvalidMultipleBlocks);
TheModule = M;
- if (error_code EC = ParseModule(false))
+ if (std::error_code EC = ParseModule(false))
return EC;
if (LazyStreamer)
- return error_code::success();
+ return std::error_code();
break;
default:
if (Stream.SkipBlock())
@@ -2094,19 +2109,20 @@ error_code BitcodeReader::ParseBitcodeInto(Module *M) {
if (Stream.getAbbrevIDWidth() == 2 && Entry.ID == 2 &&
Stream.Read(6) == 2 && Stream.Read(24) == 0xa0a0a &&
Stream.AtEndOfStream())
- return error_code::success();
+ return std::error_code();
return Error(InvalidRecord);
}
}
}
-error_code BitcodeReader::ParseModuleTriple(std::string &Triple) {
+ErrorOr<std::string> BitcodeReader::parseModuleTriple() {
if (Stream.EnterSubBlock(bitc::MODULE_BLOCK_ID))
return Error(InvalidRecord);
SmallVector<uint64_t, 64> Record;
+ std::string Triple;
// Read all the records for this module.
while (1) {
BitstreamEntry Entry = Stream.advanceSkippingSubblocks();
@@ -2116,7 +2132,7 @@ error_code BitcodeReader::ParseModuleTriple(std::string &Triple) {
case BitstreamEntry::Error:
return Error(MalformedBlock);
case BitstreamEntry::EndBlock:
- return error_code::success();
+ return Triple;
case BitstreamEntry::Record:
// The interesting case.
break;
@@ -2135,10 +2151,11 @@ error_code BitcodeReader::ParseModuleTriple(std::string &Triple) {
}
Record.clear();
}
+ llvm_unreachable("Exit infinite loop");
}
-error_code BitcodeReader::ParseTriple(std::string &Triple) {
- if (error_code EC = InitStream())
+ErrorOr<std::string> BitcodeReader::parseTriple() {
+ if (std::error_code EC = InitStream())
return EC;
// Sniff for the signature.
@@ -2159,11 +2176,11 @@ error_code BitcodeReader::ParseTriple(std::string &Triple) {
case BitstreamEntry::Error:
return Error(MalformedBlock);
case BitstreamEntry::EndBlock:
- return error_code::success();
+ return std::error_code();
case BitstreamEntry::SubBlock:
if (Entry.ID == bitc::MODULE_BLOCK_ID)
- return ParseModuleTriple(Triple);
+ return parseModuleTriple();
// Ignore other sub-blocks.
if (Stream.SkipBlock())
@@ -2178,7 +2195,7 @@ error_code BitcodeReader::ParseTriple(std::string &Triple) {
}
/// ParseMetadataAttachment - Parse metadata attachments.
-error_code BitcodeReader::ParseMetadataAttachment() {
+std::error_code BitcodeReader::ParseMetadataAttachment() {
if (Stream.EnterSubBlock(bitc::METADATA_ATTACHMENT_ID))
return Error(InvalidRecord);
@@ -2191,7 +2208,7 @@ error_code BitcodeReader::ParseMetadataAttachment() {
case BitstreamEntry::Error:
return Error(MalformedBlock);
case BitstreamEntry::EndBlock:
- return error_code::success();
+ return std::error_code();
case BitstreamEntry::Record:
// The interesting case.
break;
@@ -2225,7 +2242,7 @@ error_code BitcodeReader::ParseMetadataAttachment() {
}
/// ParseFunctionBody - Lazily parse the specified function body block.
-error_code BitcodeReader::ParseFunctionBody(Function *F) {
+std::error_code BitcodeReader::ParseFunctionBody(Function *F) {
if (Stream.EnterSubBlock(bitc::FUNCTION_BLOCK_ID))
return Error(InvalidRecord);
@@ -2261,20 +2278,20 @@ error_code BitcodeReader::ParseFunctionBody(Function *F) {
return Error(InvalidRecord);
break;
case bitc::CONSTANTS_BLOCK_ID:
- if (error_code EC = ParseConstants())
+ if (std::error_code EC = ParseConstants())
return EC;
NextValueNo = ValueList.size();
break;
case bitc::VALUE_SYMTAB_BLOCK_ID:
- if (error_code EC = ParseValueSymbolTable())
+ if (std::error_code EC = ParseValueSymbolTable())
return EC;
break;
case bitc::METADATA_ATTACHMENT_ID:
- if (error_code EC = ParseMetadataAttachment())
+ if (std::error_code EC = ParseMetadataAttachment())
return EC;
break;
case bitc::METADATA_BLOCK_ID:
- if (error_code EC = ParseMetadata())
+ if (std::error_code EC = ParseMetadata())
return EC;
break;
}
@@ -2857,7 +2874,7 @@ error_code BitcodeReader::ParseFunctionBody(Function *F) {
assert((CT != LandingPadInst::Filter ||
isa<ArrayType>(Val->getType())) &&
"Filter clause has invalid type!");
- LP->addClause(Val);
+ LP->addClause(cast<Constant>(Val));
}
I = LP;
@@ -2950,7 +2967,7 @@ error_code BitcodeReader::ParseFunctionBody(Function *F) {
}
case bitc::FUNC_CODE_INST_CMPXCHG: {
// CMPXCHG:[ptrty, ptr, cmp, new, vol, successordering, synchscope,
- // failureordering]
+ // failureordering?, isweak?]
unsigned OpNum = 0;
Value *Ptr, *Cmp, *New;
if (getValueTypePair(Record, OpNum, NextValueNo, Ptr) ||
@@ -2958,7 +2975,7 @@ error_code BitcodeReader::ParseFunctionBody(Function *F) {
cast<PointerType>(Ptr->getType())->getElementType(), Cmp) ||
popValue(Record, OpNum, NextValueNo,
cast<PointerType>(Ptr->getType())->getElementType(), New) ||
- (OpNum + 3 != Record.size() && OpNum + 4 != Record.size()))
+ (Record.size() < OpNum + 3 || Record.size() > OpNum + 5))
return Error(InvalidRecord);
AtomicOrdering SuccessOrdering = GetDecodedOrdering(Record[OpNum+1]);
if (SuccessOrdering == NotAtomic || SuccessOrdering == Unordered)
@@ -2975,6 +2992,17 @@ error_code BitcodeReader::ParseFunctionBody(Function *F) {
I = new AtomicCmpXchgInst(Ptr, Cmp, New, SuccessOrdering, FailureOrdering,
SynchScope);
cast<AtomicCmpXchgInst>(I)->setVolatile(Record[OpNum]);
+
+ if (Record.size() < 8) {
+ // Before weak cmpxchgs existed, the instruction simply returned the
+ // value loaded from memory, so bitcode files from that era will be
+ // expecting the first component of a modern cmpxchg.
+ CurBB->getInstList().push_back(I);
+ I = ExtractValueInst::Create(I, 0);
+ } else {
+ cast<AtomicCmpXchgInst>(I)->setWeak(Record[OpNum+4]);
+ }
+
InstructionList.push_back(I);
break;
}
@@ -3144,27 +3172,29 @@ OutOfRecordLoop:
ValueList.shrinkTo(ModuleValueListSize);
MDValueList.shrinkTo(ModuleMDValueListSize);
std::vector<BasicBlock*>().swap(FunctionBBs);
- return error_code::success();
+ return std::error_code();
}
/// Find the function body in the bitcode stream
-error_code BitcodeReader::FindFunctionInStream(Function *F,
- DenseMap<Function*, uint64_t>::iterator DeferredFunctionInfoIterator) {
+std::error_code BitcodeReader::FindFunctionInStream(
+ Function *F,
+ DenseMap<Function *, uint64_t>::iterator DeferredFunctionInfoIterator) {
while (DeferredFunctionInfoIterator->second == 0) {
if (Stream.AtEndOfStream())
return Error(CouldNotFindFunctionInStream);
// ParseModule will parse the next body in the stream and set its
// position in the DeferredFunctionInfo map.
- if (error_code EC = ParseModule(true))
+ if (std::error_code EC = ParseModule(true))
return EC;
}
- return error_code::success();
+ return std::error_code();
}
//===----------------------------------------------------------------------===//
// GVMaterializer implementation
//===----------------------------------------------------------------------===//
+void BitcodeReader::releaseBuffer() { Buffer.release(); }
bool BitcodeReader::isMaterializable(const GlobalValue *GV) const {
if (const Function *F = dyn_cast<Function>(GV)) {
@@ -3174,24 +3204,24 @@ bool BitcodeReader::isMaterializable(const GlobalValue *GV) const {
return false;
}
-error_code BitcodeReader::Materialize(GlobalValue *GV) {
+std::error_code BitcodeReader::Materialize(GlobalValue *GV) {
Function *F = dyn_cast<Function>(GV);
// If it's not a function or is already material, ignore the request.
if (!F || !F->isMaterializable())
- return error_code::success();
+ return std::error_code();
DenseMap<Function*, uint64_t>::iterator DFII = DeferredFunctionInfo.find(F);
assert(DFII != DeferredFunctionInfo.end() && "Deferred function not found!");
// If its position is recorded as 0, its body is somewhere in the stream
// but we haven't seen it yet.
if (DFII->second == 0 && LazyStreamer)
- if (error_code EC = FindFunctionInStream(F, DFII))
+ if (std::error_code EC = FindFunctionInStream(F, DFII))
return EC;
// Move the bit stream to the saved position of the deferred function body.
Stream.JumpToBit(DFII->second);
- if (error_code EC = ParseFunctionBody(F))
+ if (std::error_code EC = ParseFunctionBody(F))
return EC;
// Upgrade any old intrinsic calls in the function.
@@ -3206,7 +3236,7 @@ error_code BitcodeReader::Materialize(GlobalValue *GV) {
}
}
- return error_code::success();
+ return std::error_code();
}
bool BitcodeReader::isDematerializable(const GlobalValue *GV) const {
@@ -3228,8 +3258,7 @@ void BitcodeReader::Dematerialize(GlobalValue *GV) {
F->deleteBody();
}
-
-error_code BitcodeReader::MaterializeModule(Module *M) {
+std::error_code BitcodeReader::MaterializeModule(Module *M) {
assert(M == TheModule &&
"Can only Materialize the Module this BitcodeReader is attached to.");
// Iterate over the module, deserializing any functions that are still on
@@ -3237,7 +3266,7 @@ error_code BitcodeReader::MaterializeModule(Module *M) {
for (Module::iterator F = TheModule->begin(), E = TheModule->end();
F != E; ++F) {
if (F->isMaterializable()) {
- if (error_code EC = Materialize(F))
+ if (std::error_code EC = Materialize(F))
return EC;
}
}
@@ -3270,16 +3299,16 @@ error_code BitcodeReader::MaterializeModule(Module *M) {
UpgradeInstWithTBAATag(InstsWithTBAATag[I]);
UpgradeDebugInfo(*M);
- return error_code::success();
+ return std::error_code();
}
-error_code BitcodeReader::InitStream() {
+std::error_code BitcodeReader::InitStream() {
if (LazyStreamer)
return InitLazyStream();
return InitStreamFromBuffer();
}
-error_code BitcodeReader::InitStreamFromBuffer() {
+std::error_code BitcodeReader::InitStreamFromBuffer() {
const unsigned char *BufPtr = (const unsigned char*)Buffer->getBufferStart();
const unsigned char *BufEnd = BufPtr+Buffer->getBufferSize();
@@ -3299,10 +3328,10 @@ error_code BitcodeReader::InitStreamFromBuffer() {
StreamFile.reset(new BitstreamReader(BufPtr, BufEnd));
Stream.init(*StreamFile);
- return error_code::success();
+ return std::error_code();
}
-error_code BitcodeReader::InitLazyStream() {
+std::error_code BitcodeReader::InitLazyStream() {
// Check and strip off the bitcode wrapper; BitstreamReader expects never to
// see it.
StreamingMemoryObject *Bytes = new StreamingMemoryObject(LazyStreamer);
@@ -3323,12 +3352,12 @@ error_code BitcodeReader::InitLazyStream() {
Bytes->dropLeadingBytes(bitcodeStart - buf);
Bytes->setKnownObjectSize(bitcodeEnd - bitcodeStart);
}
- return error_code::success();
+ return std::error_code();
}
namespace {
-class BitcodeErrorCategoryType : public error_category {
- const char *name() const override {
+class BitcodeErrorCategoryType : public std::error_category {
+ const char *name() const LLVM_NOEXCEPT override {
return "llvm.bitcode";
}
std::string message(int IE) const override {
@@ -3378,7 +3407,7 @@ class BitcodeErrorCategoryType : public error_category {
};
}
-const error_category &BitcodeReader::BitcodeErrorCategory() {
+const std::error_category &BitcodeReader::BitcodeErrorCategory() {
static BitcodeErrorCategoryType O;
return O;
}
@@ -3394,12 +3423,11 @@ ErrorOr<Module *> llvm::getLazyBitcodeModule(MemoryBuffer *Buffer,
Module *M = new Module(Buffer->getBufferIdentifier(), Context);
BitcodeReader *R = new BitcodeReader(Buffer, Context);
M->setMaterializer(R);
- if (error_code EC = R->ParseBitcodeInto(M)) {
+ if (std::error_code EC = R->ParseBitcodeInto(M)) {
+ R->releaseBuffer(); // Never take ownership on error.
delete M; // Also deletes R.
return EC;
}
- // Have the BitcodeReader dtor delete 'Buffer'.
- R->setBufferOwned(true);
R->materializeForwardReferencedFunctions();
@@ -3414,13 +3442,12 @@ Module *llvm::getStreamedBitcodeModule(const std::string &name,
Module *M = new Module(name, Context);
BitcodeReader *R = new BitcodeReader(streamer, Context);
M->setMaterializer(R);
- if (error_code EC = R->ParseBitcodeInto(M)) {
+ if (std::error_code EC = R->ParseBitcodeInto(M)) {
if (ErrMsg)
*ErrMsg = EC.message();
delete M; // Also deletes R.
return nullptr;
}
- R->setBufferOwned(false); // no buffer to delete
return M;
}
@@ -3430,13 +3457,8 @@ ErrorOr<Module *> llvm::parseBitcodeFile(MemoryBuffer *Buffer,
if (!ModuleOrErr)
return ModuleOrErr;
Module *M = ModuleOrErr.get();
-
- // Don't let the BitcodeReader dtor delete 'Buffer', regardless of whether
- // there was an error.
- static_cast<BitcodeReader*>(M->getMaterializer())->setBufferOwned(false);
-
// Read in the entire module, and destroy the BitcodeReader.
- if (error_code EC = M->materializeAllPermanently()) {
+ if (std::error_code EC = M->materializeAllPermanently(true)) {
delete M;
return EC;
}
@@ -3448,17 +3470,12 @@ ErrorOr<Module *> llvm::parseBitcodeFile(MemoryBuffer *Buffer,
}
std::string llvm::getBitcodeTargetTriple(MemoryBuffer *Buffer,
- LLVMContext& Context,
- std::string *ErrMsg) {
+ LLVMContext &Context) {
BitcodeReader *R = new BitcodeReader(Buffer, Context);
- // Don't let the BitcodeReader dtor delete 'Buffer'.
- R->setBufferOwned(false);
-
- std::string Triple("");
- if (error_code EC = R->ParseTriple(Triple))
- if (ErrMsg)
- *ErrMsg = EC.message();
-
+ ErrorOr<std::string> Triple = R->parseTriple();
+ R->releaseBuffer();
delete R;
- return Triple;
+ if (Triple.getError())
+ return "";
+ return Triple.get();
}
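The recurring releaseBuffer() calls implement a simple ownership handshake: the reader now holds the MemoryBuffer in a unique_ptr and deletes it by default, and a caller that wants to keep the buffer alive must take it back before destroying the reader. A standalone stand-in (Buffer and Reader are placeholder names):

    #include <memory>

    struct Buffer {}; // stand-in for llvm::MemoryBuffer

    class Reader {
      std::unique_ptr<Buffer> Buf; // owns by default, as BitcodeReader now does
    public:
      explicit Reader(Buffer *B) : Buf(B) {}
      void releaseBuffer() { Buf.release(); } // hand ownership back
    };

    void caller() {
      Buffer *B = new Buffer;
      {
        Reader R(B);
        R.releaseBuffer(); // as in getBitcodeTargetTriple above
      }                    // ~Reader no longer deletes B
      delete B;            // the caller is responsible again
    }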
diff --git a/lib/Bitcode/Reader/BitcodeReader.h b/lib/Bitcode/Reader/BitcodeReader.h
index 593d8f9..1d4869a 100644
--- a/lib/Bitcode/Reader/BitcodeReader.h
+++ b/lib/Bitcode/Reader/BitcodeReader.h
@@ -22,10 +22,11 @@
#include "llvm/IR/OperandTraits.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/ValueHandle.h"
-#include "llvm/Support/system_error.h"
+#include <system_error>
#include <vector>
namespace llvm {
+ class Comdat;
class MemoryBuffer;
class LLVMContext;
@@ -125,8 +126,7 @@ public:
class BitcodeReader : public GVMaterializer {
LLVMContext &Context;
Module *TheModule;
- MemoryBuffer *Buffer;
- bool BufferOwned;
+ std::unique_ptr<MemoryBuffer> Buffer;
std::unique_ptr<BitstreamReader> StreamFile;
BitstreamCursor Stream;
DataStreamer *LazyStreamer;
@@ -136,6 +136,7 @@ class BitcodeReader : public GVMaterializer {
std::vector<Type*> TypeList;
BitcodeReaderValueList ValueList;
BitcodeReaderMDValueList MDValueList;
+ std::vector<Comdat *> ComdatList;
SmallVector<Instruction *, 64> InstructionList;
SmallVector<SmallVector<uint64_t, 64>, 64> UseListRecords;
@@ -193,7 +194,7 @@ class BitcodeReader : public GVMaterializer {
/// not need this flag.
bool UseRelativeIDs;
- static const error_category &BitcodeErrorCategory();
+ static const std::error_category &BitcodeErrorCategory();
public:
enum ErrorType {
@@ -219,47 +220,39 @@ public:
InvalidValue // Invalid version, inst number, attr number, etc
};
- error_code Error(ErrorType E) {
- return error_code(E, BitcodeErrorCategory());
+ std::error_code Error(ErrorType E) {
+ return std::error_code(E, BitcodeErrorCategory());
}
explicit BitcodeReader(MemoryBuffer *buffer, LLVMContext &C)
- : Context(C), TheModule(nullptr), Buffer(buffer), BufferOwned(false),
- LazyStreamer(nullptr), NextUnreadBit(0), SeenValueSymbolTable(false),
- ValueList(C), MDValueList(C),
- SeenFirstFunctionBody(false), UseRelativeIDs(false) {
- }
+ : Context(C), TheModule(nullptr), Buffer(buffer), LazyStreamer(nullptr),
+ NextUnreadBit(0), SeenValueSymbolTable(false), ValueList(C),
+ MDValueList(C), SeenFirstFunctionBody(false), UseRelativeIDs(false) {}
explicit BitcodeReader(DataStreamer *streamer, LLVMContext &C)
- : Context(C), TheModule(nullptr), Buffer(nullptr), BufferOwned(false),
- LazyStreamer(streamer), NextUnreadBit(0), SeenValueSymbolTable(false),
- ValueList(C), MDValueList(C),
- SeenFirstFunctionBody(false), UseRelativeIDs(false) {
- }
- ~BitcodeReader() {
- FreeState();
- }
+ : Context(C), TheModule(nullptr), Buffer(nullptr), LazyStreamer(streamer),
+ NextUnreadBit(0), SeenValueSymbolTable(false), ValueList(C),
+ MDValueList(C), SeenFirstFunctionBody(false), UseRelativeIDs(false) {}
+ ~BitcodeReader() { FreeState(); }
void materializeForwardReferencedFunctions();
void FreeState();
- /// setBufferOwned - If this is true, the reader will destroy the MemoryBuffer
- /// when the reader is destroyed.
- void setBufferOwned(bool Owned) { BufferOwned = Owned; }
+ void releaseBuffer() override;
bool isMaterializable(const GlobalValue *GV) const override;
bool isDematerializable(const GlobalValue *GV) const override;
- error_code Materialize(GlobalValue *GV) override;
- error_code MaterializeModule(Module *M) override;
+ std::error_code Materialize(GlobalValue *GV) override;
+ std::error_code MaterializeModule(Module *M) override;
void Dematerialize(GlobalValue *GV) override;
/// @brief Main interface to parsing a bitcode buffer.
/// @returns a std::error_code describing the failure, if any.
- error_code ParseBitcodeInto(Module *M);
+ std::error_code ParseBitcodeInto(Module *M);
/// @brief Cheap mechanism to just extract the module triple.
/// @returns the module's target triple, or an error code on failure.
- error_code ParseTriple(std::string &Triple);
+ ErrorOr<std::string> parseTriple();
static uint64_t decodeSignRotatedValue(uint64_t V);
@@ -346,28 +339,29 @@ private:
return getFnValueByID(ValNo, Ty);
}
- error_code ParseAttrKind(uint64_t Code, Attribute::AttrKind *Kind);
- error_code ParseModule(bool Resume);
- error_code ParseAttributeBlock();
- error_code ParseAttributeGroupBlock();
- error_code ParseTypeTable();
- error_code ParseTypeTableBody();
-
- error_code ParseValueSymbolTable();
- error_code ParseConstants();
- error_code RememberAndSkipFunctionBody();
- error_code ParseFunctionBody(Function *F);
- error_code GlobalCleanup();
- error_code ResolveGlobalAndAliasInits();
- error_code ParseMetadata();
- error_code ParseMetadataAttachment();
- error_code ParseModuleTriple(std::string &Triple);
- error_code ParseUseLists();
- error_code InitStream();
- error_code InitStreamFromBuffer();
- error_code InitLazyStream();
- error_code FindFunctionInStream(Function *F,
- DenseMap<Function*, uint64_t>::iterator DeferredFunctionInfoIterator);
+ std::error_code ParseAttrKind(uint64_t Code, Attribute::AttrKind *Kind);
+ std::error_code ParseModule(bool Resume);
+ std::error_code ParseAttributeBlock();
+ std::error_code ParseAttributeGroupBlock();
+ std::error_code ParseTypeTable();
+ std::error_code ParseTypeTableBody();
+
+ std::error_code ParseValueSymbolTable();
+ std::error_code ParseConstants();
+ std::error_code RememberAndSkipFunctionBody();
+ std::error_code ParseFunctionBody(Function *F);
+ std::error_code GlobalCleanup();
+ std::error_code ResolveGlobalAndAliasInits();
+ std::error_code ParseMetadata();
+ std::error_code ParseMetadataAttachment();
+ ErrorOr<std::string> parseModuleTriple();
+ std::error_code ParseUseLists();
+ std::error_code InitStream();
+ std::error_code InitStreamFromBuffer();
+ std::error_code InitLazyStream();
+ std::error_code FindFunctionInStream(
+ Function *F,
+ DenseMap<Function *, uint64_t>::iterator DeferredFunctionInfoIterator);
};
} // End llvm namespace
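The Error(ErrorType) helper above is the standard <system_error> recipe: a domain enum plus a singleton std::error_category. A freestanding equivalent (DemoError and "demo.bitcode" are made-up names; the LLVM version spells noexcept as LLVM_NOEXCEPT for older compilers):

    #include <string>
    #include <system_error>

    enum DemoError { DemoInvalidRecord = 1, DemoMalformedBlock };

    class DemoErrorCategory : public std::error_category {
      const char *name() const noexcept override { return "demo.bitcode"; }
      std::string message(int IE) const override {
        switch (IE) {
        case DemoInvalidRecord:  return "Invalid record";
        case DemoMalformedBlock: return "Malformed block";
        }
        return "Unknown error";
      }
    };

    static std::error_code demoError(DemoError E) {
      static DemoErrorCategory Cat; // one category object per error domain
      return std::error_code(E, Cat);
    }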
diff --git a/lib/Bitcode/Reader/BitstreamReader.cpp b/lib/Bitcode/Reader/BitstreamReader.cpp
index f31e1fa..72451ec 100644
--- a/lib/Bitcode/Reader/BitstreamReader.cpp
+++ b/lib/Bitcode/Reader/BitstreamReader.cpp
@@ -97,7 +97,7 @@ void BitstreamCursor::readAbbreviatedField(const BitCodeAbbrevOp &Op,
switch (Op.getEncoding()) {
case BitCodeAbbrevOp::Array:
case BitCodeAbbrevOp::Blob:
- assert(0 && "Should not reach here");
+ llvm_unreachable("Should not reach here");
case BitCodeAbbrevOp::Fixed:
Vals.push_back(Read((unsigned)Op.getEncodingData()));
break;
@@ -117,7 +117,7 @@ void BitstreamCursor::skipAbbreviatedField(const BitCodeAbbrevOp &Op) {
switch (Op.getEncoding()) {
case BitCodeAbbrevOp::Array:
case BitCodeAbbrevOp::Blob:
- assert(0 && "Should not reach here");
+ llvm_unreachable("Should not reach here");
case BitCodeAbbrevOp::Fixed:
(void)Read((unsigned)Op.getEncodingData());
break;
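The assert(0) to llvm_unreachable swap is more than style: assert compiles away under NDEBUG and lets control fall out of the switch, while llvm_unreachable both traps in asserts builds and tells the optimizer the path is dead. A minimal sketch:

    #include "llvm/Support/ErrorHandling.h"

    static int decode(int Kind) {
      switch (Kind) {
      case 0: return 10;
      case 1: return 20;
      }
      // No return is needed below the switch; this is a documented dead end.
      llvm_unreachable("callers only pass kinds 0 and 1");
    }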
diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp
index cc73b84..dd9282a 100644
--- a/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -177,6 +177,8 @@ static uint64_t getAttrKindEncoding(Attribute::AttrKind Kind) {
return bitc::ATTR_KIND_INLINE_HINT;
case Attribute::InReg:
return bitc::ATTR_KIND_IN_REG;
+ case Attribute::JumpTable:
+ return bitc::ATTR_KIND_JUMP_TABLE;
case Attribute::MinSize:
return bitc::ATTR_KIND_MIN_SIZE;
case Attribute::Naked:
@@ -511,7 +513,7 @@ static unsigned getEncodedDLLStorageClass(const GlobalValue &GV) {
llvm_unreachable("Invalid DLL storage class");
}
-static unsigned getEncodedThreadLocalMode(const GlobalVariable &GV) {
+static unsigned getEncodedThreadLocalMode(const GlobalValue &GV) {
switch (GV.getThreadLocalMode()) {
case GlobalVariable::NotThreadLocal: return 0;
case GlobalVariable::GeneralDynamicTLSModel: return 1;
@@ -522,6 +524,35 @@ static unsigned getEncodedThreadLocalMode(const GlobalVariable &GV) {
llvm_unreachable("Invalid TLS model");
}
+static unsigned getEncodedComdatSelectionKind(const Comdat &C) {
+ switch (C.getSelectionKind()) {
+ case Comdat::Any:
+ return bitc::COMDAT_SELECTION_KIND_ANY;
+ case Comdat::ExactMatch:
+ return bitc::COMDAT_SELECTION_KIND_EXACT_MATCH;
+ case Comdat::Largest:
+ return bitc::COMDAT_SELECTION_KIND_LARGEST;
+ case Comdat::NoDuplicates:
+ return bitc::COMDAT_SELECTION_KIND_NO_DUPLICATES;
+ case Comdat::SameSize:
+ return bitc::COMDAT_SELECTION_KIND_SAME_SIZE;
+ }
+ llvm_unreachable("Invalid selection kind");
+}
+
+static void writeComdats(const ValueEnumerator &VE, BitstreamWriter &Stream) {
+ SmallVector<uint8_t, 64> Vals;
+ for (const Comdat *C : VE.getComdats()) {
+ // COMDAT: [selection_kind, name]
+ Vals.push_back(getEncodedComdatSelectionKind(*C));
+ Vals.push_back(C->getName().size());
+ for (char Chr : C->getName())
+ Vals.push_back((unsigned char)Chr);
+ Stream.EmitRecord(bitc::MODULE_CODE_COMDAT, Vals, /*AbbrevToUse=*/0);
+ Vals.clear();
+ }
+}
+
// Emit top-level description of module, including target triple, inline asm,
// descriptors for global variables, and function prototype info.
static void WriteModuleInfo(const Module *M, const ValueEnumerator &VE,
@@ -623,12 +654,14 @@ static void WriteModuleInfo(const Module *M, const ValueEnumerator &VE,
if (GV.isThreadLocal() ||
GV.getVisibility() != GlobalValue::DefaultVisibility ||
GV.hasUnnamedAddr() || GV.isExternallyInitialized() ||
- GV.getDLLStorageClass() != GlobalValue::DefaultStorageClass) {
+ GV.getDLLStorageClass() != GlobalValue::DefaultStorageClass ||
+ GV.hasComdat()) {
Vals.push_back(getEncodedVisibility(GV));
Vals.push_back(getEncodedThreadLocalMode(GV));
Vals.push_back(GV.hasUnnamedAddr());
Vals.push_back(GV.isExternallyInitialized());
Vals.push_back(getEncodedDLLStorageClass(GV));
+ Vals.push_back(GV.hasComdat() ? VE.getComdatID(GV.getComdat()) : 0);
} else {
AbbrevToUse = SimpleGVarAbbrev;
}
@@ -654,6 +687,7 @@ static void WriteModuleInfo(const Module *M, const ValueEnumerator &VE,
Vals.push_back(F.hasPrefixData() ? (VE.getValueID(F.getPrefixData()) + 1)
: 0);
Vals.push_back(getEncodedDLLStorageClass(F));
+ Vals.push_back(F.hasComdat() ? VE.getComdatID(F.getComdat()) : 0);
unsigned AbbrevToUse = 0;
Stream.EmitRecord(bitc::MODULE_CODE_FUNCTION, Vals, AbbrevToUse);
@@ -668,6 +702,8 @@ static void WriteModuleInfo(const Module *M, const ValueEnumerator &VE,
Vals.push_back(getEncodedLinkage(A));
Vals.push_back(getEncodedVisibility(A));
Vals.push_back(getEncodedDLLStorageClass(A));
+ Vals.push_back(getEncodedThreadLocalMode(A));
+ Vals.push_back(A.hasUnnamedAddr());
unsigned AbbrevToUse = 0;
Stream.EmitRecord(bitc::MODULE_CODE_ALIAS, Vals, AbbrevToUse);
Vals.clear();
@@ -1445,6 +1481,7 @@ static void WriteInstruction(const Instruction &I, unsigned InstID,
cast<AtomicCmpXchgInst>(I).getSynchScope()));
Vals.push_back(GetEncodedOrdering(
cast<AtomicCmpXchgInst>(I).getFailureOrdering()));
+ Vals.push_back(cast<AtomicCmpXchgInst>(I).isWeak());
break;
case Instruction::AtomicRMW:
Code = bitc::FUNC_CODE_INST_ATOMICRMW;
@@ -1910,6 +1947,8 @@ static void WriteModule(const Module *M, BitstreamWriter &Stream) {
// Emit information describing all of the types in the module.
WriteTypeTable(VE, Stream);
+ writeComdats(VE, Stream);
+
// Emit top-level description of module, including target triple, inline asm,
// descriptors for global variables, and function prototype info.
WriteModuleInfo(M, VE, Stream);
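writeComdats above and the MODULE_CODE_COMDAT case in the reader agree on a simple record layout: selection kind, explicit name length, then one record element per name byte. A round-trip sketch in plain C++:

    #include <cstdint>
    #include <string>
    #include <vector>

    static std::vector<uint64_t> encodeComdat(unsigned SelectionKind,
                                              const std::string &Name) {
      std::vector<uint64_t> Record;
      Record.push_back(SelectionKind); // e.g. COMDAT_SELECTION_KIND_ANY
      Record.push_back(Name.size());   // explicit length; no terminator
      for (char C : Name)
        Record.push_back((unsigned char)C);
      return Record;
    }

    static std::string decodeComdatName(const std::vector<uint64_t> &Record) {
      std::string Name;
      for (uint64_t I = 0, E = Record[1]; I != E; ++I)
        Name += (char)Record[2 + I];
      return Name; // Record[0] still holds the selection kind
    }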
diff --git a/lib/Bitcode/Writer/ValueEnumerator.cpp b/lib/Bitcode/Writer/ValueEnumerator.cpp
index 8531e76..15f8034 100644
--- a/lib/Bitcode/Writer/ValueEnumerator.cpp
+++ b/lib/Bitcode/Writer/ValueEnumerator.cpp
@@ -73,37 +73,34 @@ ValueEnumerator::ValueEnumerator(const Module *M) {
SmallVector<std::pair<unsigned, MDNode*>, 8> MDs;
// Enumerate types used by function bodies and argument lists.
- for (Module::const_iterator F = M->begin(), E = M->end(); F != E; ++F) {
-
- for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
- I != E; ++I)
- EnumerateType(I->getType());
-
- for (Function::const_iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
- for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I!=E;++I){
- for (User::const_op_iterator OI = I->op_begin(), E = I->op_end();
- OI != E; ++OI) {
- if (MDNode *MD = dyn_cast<MDNode>(*OI))
+ for (const Function &F : *M) {
+ for (const Argument &A : F.args())
+ EnumerateType(A.getType());
+
+ for (const BasicBlock &BB : F)
+ for (const Instruction &I : BB) {
+ for (const Use &Op : I.operands()) {
+ if (MDNode *MD = dyn_cast<MDNode>(&Op))
if (MD->isFunctionLocal() && MD->getFunction())
// These will get enumerated during function-incorporation.
continue;
- EnumerateOperandType(*OI);
+ EnumerateOperandType(Op);
}
- EnumerateType(I->getType());
- if (const CallInst *CI = dyn_cast<CallInst>(I))
+ EnumerateType(I.getType());
+ if (const CallInst *CI = dyn_cast<CallInst>(&I))
EnumerateAttributes(CI->getAttributes());
- else if (const InvokeInst *II = dyn_cast<InvokeInst>(I))
+ else if (const InvokeInst *II = dyn_cast<InvokeInst>(&I))
EnumerateAttributes(II->getAttributes());
// Enumerate metadata attached with this instruction.
MDs.clear();
- I->getAllMetadataOtherThanDebugLoc(MDs);
+ I.getAllMetadataOtherThanDebugLoc(MDs);
for (unsigned i = 0, e = MDs.size(); i != e; ++i)
EnumerateMetadata(MDs[i].second);
- if (!I->getDebugLoc().isUnknown()) {
+ if (!I.getDebugLoc().isUnknown()) {
MDNode *Scope, *IA;
- I->getDebugLoc().getScopeAndInlinedAt(Scope, IA, I->getContext());
+ I.getDebugLoc().getScopeAndInlinedAt(Scope, IA, I.getContext());
if (Scope) EnumerateMetadata(Scope);
if (IA) EnumerateMetadata(IA);
}
@@ -120,6 +117,12 @@ unsigned ValueEnumerator::getInstructionID(const Instruction *Inst) const {
return I->second;
}
+unsigned ValueEnumerator::getComdatID(const Comdat *C) const {
+ unsigned ComdatID = Comdats.idFor(C);
+ assert(ComdatID && "Comdat not found!");
+ return ComdatID;
+}
+
void ValueEnumerator::setInstructionID(const Instruction *I) {
InstructionMap[I] = InstructionCount++;
}
@@ -310,6 +313,10 @@ void ValueEnumerator::EnumerateValue(const Value *V) {
return;
}
+ if (auto *GO = dyn_cast<GlobalObject>(V))
+ if (const Comdat *C = GO->getComdat())
+ Comdats.insert(C);
+
// Enumerate the type of this value.
EnumerateType(V->getType());
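Comdats is a UniqueVector, which hands out stable 1-based IDs; that is what lets 0 mean "no comdat" in the GLOBALVAR and FUNCTION records, and why getComdatID can assert a nonzero result. Illustration:

    #include "llvm/ADT/UniqueVector.h"

    static void demo() {
      llvm::UniqueVector<int> V;
      unsigned A = V.insert(42); // 1: first unique element
      unsigned B = V.insert(7);  // 2
      unsigned C = V.insert(42); // 1 again: duplicates keep their first ID
      (void)A; (void)B; (void)C;
      // V.idFor(7) == 2, while V.idFor(99) == 0 (not present).
    }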
diff --git a/lib/Bitcode/Writer/ValueEnumerator.h b/lib/Bitcode/Writer/ValueEnumerator.h
index d1ca15f..1c9f38e 100644
--- a/lib/Bitcode/Writer/ValueEnumerator.h
+++ b/lib/Bitcode/Writer/ValueEnumerator.h
@@ -16,6 +16,7 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/UniqueVector.h"
#include "llvm/IR/Attributes.h"
#include <vector>
@@ -25,6 +26,7 @@ class Type;
class Value;
class Instruction;
class BasicBlock;
+class Comdat;
class Function;
class Module;
class MDNode;
@@ -48,6 +50,10 @@ private:
typedef DenseMap<const Value*, unsigned> ValueMapType;
ValueMapType ValueMap;
ValueList Values;
+
+ typedef UniqueVector<const Comdat *> ComdatSetType;
+ ComdatSetType Comdats;
+
ValueList MDValues;
SmallVector<const MDNode *, 8> FunctionLocalMDs;
ValueMapType MDValueMap;
@@ -139,6 +145,9 @@ public:
return AttributeGroups;
}
+ const ComdatSetType &getComdats() const { return Comdats; }
+ unsigned getComdatID(const Comdat *C) const;
+
/// getGlobalBasicBlockID - This returns the function-specific ID for the
/// specified basic block. This is relatively expensive information, so it
/// should only be used by rare constructs such as address-of-label.
diff --git a/lib/CodeGen/Analysis.cpp b/lib/CodeGen/Analysis.cpp
index 6fc83a2..1bdf312 100644
--- a/lib/CodeGen/Analysis.cpp
+++ b/lib/CodeGen/Analysis.cpp
@@ -7,13 +7,14 @@
//
//===----------------------------------------------------------------------===//
//
-// This file defines several CodeGen-specific LLVM IR analysis utilties.
+// This file defines several CodeGen-specific LLVM IR analysis utilities.
//
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/Analysis.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
@@ -474,8 +475,7 @@ static bool nextRealType(SmallVectorImpl<CompositeType *> &SubTypes,
/// between it and the return.
///
/// This function only tests target-independent requirements.
-bool llvm::isInTailCallPosition(ImmutableCallSite CS,
- const TargetLowering &TLI) {
+bool llvm::isInTailCallPosition(ImmutableCallSite CS, const SelectionDAG &DAG) {
const Instruction *I = CS.getInstruction();
const BasicBlock *ExitBB = I->getParent();
const TerminatorInst *Term = ExitBB->getTerminator();
@@ -490,7 +490,7 @@ bool llvm::isInTailCallPosition(ImmutableCallSite CS,
// longjmp on x86), it can end up causing miscompilation that has not
// been fully understood.
if (!Ret &&
- (!TLI.getTargetMachine().Options.GuaranteedTailCallOpt ||
+ (!DAG.getTarget().Options.GuaranteedTailCallOpt ||
!isa<UnreachableInst>(Term)))
return false;
@@ -509,7 +509,8 @@ bool llvm::isInTailCallPosition(ImmutableCallSite CS,
return false;
}
- return returnTypeIsEligibleForTailCall(ExitBB->getParent(), I, Ret, TLI);
+ return returnTypeIsEligibleForTailCall(ExitBB->getParent(), I, Ret,
+ *DAG.getTarget().getTargetLowering());
}
bool llvm::returnTypeIsEligibleForTailCall(const Function *F,
diff --git a/lib/CodeGen/Android.mk b/lib/CodeGen/Android.mk
index 7feb42c..05e5c45 100644
--- a/lib/CodeGen/Android.mk
+++ b/lib/CodeGen/Android.mk
@@ -24,11 +24,13 @@ codegen_SRC_FILES := \
GCMetadata.cpp \
GCMetadataPrinter.cpp \
GCStrategy.cpp \
+ GlobalMerge.cpp \
IfConversion.cpp \
InlineSpiller.cpp \
InterferenceCache.cpp \
IntrinsicLowering.cpp \
JITCodeEmitter.cpp \
+ JumpInstrTables.cpp \
LatencyPriorityQueue.cpp \
LexicalScopes.cpp \
LiveDebugVariables.cpp \
diff --git a/lib/CodeGen/AsmPrinter/ARMException.cpp b/lib/CodeGen/AsmPrinter/ARMException.cpp
index 1cb0159..251f5ef 100644
--- a/lib/CodeGen/AsmPrinter/ARMException.cpp
+++ b/lib/CodeGen/AsmPrinter/ARMException.cpp
@@ -37,8 +37,7 @@
using namespace llvm;
ARMException::ARMException(AsmPrinter *A)
- : DwarfException(A),
- shouldEmitCFI(false) {}
+ : EHStreamer(A), shouldEmitCFI(false) {}
ARMException::~ARMException() {}
@@ -100,7 +99,7 @@ void ARMException::endFunction(const MachineFunction *) {
ATS.emitHandlerData();
// Emit actual exception table
- EmitExceptionTable();
+ emitExceptionTable();
}
}
@@ -108,7 +107,7 @@ void ARMException::endFunction(const MachineFunction *) {
ATS.emitFnEnd();
}
-void ARMException::EmitTypeInfos(unsigned TTypeEncoding) {
+void ARMException::emitTypeInfos(unsigned TTypeEncoding) {
const std::vector<const GlobalVariable *> &TypeInfos = MMI->getTypeInfos();
const std::vector<unsigned> &FilterIds = MMI->getFilterIds();
diff --git a/lib/CodeGen/AsmPrinter/Android.mk b/lib/CodeGen/AsmPrinter/Android.mk
index f56eb6e..083cc0d 100644
--- a/lib/CodeGen/AsmPrinter/Android.mk
+++ b/lib/CodeGen/AsmPrinter/Android.mk
@@ -1,33 +1,33 @@
LOCAL_PATH := $(call my-dir)
codegen_asmprinter_SRC_FILES := \
- AsmPrinter.cpp
+ AddressPool.cpp \
+ ARMException.cpp \
+ AsmPrinter.cpp \
+ AsmPrinterDwarf.cpp \
+ AsmPrinterInlineAsm.cpp \
+ DbgValueHistoryCalculator.cpp \
+ DIE.cpp \
+ DIEHash.cpp \
+ DwarfAccelTable.cpp \
+ DwarfCFIException.cpp \
+ DwarfDebug.cpp \
+ DwarfFile.cpp \
+ DwarfStringPool.cpp \
+ DwarfUnit.cpp \
+ EHStreamer.cpp \
+ ErlangGCPrinter.cpp \
+ OcamlGCPrinter.cpp \
+ Win64Exception.cpp \
+ WinCodeViewLineTables.cpp
+
+
# For the host
# =====================================================
include $(CLEAR_VARS)
-LOCAL_SRC_FILES := \
- AddressPool.cpp \
- AsmPrinter.cpp \
- AsmPrinterDwarf.cpp \
- AsmPrinterInlineAsm.cpp \
- ARMException.cpp \
- DbgValueHistoryCalculator.cpp \
- DIE.cpp \
- DIEHash.cpp \
- DwarfAccelTable.cpp \
- DwarfCFIException.cpp \
- DwarfDebug.cpp \
- DwarfException.cpp \
- DwarfFile.cpp \
- DwarfStringPool.cpp \
- DwarfUnit.cpp \
- ErlangGCPrinter.cpp \
- OcamlGCPrinter.cpp \
- Win64Exception.cpp \
- WinCodeViewLineTables.cpp
-
+LOCAL_SRC_FILES := $(codegen_asmprinter_SRC_FILES)
LOCAL_MODULE:= libLLVMAsmPrinter
LOCAL_MODULE_TAGS := optional
@@ -41,27 +41,7 @@ include $(BUILD_HOST_STATIC_LIBRARY)
ifneq (true,$(DISABLE_LLVM_DEVICE_BUILDS))
include $(CLEAR_VARS)
-LOCAL_SRC_FILES := \
- AddressPool.cpp \
- AsmPrinter.cpp \
- AsmPrinterDwarf.cpp \
- AsmPrinterInlineAsm.cpp \
- ARMException.cpp \
- DbgValueHistoryCalculator.cpp \
- DIE.cpp \
- DIEHash.cpp \
- DwarfAccelTable.cpp \
- DwarfCFIException.cpp \
- DwarfDebug.cpp \
- DwarfException.cpp \
- DwarfFile.cpp \
- DwarfStringPool.cpp \
- DwarfUnit.cpp \
- ErlangGCPrinter.cpp \
- OcamlGCPrinter.cpp \
- Win64Exception.cpp \
- WinCodeViewLineTables.cpp
-
+LOCAL_SRC_FILES := $(codegen_asmprinter_SRC_FILES)
LOCAL_MODULE:= libLLVMAsmPrinter
LOCAL_MODULE_TAGS := optional
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 7de9c6d..f80fdea 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -18,6 +18,7 @@
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/JumpInstrTableInfo.h"
#include "llvm/CodeGen/GCMetadataPrinter.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -46,7 +47,6 @@
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
-#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include "llvm/Transforms/Utils/GlobalStatus.h"
@@ -232,23 +232,23 @@ bool AsmPrinter::doInitialization(Module &M) {
}
}
- DwarfException *DE = nullptr;
+ EHStreamer *ES = nullptr;
switch (MAI->getExceptionHandlingType()) {
case ExceptionHandling::None:
break;
case ExceptionHandling::SjLj:
case ExceptionHandling::DwarfCFI:
- DE = new DwarfCFIException(this);
+ ES = new DwarfCFIException(this);
break;
case ExceptionHandling::ARM:
- DE = new ARMException(this);
+ ES = new ARMException(this);
break;
- case ExceptionHandling::Win64:
- DE = new Win64Exception(this);
+ case ExceptionHandling::WinEH:
+ ES = new Win64Exception(this);
break;
}
- if (DE)
- Handlers.push_back(HandlerInfo(DE, EHTimerName, DWARFGroupName));
+ if (ES)
+ Handlers.push_back(HandlerInfo(ES, EHTimerName, DWARFGroupName));
return false;
}
@@ -709,13 +709,12 @@ AsmPrinter::CFIMoveType AsmPrinter::needsCFIMoves() {
}
bool AsmPrinter::needsSEHMoves() {
- return MAI->getExceptionHandlingType() == ExceptionHandling::Win64 &&
+ return MAI->getExceptionHandlingType() == ExceptionHandling::WinEH &&
MF->getFunction()->needsUnwindTableEntry();
}
void AsmPrinter::emitCFIInstruction(const MachineInstr &MI) {
- ExceptionHandling::ExceptionsType ExceptionHandlingType =
- MAI->getExceptionHandlingType();
+ ExceptionHandling ExceptionHandlingType = MAI->getExceptionHandlingType();
if (ExceptionHandlingType != ExceptionHandling::DwarfCFI &&
ExceptionHandlingType != ExceptionHandling::ARM)
return;
@@ -870,6 +869,8 @@ void AsmPrinter::EmitFunctionBody() {
OutStreamer.AddBlankLine();
}
+static const MCExpr *lowerConstant(const Constant *CV, AsmPrinter &AP);
+
bool AsmPrinter::doFinalization(Module &M) {
// Emit global variables.
for (const auto &G : M.globals())
@@ -887,6 +888,54 @@ bool AsmPrinter::doFinalization(Module &M) {
EmitVisibility(Name, V, false);
}
+ // Get information about jump-instruction tables to print.
+ JumpInstrTableInfo *JITI = getAnalysisIfAvailable<JumpInstrTableInfo>();
+
+ if (JITI && !JITI->getTables().empty()) {
+ unsigned Arch = Triple(getTargetTriple()).getArch();
+ bool IsThumb = (Arch == Triple::thumb || Arch == Triple::thumbeb);
+ MCInst TrapInst;
+ TM.getInstrInfo()->getTrap(TrapInst);
+ for (const auto &KV : JITI->getTables()) {
+ uint64_t Count = 0;
+ for (const auto &FunPair : KV.second) {
+ // Emit the function labels to make this be a function entry point.
+ MCSymbol *FunSym =
+ OutContext.GetOrCreateSymbol(FunPair.second->getName());
+ OutStreamer.EmitSymbolAttribute(FunSym, MCSA_Global);
+ // FIXME: JumpInstrTableInfo should store information about the required
+ // alignment of table entries and the size of the padding instruction.
+ EmitAlignment(3);
+ if (IsThumb)
+ OutStreamer.EmitThumbFunc(FunSym);
+ if (MAI->hasDotTypeDotSizeDirective())
+ OutStreamer.EmitSymbolAttribute(FunSym, MCSA_ELF_TypeFunction);
+ OutStreamer.EmitLabel(FunSym);
+
+ // Emit the jump instruction to transfer control to the original
+ // function.
+ MCInst JumpToFun;
+ MCSymbol *TargetSymbol =
+ OutContext.GetOrCreateSymbol(FunPair.first->getName());
+ const MCSymbolRefExpr *TargetSymRef =
+ MCSymbolRefExpr::Create(TargetSymbol, MCSymbolRefExpr::VK_PLT,
+ OutContext);
+ TM.getInstrInfo()->getUnconditionalBranch(JumpToFun, TargetSymRef);
+ OutStreamer.EmitInstruction(JumpToFun, getSubtargetInfo());
+ ++Count;
+ }
+
+ // Emit enough padding instructions to fill up to the next power of two.
+ // This assumes that the trap instruction takes 8 bytes or fewer.
+ uint64_t Remaining = NextPowerOf2(Count) - Count;
+ for (uint64_t C = 0; C < Remaining; ++C) {
+ EmitAlignment(3);
+ OutStreamer.EmitInstruction(TrapInst, getSubtargetInfo());
+ }
+
+ }
+ }
+
// Emit module flags.
SmallVector<Module::ModuleFlagEntry, 8> ModuleFlags;
M.getModuleFlagsMetadata(ModuleFlags);
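The padding loop in the hunk above rounds the emitted jump table up to a power-of-two number of entries, assuming each entry (jump or trap) fits in 8 bytes. A minimal standalone sketch of the size computation, not the LLVM API; nextPowerOf2 here mirrors llvm::NextPowerOf2, which returns the power of two strictly greater than its argument:

    #include <cstdint>
    #include <iostream>

    // Smallest power of two strictly greater than A (so an exact power of two
    // is doubled rather than kept), mirroring llvm::NextPowerOf2.
    static uint64_t nextPowerOf2(uint64_t A) {
      A |= (A >> 1);
      A |= (A >> 2);
      A |= (A >> 4);
      A |= (A >> 8);
      A |= (A >> 16);
      A |= (A >> 32);
      return A + 1;
    }

    int main() {
      for (uint64_t Count : {1ull, 3ull, 5ull, 8ull}) {
        // e.g. 5 emitted entries -> 3 trap fillers to reach a table of 8.
        std::cout << Count << " entries -> " << nextPowerOf2(Count) - Count
                  << " trap fillers\n";
      }
    }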
@@ -932,10 +981,6 @@ bool AsmPrinter::doFinalization(Module &M) {
for (const auto &Alias : M.aliases()) {
MCSymbol *Name = getSymbol(&Alias);
- const GlobalValue *GV = Alias.getAliasee();
- assert(!GV->isDeclaration());
- MCSymbol *Target = getSymbol(GV);
-
if (Alias.hasExternalLinkage() || !MAI->getWeakRefDirective())
OutStreamer.EmitSymbolAttribute(Name, MCSA_Global);
else if (Alias.hasWeakLinkage() || Alias.hasLinkOnceLinkage())
@@ -947,7 +992,7 @@ bool AsmPrinter::doFinalization(Module &M) {
// Emit the directives as assignments aka .set:
OutStreamer.EmitAssignment(Name,
- MCSymbolRefExpr::Create(Target, OutContext));
+ lowerConstant(Alias.getAliasee(), *this));
}
}
@@ -1248,7 +1293,7 @@ bool AsmPrinter::EmitSpecialLLVMGlobal(const GlobalVariable *GV) {
}
// Ignore debug and non-emitted data. This handles llvm.compiler.used.
- if (GV->getSection() == "llvm.metadata" ||
+ if (StringRef(GV->getSection()) == "llvm.metadata" ||
GV->hasAvailableExternallyLinkage())
return true;
@@ -1350,14 +1395,17 @@ void AsmPrinter::EmitXXStructorList(const Constant *List, bool isCtor) {
for (Structor &S : Structors) {
const TargetLoweringObjectFile &Obj = getObjFileLowering();
const MCSymbol *KeySym = nullptr;
- const MCSection *KeySec = nullptr;
- if (S.ComdatKey) {
- KeySym = getSymbol(S.ComdatKey);
- KeySec = getObjFileLowering().SectionForGlobal(S.ComdatKey, *Mang, TM);
+ if (GlobalValue *GV = S.ComdatKey) {
+ if (GV->hasAvailableExternallyLinkage())
+ // If the associated variable is available_externally, some other TU
+ // will provide its dynamic initializer.
+ continue;
+
+ KeySym = getSymbol(GV);
}
const MCSection *OutputSection =
- (isCtor ? Obj.getStaticCtorSection(S.Priority, KeySym, KeySec)
- : Obj.getStaticDtorSection(S.Priority, KeySym, KeySec));
+ (isCtor ? Obj.getStaticCtorSection(S.Priority, KeySym)
+ : Obj.getStaticDtorSection(S.Priority, KeySym));
OutStreamer.SwitchSection(OutputSection);
if (OutStreamer.getCurrentSection() != OutStreamer.getPreviousSection())
EmitAlignment(Align);
@@ -1817,7 +1865,10 @@ static void emitGlobalConstantFP(const ConstantFP *CFP, AsmPrinter &AP) {
SmallString<8> StrVal;
CFP->getValueAPF().toString(StrVal);
- CFP->getType()->print(AP.OutStreamer.GetCommentOS());
+ if (CFP->getType())
+ CFP->getType()->print(AP.OutStreamer.GetCommentOS());
+ else
+ AP.OutStreamer.GetCommentOS() << "Printing <null> Type";
AP.OutStreamer.GetCommentOS() << ' ' << StrVal << '\n';
}
@@ -1830,7 +1881,8 @@ static void emitGlobalConstantFP(const ConstantFP *CFP, AsmPrinter &AP) {
// PPC's long double has odd notions of endianness compared to how LLVM
// handles it: p[0] goes first for *big* endian on PPC.
- if (AP.TM.getDataLayout()->isBigEndian() != CFP->getType()->isPPC_FP128Ty()) {
+ if (AP.TM.getDataLayout()->isBigEndian() &&
+ !CFP->getType()->isPPC_FP128Ty()) {
int Chunk = API.getNumWords() - 1;
if (TrailingBytes)
diff --git a/lib/CodeGen/AsmPrinter/CMakeLists.txt b/lib/CodeGen/AsmPrinter/CMakeLists.txt
index b4ef185..f555f21 100644
--- a/lib/CodeGen/AsmPrinter/CMakeLists.txt
+++ b/lib/CodeGen/AsmPrinter/CMakeLists.txt
@@ -10,10 +10,10 @@ add_llvm_library(LLVMAsmPrinter
DwarfAccelTable.cpp
DwarfCFIException.cpp
DwarfDebug.cpp
- DwarfException.cpp
DwarfFile.cpp
DwarfStringPool.cpp
DwarfUnit.cpp
+ EHStreamer.cpp
ErlangGCPrinter.cpp
OcamlGCPrinter.cpp
Win64Exception.cpp
diff --git a/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp b/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp
index 6103254..a66d08e 100644
--- a/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp
+++ b/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp
@@ -15,6 +15,7 @@
#include "llvm/Target/TargetRegisterInfo.h"
#include <algorithm>
#include <map>
+#include <set>
#define DEBUG_TYPE "dwarfdebug"
@@ -110,45 +111,73 @@ static void clobberRegisterUses(RegDescribedVarsMap &RegVars, unsigned RegNo,
RegVars.erase(I);
}
-// \brief Terminate location ranges for all variables, described by registers
-// clobbered by @MI.
-static void clobberRegisterUses(RegDescribedVarsMap &RegVars,
- const MachineInstr &MI,
- const TargetRegisterInfo *TRI,
- DbgValueHistoryMap &HistMap) {
+// \brief Collect all registers clobbered by @MI and insert them to @Regs.
+static void collectClobberedRegisters(const MachineInstr &MI,
+ const TargetRegisterInfo *TRI,
+ std::set<unsigned> &Regs) {
for (const MachineOperand &MO : MI.operands()) {
if (!MO.isReg() || !MO.isDef() || !MO.getReg())
continue;
- for (MCRegAliasIterator AI(MO.getReg(), TRI, true); AI.isValid();
- ++AI) {
- unsigned RegNo = *AI;
- clobberRegisterUses(RegVars, RegNo, HistMap, MI);
- }
+ for (MCRegAliasIterator AI(MO.getReg(), TRI, true); AI.isValid(); ++AI)
+ Regs.insert(*AI);
}
}
-// \brief Terminate the location range for all register-described variables
-// by inserting @ClobberingInstr to their history.
-static void clobberAllRegistersUses(RegDescribedVarsMap &RegVars,
- DbgValueHistoryMap &HistMap,
- const MachineInstr &ClobberingInstr) {
- for (const auto &I : RegVars)
- for (const auto &Var : I.second)
- HistMap.endInstrRange(Var, ClobberingInstr);
- RegVars.clear();
+// \brief Returns the first instruction in @MBB which corresponds to
+// the function epilogue, or nullptr if @MBB doesn't contain an epilogue.
+static const MachineInstr *getFirstEpilogueInst(const MachineBasicBlock &MBB) {
+ auto LastMI = MBB.getLastNonDebugInstr();
+ if (LastMI == MBB.end() || !LastMI->isReturn())
+ return nullptr;
+ // Assume that the epilogue starts with the instruction that has the same
+ // debug location as the return instruction.
+ DebugLoc LastLoc = LastMI->getDebugLoc();
+ auto Res = LastMI;
+ for (MachineBasicBlock::const_reverse_iterator I(std::next(LastMI)); I != MBB.rend();
+ ++I) {
+ if (I->getDebugLoc() != LastLoc)
+ return Res;
+ Res = std::prev(I.base());
+ }
+ // If all instructions have the same debug location, assume whole MBB is
+ // an epilogue.
+ return MBB.begin();
+}
+
+// \brief Collect registers that are modified in the function body (i.e. their
+// contents are changed outside of the prologue and epilogue).
+static void collectChangingRegs(const MachineFunction *MF,
+ const TargetRegisterInfo *TRI,
+ std::set<unsigned> &Regs) {
+ for (const auto &MBB : *MF) {
+ auto FirstEpilogueInst = getFirstEpilogueInst(MBB);
+ bool IsInEpilogue = false;
+ for (const auto &MI : MBB) {
+ IsInEpilogue |= &MI == FirstEpilogueInst;
+ if (!MI.getFlag(MachineInstr::FrameSetup) && !IsInEpilogue)
+ collectClobberedRegisters(MI, TRI, Regs);
+ }
+ }
}
void calculateDbgValueHistory(const MachineFunction *MF,
const TargetRegisterInfo *TRI,
DbgValueHistoryMap &Result) {
- RegDescribedVarsMap RegVars;
+ std::set<unsigned> ChangingRegs;
+ collectChangingRegs(MF, TRI, ChangingRegs);
+ RegDescribedVarsMap RegVars;
for (const auto &MBB : *MF) {
for (const auto &MI : MBB) {
if (!MI.isDebugValue()) {
// Not a DBG_VALUE instruction. It may clobber registers which describe
// some variables.
- clobberRegisterUses(RegVars, MI, TRI, Result);
+ std::set<unsigned> MIClobberedRegs;
+ collectClobberedRegisters(MI, TRI, MIClobberedRegs);
+ for (unsigned RegNo : MIClobberedRegs) {
+ if (ChangingRegs.count(RegNo))
+ clobberRegisterUses(RegVars, RegNo, Result, MI);
+ }
continue;
}
@@ -167,8 +196,10 @@ void calculateDbgValueHistory(const MachineFunction *MF,
// Make sure locations for register-described variables are valid only
// until the end of the basic block (unless it's the last basic block, in
// which case let their liveness run off to the end of the function).
- if (!MBB.empty() && &MBB != &MF->back())
- clobberAllRegistersUses(RegVars, Result, MBB.back());
+ if (!MBB.empty() && &MBB != &MF->back()) {
+ for (unsigned RegNo : ChangingRegs)
+ clobberRegisterUses(RegVars, RegNo, Result, MBB.back());
+ }
}
}
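The replacement logic above detects the epilogue by scanning backwards from the return instruction while debug locations match, and then only terminates location ranges for registers actually clobbered in the function body. A minimal sketch of the epilogue scan over a simplified instruction model (Inst is a hypothetical stand-in for MachineInstr plus its DebugLoc):

    #include <vector>

    struct Inst { int DebugLoc; bool IsReturn; }; // hypothetical stand-in

    // Index of the first epilogue instruction, or -1 if the block has none:
    // the epilogue is the maximal suffix of the block whose instructions share
    // the return instruction's debug location (as in getFirstEpilogueInst).
    static int firstEpilogueIndex(const std::vector<Inst> &MBB) {
      if (MBB.empty() || !MBB.back().IsReturn)
        return -1;
      int Res = (int)MBB.size() - 1;
      for (int I = Res - 1; I >= 0; --I) {
        if (MBB[I].DebugLoc != MBB.back().DebugLoc)
          return Res;
        Res = I;
      }
      return 0; // every instruction matches: treat the whole block as epilogue
    }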
diff --git a/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp b/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
index 30312ac..74215aa 100644
--- a/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
@@ -40,9 +40,8 @@
using namespace llvm;
DwarfCFIException::DwarfCFIException(AsmPrinter *A)
- : DwarfException(A),
- shouldEmitPersonality(false), shouldEmitLSDA(false), shouldEmitMoves(false),
- moveTypeModule(AsmPrinter::CFI_M_None) {}
+ : EHStreamer(A), shouldEmitPersonality(false), shouldEmitLSDA(false),
+ shouldEmitMoves(false), moveTypeModule(AsmPrinter::CFI_M_None) {}
DwarfCFIException::~DwarfCFIException() {}
@@ -59,26 +58,16 @@ void DwarfCFIException::endModule() {
unsigned PerEncoding = TLOF.getPersonalityEncoding();
- if ((PerEncoding & 0x70) != dwarf::DW_EH_PE_pcrel)
+ if ((PerEncoding & 0x80) != dwarf::DW_EH_PE_indirect)
return;
// Emit references to all used personality functions
- bool AtLeastOne = false;
const std::vector<const Function*> &Personalities = MMI->getPersonalities();
for (size_t i = 0, e = Personalities.size(); i != e; ++i) {
if (!Personalities[i])
continue;
MCSymbol *Sym = Asm->getSymbol(Personalities[i]);
TLOF.emitPersonalityValue(Asm->OutStreamer, Asm->TM, Sym);
- AtLeastOne = true;
- }
-
- if (AtLeastOne && !TLOF.isFunctionEHFrameSymbolPrivate()) {
- // This is a temporary hack to keep sections in the same order they
- // were before. This lets us produce bit identical outputs while
- // transitioning to CFI.
- Asm->OutStreamer.SwitchSection(
- const_cast<TargetLoweringObjectFile&>(TLOF).getEHFrameSection());
}
}
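The early return above now tests the DW_EH_PE_indirect flag rather than the pc-relative application bits. For reference, a standalone snippet with the relevant DWARF EH pointer-encoding masks (the constant values follow the DWARF EH encoding; the helper names are illustrative only):

    #include <cstdint>

    // DWARF exception-handling pointer-encoding bits.
    enum : uint8_t {
      DW_EH_PE_pcrel    = 0x10, // bits 0x70: how the value is applied
      DW_EH_PE_indirect = 0x80, // bit 0x80: value is stored indirectly
    };

    static bool isPCRel(uint8_t Enc)    { return (Enc & 0x70) == DW_EH_PE_pcrel; }
    static bool isIndirect(uint8_t Enc) { return (Enc & 0x80) == DW_EH_PE_indirect; }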
@@ -123,9 +112,17 @@ void DwarfCFIException::beginFunction(const MachineFunction *MF) {
TLOF.getCFIPersonalitySymbol(Per, *Asm->Mang, Asm->TM, MMI);
Asm->OutStreamer.EmitCFIPersonality(Sym, PerEncoding);
- Asm->OutStreamer.EmitDebugLabel
- (Asm->GetTempSymbol("eh_func_begin",
- Asm->getFunctionNumber()));
+ MCSymbol *EHBegin =
+ Asm->GetTempSymbol("eh_func_begin", Asm->getFunctionNumber());
+ if (Asm->MAI->useAssignmentForEHBegin()) {
+ MCContext &Ctx = Asm->OutContext;
+ MCSymbol *CurPos = Ctx.CreateTempSymbol();
+ Asm->OutStreamer.EmitLabel(CurPos);
+ Asm->OutStreamer.EmitAssignment(EHBegin,
+ MCSymbolRefExpr::Create(CurPos, Ctx));
+ } else {
+ Asm->OutStreamer.EmitLabel(EHBegin);
+ }
// Provide LSDA information.
if (!shouldEmitLSDA)
@@ -153,5 +150,5 @@ void DwarfCFIException::endFunction(const MachineFunction *) {
// Map all labels and get rid of any dead landing pads.
MMI->TidyLandingPads();
- EmitExceptionTable();
+ emitExceptionTable();
}
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index 2a0615d..77860c0 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -98,10 +98,6 @@ DwarfPubSections("generate-dwarf-pub-sections", cl::Hidden,
clEnumVal(Disable, "Disabled"), clEnumValEnd),
cl::init(Default));
-static cl::opt<unsigned>
-DwarfVersionNumber("dwarf-version", cl::Hidden,
- cl::desc("Generate DWARF for dwarf version."), cl::init(0));
-
static const char *const DWARFGroupName = "DWARF Emission";
static const char *const DbgTimerName = "DWARF Debug Writer";
@@ -209,9 +205,12 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M)
else
HasDwarfPubSections = DwarfPubSections == Enable;
+ unsigned DwarfVersionNumber = Asm->TM.Options.MCOptions.DwarfVersion;
DwarfVersion = DwarfVersionNumber ? DwarfVersionNumber
: MMI->getModule()->getDwarfVersion();
+ Asm->OutStreamer.getContext().setDwarfVersion(DwarfVersion);
+
{
NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled);
beginModule();
@@ -531,8 +530,7 @@ void DwarfDebug::constructAbstractSubprogramScopeDIE(DwarfCompileUnit &TheCU,
// shouldn't be found by lookup.
AbsDef = &SPCU.createAndAddDIE(dwarf::DW_TAG_subprogram, *ContextDIE,
DIDescriptor());
- SPCU.applySubprogramAttributes(SP, *AbsDef);
- SPCU.addGlobalName(SP.getName(), *AbsDef, resolve(SP.getContext()));
+ SPCU.applySubprogramAttributesToDefinition(SP, *AbsDef);
SPCU.addUInt(*AbsDef, dwarf::DW_AT_inline, None, dwarf::DW_INL_inlined);
createAndAddScopeChildren(SPCU, Scope, *AbsDef);
@@ -732,6 +730,8 @@ void DwarfDebug::beginModule() {
const Module *M = MMI->getModule();
+ FunctionDIs = makeSubprogramMap(*M);
+
// If module has named metadata anchors then use them, otherwise scan the
// module using debug info finder to collect debug info.
NamedMDNode *CU_Nodes = M->getNamedMetadata("llvm.dbg.cu");
@@ -784,6 +784,26 @@ void DwarfDebug::beginModule() {
SectionMap[Asm->getObjFileLowering().getTextSection()];
}
+void DwarfDebug::finishVariableDefinitions() {
+ for (const auto &Var : ConcreteVariables) {
+ DIE *VariableDie = Var->getDIE();
+ // FIXME: There shouldn't be any variables without DIEs.
+ if (!VariableDie)
+ continue;
+ // FIXME: Consider the time-space tradeoff of just storing the unit pointer
+ // in the ConcreteVariables list, rather than looking it up again here.
+ // DIE::getUnit isn't simple - it walks parent pointers, etc.
+ DwarfCompileUnit *Unit = lookupUnit(VariableDie->getUnit());
+ assert(Unit);
+ DbgVariable *AbsVar = getExistingAbstractVariable(Var->getVariable());
+ if (AbsVar && AbsVar->getDIE()) {
+ Unit->addDIEEntry(*VariableDie, dwarf::DW_AT_abstract_origin,
+ *AbsVar->getDIE());
+ } else
+ Unit->applyVariableAttributes(*Var, *VariableDie);
+ }
+}
+
void DwarfDebug::finishSubprogramDefinitions() {
const Module *M = MMI->getModule();
@@ -811,8 +831,7 @@ void DwarfDebug::finishSubprogramDefinitions() {
// inlined versions during codegen.
D = SPCU->getOrCreateSubprogramDIE(SP);
// And attach the attributes
- SPCU->applySubprogramAttributes(SP, *D);
- SPCU->addGlobalName(SP.getName(), *D, resolve(SP.getContext()));
+ SPCU->applySubprogramAttributesToDefinition(SP, *D);
}
}
}
@@ -850,8 +869,10 @@ void DwarfDebug::collectDeadVariables() {
for (unsigned vi = 0, ve = Variables.getNumElements(); vi != ve; ++vi) {
DIVariable DV(Variables.getElement(vi));
assert(DV.isVariable());
- DbgVariable NewVar(DV, nullptr, this);
- SPDIE->addChild(SPCU->constructVariableDIE(NewVar));
+ DbgVariable NewVar(DV, this);
+ auto VariableDie = SPCU->constructVariableDIE(NewVar);
+ SPCU->applyVariableAttributes(NewVar, *VariableDie);
+ SPDIE->addChild(std::move(VariableDie));
}
}
}
@@ -861,6 +882,8 @@ void DwarfDebug::collectDeadVariables() {
void DwarfDebug::finalizeModuleInfo() {
finishSubprogramDefinitions();
+ finishVariableDefinitions();
+
// Collect info for variables that were optimized out.
collectDeadVariables();
@@ -1017,9 +1040,9 @@ void DwarfDebug::endModule() {
emitDebugInfoDWO();
emitDebugAbbrevDWO();
emitDebugLineDWO();
+ emitDebugLocDWO();
// Emit DWO addresses.
AddrPool.emit(*Asm, Asm->getObjFileLowering().getDwarfAddrSection());
- emitDebugLocDWO();
} else
// Emit info into a debug loc section.
emitDebugLoc();
@@ -1047,27 +1070,51 @@ void DwarfDebug::endModule() {
}
// Find abstract variable, if any, associated with Var.
-DbgVariable *DwarfDebug::findAbstractVariable(DIVariable &DV,
- DebugLoc ScopeLoc) {
- return findAbstractVariable(DV, ScopeLoc.getScope(DV->getContext()));
-}
-
-DbgVariable *DwarfDebug::findAbstractVariable(DIVariable &DV,
- const MDNode *ScopeNode) {
+DbgVariable *DwarfDebug::getExistingAbstractVariable(const DIVariable &DV,
+ DIVariable &Cleansed) {
LLVMContext &Ctx = DV->getContext();
// More than one inlined variable corresponds to one abstract variable.
- DIVariable Var = cleanseInlinedVariable(DV, Ctx);
- auto I = AbstractVariables.find(Var);
+ // FIXME: This duplication of variables when inlining should probably be
+ // removed. It's done to allow each DIVariable to describe its location
+ // because the DebugLoc on the dbg.value/declare isn't accurate. We should
+ // make it accurate then remove this duplication/cleansing stuff.
+ Cleansed = cleanseInlinedVariable(DV, Ctx);
+ auto I = AbstractVariables.find(Cleansed);
if (I != AbstractVariables.end())
return I->second.get();
+ return nullptr;
+}
- LexicalScope *Scope = LScopes.findAbstractScope(ScopeNode);
- if (!Scope)
- return nullptr;
+DbgVariable *DwarfDebug::getExistingAbstractVariable(const DIVariable &DV) {
+ DIVariable Cleansed;
+ return getExistingAbstractVariable(DV, Cleansed);
+}
- auto AbsDbgVariable = make_unique<DbgVariable>(Var, nullptr, this);
+void DwarfDebug::createAbstractVariable(const DIVariable &Var,
+ LexicalScope *Scope) {
+ auto AbsDbgVariable = make_unique<DbgVariable>(Var, this);
addScopeVariable(Scope, AbsDbgVariable.get());
- return (AbstractVariables[Var] = std::move(AbsDbgVariable)).get();
+ AbstractVariables[Var] = std::move(AbsDbgVariable);
+}
+
+void DwarfDebug::ensureAbstractVariableIsCreated(const DIVariable &DV,
+ const MDNode *ScopeNode) {
+ DIVariable Cleansed = DV;
+ if (getExistingAbstractVariable(DV, Cleansed))
+ return;
+
+ createAbstractVariable(Cleansed, LScopes.getOrCreateAbstractScope(ScopeNode));
+}
+
+void
+DwarfDebug::ensureAbstractVariableIsCreatedIfScoped(const DIVariable &DV,
+ const MDNode *ScopeNode) {
+ DIVariable Cleansed = DV;
+ if (getExistingAbstractVariable(DV, Cleansed))
+ return;
+
+ if (LexicalScope *Scope = LScopes.findAbstractScope(ScopeNode))
+ createAbstractVariable(Cleansed, Scope);
}
// If Var is a current function argument then add it to CurrentFnArguments list.
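The hunk above splits the old findAbstractVariable into a pure lookup (getExistingAbstractVariable) and an explicit creation step (createAbstractVariable), with the two ensure* wrappers differing only in whether a missing abstract scope may be created. A minimal sketch of the lookup-or-create pattern, using placeholder types for DbgVariable and the cleansed MDNode key:

    #include <map>
    #include <memory>

    struct Var {};             // placeholder for DbgVariable
    using Key = const void *;  // placeholder for the cleansed variable node

    static std::map<Key, std::unique_ptr<Var>> AbstractVariables;

    static Var *getExisting(Key K) {
      auto I = AbstractVariables.find(K);
      return I != AbstractVariables.end() ? I->second.get() : nullptr;
    }

    static void ensureCreated(Key K) {
      if (getExisting(K))
        return;                                       // already memoized
      AbstractVariables[K] = std::make_unique<Var>(); // create exactly once
    }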
@@ -1106,11 +1153,11 @@ void DwarfDebug::collectVariableInfoFromMMITable(
if (!Scope)
continue;
- DbgVariable *AbsDbgVariable = findAbstractVariable(DV, VI.Loc);
- DbgVariable *RegVar = new DbgVariable(DV, AbsDbgVariable, this);
+ ensureAbstractVariableIsCreatedIfScoped(DV, Scope->getScopeNode());
+ ConcreteVariables.push_back(make_unique<DbgVariable>(DV, this));
+ DbgVariable *RegVar = ConcreteVariables.back().get();
RegVar->setFrameIndex(VI.Slot);
- if (!addCurrentFnArgument(RegVar, Scope))
- addScopeVariable(Scope, RegVar);
+ addScopeVariable(Scope, RegVar);
}
}
@@ -1175,18 +1222,14 @@ DwarfDebug::collectVariableInfo(SmallPtrSet<const MDNode *, 16> &Processed) {
Processed.insert(DV);
const MachineInstr *MInsn = Ranges.front().first;
assert(MInsn->isDebugValue() && "History must begin with debug value");
- DbgVariable *AbsVar = findAbstractVariable(DV, MInsn->getDebugLoc());
- DbgVariable *RegVar = new DbgVariable(DV, AbsVar, this);
- if (!addCurrentFnArgument(RegVar, Scope))
- addScopeVariable(Scope, RegVar);
- if (AbsVar)
- AbsVar->setMInsn(MInsn);
+ ensureAbstractVariableIsCreatedIfScoped(DV, Scope->getScopeNode());
+ ConcreteVariables.push_back(make_unique<DbgVariable>(MInsn, this));
+ DbgVariable *RegVar = ConcreteVariables.back().get();
+ addScopeVariable(Scope, RegVar);
// Check if the first DBG_VALUE is valid for the rest of the function.
- if (Ranges.size() == 1 && Ranges.front().second == nullptr) {
- RegVar->setMInsn(MInsn);
+ if (Ranges.size() == 1 && Ranges.front().second == nullptr)
continue;
- }
// Handle multiple DBG_VALUE instructions describing one variable.
RegVar->setDotDebugLocOffset(DotDebugLocEntries.size());
@@ -1205,6 +1248,11 @@ DwarfDebug::collectVariableInfo(SmallPtrSet<const MDNode *, 16> &Processed) {
if (Begin->getNumOperands() > 1 && Begin->getOperand(0).isReg() &&
!Begin->getOperand(0).getReg())
continue;
+ DEBUG(dbgs() << "DotDebugLoc Pair:\n" << "\t" << *Begin);
+ if (End != nullptr)
+ DEBUG(dbgs() << "\t" << *End);
+ else
+ DEBUG(dbgs() << "\tNULL\n");
const MCSymbol *StartLabel = getLabelBeforeInsn(Begin);
assert(StartLabel && "Forgot label before DBG_VALUE starting a range!");
@@ -1218,8 +1266,6 @@ DwarfDebug::collectVariableInfo(SmallPtrSet<const MDNode *, 16> &Processed) {
EndLabel = getLabelBeforeInsn(std::next(I)->first);
assert(EndLabel && "Forgot label after instruction ending a range!");
- DEBUG(dbgs() << "DotDebugLoc Pair:\n"
- << "\t" << *Begin << "\t" << *End << "\n");
DebugLocEntry Loc(StartLabel, EndLabel, getDebugLocValue(Begin), TheCU);
if (DebugLoc.empty() || !DebugLoc.back().Merge(Loc))
DebugLoc.push_back(std::move(Loc));
@@ -1233,11 +1279,11 @@ DwarfDebug::collectVariableInfo(SmallPtrSet<const MDNode *, 16> &Processed) {
assert(DV.isVariable());
if (!Processed.insert(DV))
continue;
- if (LexicalScope *Scope = LScopes.findLexicalScope(DV.getContext()))
- addScopeVariable(
- Scope,
- new DbgVariable(DV, findAbstractVariable(DV, Scope->getScopeNode()),
- this));
+ if (LexicalScope *Scope = LScopes.findLexicalScope(DV.getContext())) {
+ ensureAbstractVariableIsCreatedIfScoped(DV, Scope->getScopeNode());
+ ConcreteVariables.push_back(make_unique<DbgVariable>(DV, this));
+ addScopeVariable(Scope, ConcreteVariables.back().get());
+ }
}
}
@@ -1371,6 +1417,10 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) {
if (!MMI->hasDebugInfo())
return;
+ auto DI = FunctionDIs.find(MF->getFunction());
+ if (DI == FunctionDIs.end())
+ return;
+
// Grab the lexical scopes for the function, if we don't have any of those
// then we're not going to be able to do anything.
LScopes.initialize(*MF);
@@ -1386,6 +1436,14 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) {
// belongs to so that we add to the correct per-cu line table in the
// non-asm case.
LexicalScope *FnScope = LScopes.getCurrentFunctionScope();
+ // FnScope->getScopeNode() and DI->second should represent the same function,
+ // though they may not be the same MDNode. This can happen when inlined
+ // copies of a function are merged in LTO but their debug info metadata
+ // still differs - for example, two versions of a linkonce_odr function
+ // written/copied into two separate files, or metadata that is not
+ // structurally identical (such as clang's file path/name info, which
+ // includes the directory of the cpp file being built, even when the file
+ // name is absolute, as with an <> lookup header).
DwarfCompileUnit *TheCU = SPMap.lookup(FnScope->getScopeNode());
assert(TheCU && "Unable to find compile unit!");
if (Asm->OutStreamer.hasRawTextSupport())
@@ -1440,6 +1498,8 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) {
}
void DwarfDebug::addScopeVariable(LexicalScope *LS, DbgVariable *Var) {
+ if (addCurrentFnArgument(Var, LS))
+ return;
SmallVectorImpl<DbgVariable *> &Vars = ScopeVariables[LS];
DIVariable DV = Var->getVariable();
// Variables with positive arg numbers are parameters.
@@ -1481,7 +1541,8 @@ void DwarfDebug::endFunction(const MachineFunction *MF) {
assert(CurFn == MF);
assert(CurFn != nullptr);
- if (!MMI->hasDebugInfo() || LScopes.empty()) {
+ if (!MMI->hasDebugInfo() || LScopes.empty() ||
+ !FunctionDIs.count(MF->getFunction())) {
// If we don't have a lexical scope for this function then there will
// be a hole in the range information. Keep note of this by setting the
// previously used section to nullptr.
@@ -1517,7 +1578,7 @@ void DwarfDebug::endFunction(const MachineFunction *MF) {
assert(DV && DV.isVariable());
if (!ProcessedVars.insert(DV))
continue;
- findAbstractVariable(DV, DV.getContext());
+ ensureAbstractVariableIsCreated(DV, DV.getContext());
}
constructAbstractSubprogramScopeDIE(TheCU, AScope);
}
@@ -1536,12 +1597,8 @@ void DwarfDebug::endFunction(const MachineFunction *MF) {
// Ownership of DbgVariables is a bit subtle - ScopeVariables owns all the
// DbgVariables except those that are also in AbstractVariables (since they
// can be used cross-function)
- for (const auto &I : ScopeVariables)
- for (const auto *Var : I.second)
- if (!AbstractVariables.count(Var->getVariable()) || Var->getAbstractVariable())
- delete Var;
ScopeVariables.clear();
- DeleteContainerPointers(CurrentFnArguments);
+ CurrentFnArguments.clear();
DbgValues.clear();
LabelsBeforeInsn.clear();
LabelsAfterInsn.clear();
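The simplified cleanup above works because DbgVariable ownership now lives in the module-lifetime ConcreteVariables vector of unique_ptrs; the per-function containers hold only non-owning pointers, so clear() cannot leak. A small sketch of that ownership split (placeholder types, not the LLVM classes):

    #include <memory>
    #include <vector>

    struct DbgVar {}; // placeholder

    // Owner: lives for the whole module, like DwarfDebug::ConcreteVariables.
    static std::vector<std::unique_ptr<DbgVar>> Owned;

    // Per-function view: raw, non-owning pointers into Owned.
    static std::vector<DbgVar *> PerFunction;

    static void addVariable() {
      Owned.push_back(std::make_unique<DbgVar>());
      PerFunction.push_back(Owned.back().get());
    }

    static void endFunction() {
      PerFunction.clear(); // no manual delete / DeleteContainerPointers needed
    }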
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h
index 2f5abc8..ffe4843 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.h
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h
@@ -27,6 +27,7 @@
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/FoldingSet.h"
#include "llvm/CodeGen/LexicalScopes.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/MC/MachineLocation.h"
@@ -71,16 +72,21 @@ class DbgVariable {
DIVariable Var; // Variable Descriptor.
DIE *TheDIE; // Variable DIE.
unsigned DotDebugLocOffset; // Offset in DotDebugLocEntries.
- DbgVariable *AbsVar; // Corresponding Abstract variable, if any.
const MachineInstr *MInsn; // DBG_VALUE instruction of the variable.
int FrameIndex;
DwarfDebug *DD;
public:
- // AbsVar may be NULL.
- DbgVariable(DIVariable V, DbgVariable *AV, DwarfDebug *DD)
- : Var(V), TheDIE(nullptr), DotDebugLocOffset(~0U), AbsVar(AV),
- MInsn(nullptr), FrameIndex(~0), DD(DD) {}
+ /// Construct a DbgVariable from a DIVariable.
+ DbgVariable(DIVariable V, DwarfDebug *DD)
+ : Var(V), TheDIE(nullptr), DotDebugLocOffset(~0U), MInsn(nullptr),
+ FrameIndex(~0), DD(DD) {}
+
+ /// Construct a DbgVariable from a DEBUG_VALUE instruction.
+ DbgVariable(const MachineInstr *DbgValue, DwarfDebug *DD)
+ : Var(DbgValue->getDebugVariable()), TheDIE(nullptr),
+ DotDebugLocOffset(~0U), MInsn(DbgValue), FrameIndex(~0), DD(DD) {}
// Accessors.
DIVariable getVariable() const { return Var; }
@@ -89,9 +95,7 @@ public:
void setDotDebugLocOffset(unsigned O) { DotDebugLocOffset = O; }
unsigned getDotDebugLocOffset() const { return DotDebugLocOffset; }
StringRef getName() const { return Var.getName(); }
- DbgVariable *getAbstractVariable() const { return AbsVar; }
const MachineInstr *getMInsn() const { return MInsn; }
- void setMInsn(const MachineInstr *M) { MInsn = M; }
int getFrameIndex() const { return FrameIndex; }
void setFrameIndex(int FI) { FrameIndex = FI; }
// Translate tag to proper Dwarf tag.
@@ -200,6 +204,7 @@ class DwarfDebug : public AsmPrinterHandler {
// Collection of abstract variables.
DenseMap<const MDNode *, std::unique_ptr<DbgVariable>> AbstractVariables;
+ SmallVector<std::unique_ptr<DbgVariable>, 64> ConcreteVariables;
// Collection of DebugLocEntry. Stored in a linked list so that DIELocLists
// can refer to them in spite of insertions into this list.
@@ -325,6 +330,8 @@ class DwarfDebug : public AsmPrinterHandler {
DwarfAccelTable AccelNamespace;
DwarfAccelTable AccelTypes;
+ DenseMap<const Function *, DISubprogram> FunctionDIs;
+
MCDwarfDwoLineTable *getDwoLineTable(const DwarfCompileUnit &);
void addScopeVariable(LexicalScope *LS, DbgVariable *Var);
@@ -334,8 +341,14 @@ class DwarfDebug : public AsmPrinterHandler {
}
/// \brief Find abstract variable associated with Var.
- DbgVariable *findAbstractVariable(DIVariable &Var, DebugLoc Loc);
- DbgVariable *findAbstractVariable(DIVariable &Var, const MDNode *Scope);
+ DbgVariable *getExistingAbstractVariable(const DIVariable &DV,
+ DIVariable &Cleansed);
+ DbgVariable *getExistingAbstractVariable(const DIVariable &DV);
+ void createAbstractVariable(const DIVariable &DV, LexicalScope *Scope);
+ void ensureAbstractVariableIsCreated(const DIVariable &Var,
+ const MDNode *Scope);
+ void ensureAbstractVariableIsCreatedIfScoped(const DIVariable &Var,
+ const MDNode *Scope);
/// \brief Find DIE for the given subprogram and attach appropriate
/// DW_AT_low_pc and DW_AT_high_pc attributes. If there are global
@@ -389,6 +402,8 @@ class DwarfDebug : public AsmPrinterHandler {
/// \brief Collect info for variables that were optimized out.
void collectDeadVariables();
+ void finishVariableDefinitions();
+
void finishSubprogramDefinitions();
/// \brief Finish off debug information after all functions have been
diff --git a/lib/CodeGen/AsmPrinter/DwarfException.h b/lib/CodeGen/AsmPrinter/DwarfException.h
index f792482..0440fce 100644
--- a/lib/CodeGen/AsmPrinter/DwarfException.h
+++ b/lib/CodeGen/AsmPrinter/DwarfException.h
@@ -14,138 +14,14 @@
#ifndef LLVM_CODEGEN_ASMPRINTER_DWARFEXCEPTION_H
#define LLVM_CODEGEN_ASMPRINTER_DWARFEXCEPTION_H
-#include "AsmPrinterHandler.h"
-#include "llvm/ADT/DenseMap.h"
+#include "EHStreamer.h"
#include "llvm/CodeGen/AsmPrinter.h"
-#include <vector>
namespace llvm {
-
-template <typename T> class SmallVectorImpl;
-struct LandingPadInfo;
-class MachineModuleInfo;
-class MachineInstr;
class MachineFunction;
-class MCAsmInfo;
-class MCExpr;
-class MCSymbol;
-class Function;
class ARMTargetStreamer;
-class AsmPrinter;
-
-//===----------------------------------------------------------------------===//
-/// DwarfException - Emits Dwarf exception handling directives.
-///
-class DwarfException : public AsmPrinterHandler {
-protected:
- /// Asm - Target of Dwarf emission.
- AsmPrinter *Asm;
-
- /// MMI - Collected machine module information.
- MachineModuleInfo *MMI;
-
- /// SharedTypeIds - How many leading type ids two landing pads have in common.
- static unsigned SharedTypeIds(const LandingPadInfo *L,
- const LandingPadInfo *R);
-
- /// PadRange - Structure holding a try-range and the associated landing pad.
- struct PadRange {
- // The index of the landing pad.
- unsigned PadIndex;
- // The index of the begin and end labels in the landing pad's label lists.
- unsigned RangeIndex;
- };
-
- typedef DenseMap<MCSymbol *, PadRange> RangeMapType;
-
- /// ActionEntry - Structure describing an entry in the actions table.
- struct ActionEntry {
- int ValueForTypeID; // The value to write - may not be equal to the type id.
- int NextAction;
- unsigned Previous;
- };
-
- /// CallSiteEntry - Structure describing an entry in the call-site table.
- struct CallSiteEntry {
- // The 'try-range' is BeginLabel .. EndLabel.
- MCSymbol *BeginLabel; // zero indicates the start of the function.
- MCSymbol *EndLabel; // zero indicates the end of the function.
-
- // The landing pad starts at PadLabel.
- MCSymbol *PadLabel; // zero indicates that there is no landing pad.
- unsigned Action;
- };
-
- /// ComputeActionsTable - Compute the actions table and gather the first
- /// action index for each landing pad site.
- unsigned ComputeActionsTable(const SmallVectorImpl<const LandingPadInfo*>&LPs,
- SmallVectorImpl<ActionEntry> &Actions,
- SmallVectorImpl<unsigned> &FirstActions);
-
- /// CallToNoUnwindFunction - Return `true' if this is a call to a function
- /// marked `nounwind'. Return `false' otherwise.
- bool CallToNoUnwindFunction(const MachineInstr *MI);
-
- /// ComputeCallSiteTable - Compute the call-site table. The entry for an
- /// invoke has a try-range containing the call, a non-zero landing pad and an
- /// appropriate action. The entry for an ordinary call has a try-range
- /// containing the call and zero for the landing pad and the action. Calls
- /// marked 'nounwind' have no entry and must not be contained in the try-range
- /// of any entry - they form gaps in the table. Entries must be ordered by
- /// try-range address.
- void ComputeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites,
- const RangeMapType &PadMap,
- const SmallVectorImpl<const LandingPadInfo *> &LPs,
- const SmallVectorImpl<unsigned> &FirstActions);
-
- /// EmitExceptionTable - Emit landing pads and actions.
- ///
- /// The general organization of the table is complex, but the basic concepts
- /// are easy. First there is a header which describes the location and
- /// organization of the three components that follow.
- /// 1. The landing pad site information describes the range of code covered
- /// by the try. In our case it's an accumulation of the ranges covered
- /// by the invokes in the try. There is also a reference to the landing
- /// pad that handles the exception once processed. Finally an index into
- /// the actions table.
- /// 2. The action table, in our case, is composed of pairs of type ids
- /// and next action offset. Starting with the action index from the
- /// landing pad site, each type Id is checked for a match to the current
- /// exception. If it matches then the exception and type id are passed
- /// on to the landing pad. Otherwise the next action is looked up. This
- /// chain is terminated with a next action of zero. If no type id is
- /// found the frame is unwound and handling continues.
- /// 3. Type id table contains references to all the C++ typeinfo for all
- /// catches in the function. This tables is reversed indexed base 1.
- void EmitExceptionTable();
-
- virtual void EmitTypeInfos(unsigned TTypeEncoding);
-
-public:
- //===--------------------------------------------------------------------===//
- // Main entry points.
- //
- DwarfException(AsmPrinter *A);
- virtual ~DwarfException();
-
- /// endModule - Emit all exception information that should come after the
- /// content.
- void endModule() override;
-
- /// beginFunction - Gather pre-function exception information. Assumes being
- /// emitted immediately after the function entry point.
- void beginFunction(const MachineFunction *MF) override;
-
- /// endFunction - Gather and emit post-function exception information.
- void endFunction(const MachineFunction *) override;
-
- // We don't need these.
- void setSymbolSize(const MCSymbol *Sym, uint64_t Size) override {}
- void beginInstruction(const MachineInstr *MI) override {}
- void endInstruction() override {}
-};
-class DwarfCFIException : public DwarfException {
+class DwarfCFIException : public EHStreamer {
/// shouldEmitPersonality - Per-function flag to indicate if .cfi_personality
/// should be emitted.
bool shouldEmitPersonality;
@@ -179,8 +55,8 @@ public:
void endFunction(const MachineFunction *) override;
};
-class ARMException : public DwarfException {
- void EmitTypeInfos(unsigned TTypeEncoding) override;
+class ARMException : public EHStreamer {
+ void emitTypeInfos(unsigned TTypeEncoding) override;
ARMTargetStreamer &getTargetStreamer();
/// shouldEmitCFI - Per-function flag to indicate if frame CFI info
@@ -206,7 +82,7 @@ public:
void endFunction(const MachineFunction *) override;
};
-class Win64Exception : public DwarfException {
+class Win64Exception : public EHStreamer {
/// shouldEmitPersonality - Per-function flag to indicate if personality
/// info should be emitted.
bool shouldEmitPersonality;
diff --git a/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
index a70c0f7..9538bee 100644
--- a/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
@@ -1071,6 +1071,8 @@ std::string DwarfUnit::getParentContextString(DIScope Context) const {
I != E; ++I) {
DIScope Ctx = *I;
StringRef Name = Ctx.getName();
+ if (Name.empty() && Ctx.isNameSpace())
+ Name = "(anonymous namespace)";
if (!Name.empty()) {
CS += Name;
CS += "::";
@@ -1359,12 +1361,13 @@ DIE *DwarfUnit::getOrCreateNameSpace(DINameSpace NS) {
return NDie;
DIE &NDie = createAndAddDIE(dwarf::DW_TAG_namespace, *ContextDIE, NS);
- if (!NS.getName().empty()) {
+ StringRef Name = NS.getName();
+ if (!Name.empty())
addString(NDie, dwarf::DW_AT_name, NS.getName());
- DD->addAccelNamespace(NS.getName(), NDie);
- addGlobalName(NS.getName(), NDie, NS.getContext());
- } else
- DD->addAccelNamespace("(anonymous namespace)", NDie);
+ else
+ Name = "(anonymous namespace)";
+ DD->addAccelNamespace(Name, NDie);
+ addGlobalName(Name, NDie, NS.getContext());
addSourceLine(NDie, NS);
return &NDie;
}
@@ -1382,14 +1385,14 @@ DIE *DwarfUnit::getOrCreateSubprogramDIE(DISubprogram SP) {
if (DISubprogram SPDecl = SP.getFunctionDeclaration()) {
// Add subprogram definitions to the CU die directly.
ContextDIE = &getUnitDie();
- // Build the decl now to ensure it preceeds the definition.
+ // Build the decl now to ensure it precedes the definition.
getOrCreateSubprogramDIE(SPDecl);
}
// DW_TAG_inlined_subroutine may refer to this DIE.
DIE &SPDie = createAndAddDIE(dwarf::DW_TAG_subprogram, *ContextDIE, SP);
- // Abort here and fill this in later, depending on whether or not this
+ // Stop here and fill this in later, depending on whether or not this
// subprogram turns out to have inlined instances or not.
if (SP.isDefinition())
return &SPDie;
@@ -1398,12 +1401,21 @@ DIE *DwarfUnit::getOrCreateSubprogramDIE(DISubprogram SP) {
return &SPDie;
}
+void DwarfUnit::applySubprogramAttributesToDefinition(DISubprogram SP, DIE &SPDie) {
+ DISubprogram SPDecl = SP.getFunctionDeclaration();
+ DIScope Context = resolve(SPDecl ? SPDecl.getContext() : SP.getContext());
+ applySubprogramAttributes(SP, SPDie);
+ addGlobalName(SP.getName(), SPDie, Context);
+}
+
void DwarfUnit::applySubprogramAttributes(DISubprogram SP, DIE &SPDie) {
DIE *DeclDie = nullptr;
StringRef DeclLinkageName;
if (DISubprogram SPDecl = SP.getFunctionDeclaration()) {
DeclDie = getDIE(SPDecl);
- assert(DeclDie);
+ assert(DeclDie && "This DIE should've already been constructed when the "
+ "definition DIE was created in "
+ "getOrCreateSubprogramDIE");
DeclLinkageName = SPDecl.getLinkageName();
}
@@ -1502,6 +1514,17 @@ void DwarfUnit::applySubprogramAttributes(DISubprogram SP, DIE &SPDie) {
addFlag(SPDie, dwarf::DW_AT_explicit);
}
+void DwarfUnit::applyVariableAttributes(const DbgVariable &Var,
+ DIE &VariableDie) {
+ StringRef Name = Var.getName();
+ if (!Name.empty())
+ addString(VariableDie, dwarf::DW_AT_name, Name);
+ addSourceLine(VariableDie, Var.getVariable());
+ addType(VariableDie, Var.getType());
+ if (Var.isArtificial())
+ addFlag(VariableDie, dwarf::DW_AT_artificial);
+}
+
// Return const expression if value is a GEP to access merged global
// constant. e.g.
// i8* getelementptr ({ i8, i8, i8, i8 }* @_MergedGlobals, i32 0, i32 0)
@@ -1665,10 +1688,8 @@ void DwarfCompileUnit::createGlobalVariableDIE(DIGlobalVariable GV) {
DD->addAccelName(GV.getLinkageName(), AddrDIE);
}
- if (!GV.isLocalToUnit())
- addGlobalName(GV.getName(),
- VariableSpecDIE ? *VariableSpecDIE : *VariableDIE,
- GV.getContext());
+ addGlobalName(GV.getName(), VariableSpecDIE ? *VariableSpecDIE : *VariableDIE,
+ GV.getContext());
}
/// constructSubrangeDIE - Construct subrange DIE from DISubrange.
@@ -1777,24 +1798,13 @@ std::unique_ptr<DIE> DwarfUnit::constructVariableDIE(DbgVariable &DV,
std::unique_ptr<DIE> DwarfUnit::constructVariableDIEImpl(const DbgVariable &DV,
bool Abstract) {
- StringRef Name = DV.getName();
-
// Define variable debug information entry.
auto VariableDie = make_unique<DIE>(DV.getTag());
- DbgVariable *AbsVar = DV.getAbstractVariable();
- if (AbsVar && AbsVar->getDIE())
- addDIEEntry(*VariableDie, dwarf::DW_AT_abstract_origin, *AbsVar->getDIE());
- else {
- if (!Name.empty())
- addString(*VariableDie, dwarf::DW_AT_name, Name);
- addSourceLine(*VariableDie, DV.getVariable());
- addType(*VariableDie, DV.getType());
- if (DV.isArtificial())
- addFlag(*VariableDie, dwarf::DW_AT_artificial);
- }
- if (Abstract)
+ if (Abstract) {
+ applyVariableAttributes(DV, *VariableDie);
return VariableDie;
+ }
// Add variable address.
diff --git a/lib/CodeGen/AsmPrinter/DwarfUnit.h b/lib/CodeGen/AsmPrinter/DwarfUnit.h
index acb7528..b7b83b2 100644
--- a/lib/CodeGen/AsmPrinter/DwarfUnit.h
+++ b/lib/CodeGen/AsmPrinter/DwarfUnit.h
@@ -400,6 +400,8 @@ public:
DIE *getOrCreateSubprogramDIE(DISubprogram SP);
void applySubprogramAttributes(DISubprogram SP, DIE &SPDie);
+ void applySubprogramAttributesToDefinition(DISubprogram SP, DIE &SPDie);
+ void applyVariableAttributes(const DbgVariable &Var, DIE &VariableDie);
/// getOrCreateTypeDIE - Find existing DIE or create new DIE for the
/// given DIType.
diff --git a/lib/CodeGen/AsmPrinter/DwarfException.cpp b/lib/CodeGen/AsmPrinter/EHStreamer.cpp
index 3a12c73..73f62bf 100644
--- a/lib/CodeGen/AsmPrinter/DwarfException.cpp
+++ b/lib/CodeGen/AsmPrinter/EHStreamer.cpp
@@ -1,4 +1,4 @@
-//===-- CodeGen/AsmPrinter/DwarfException.cpp - Dwarf Exception Impl ------===//
+//===-- CodeGen/AsmPrinter/EHStreamer.cpp - Exception Directive Streamer --===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,45 +7,31 @@
//
//===----------------------------------------------------------------------===//
//
-// This file contains support for writing DWARF exception info into asm files.
+// This file contains support for writing exception info into assembly files.
//
//===----------------------------------------------------------------------===//
-#include "DwarfException.h"
-#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/ADT/Twine.h"
+#include "EHStreamer.h"
#include "llvm/CodeGen/AsmPrinter.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/Mangler.h"
-#include "llvm/IR/Module.h"
+#include "llvm/IR/Function.h"
#include "llvm/MC/MCAsmInfo.h"
-#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCExpr.h"
-#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
-#include "llvm/Support/Dwarf.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/LEB128.h"
-#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
-#include "llvm/Target/TargetOptions.h"
-#include "llvm/Target/TargetRegisterInfo.h"
+
using namespace llvm;
-DwarfException::DwarfException(AsmPrinter *A)
- : Asm(A), MMI(Asm->MMI) {}
+EHStreamer::EHStreamer(AsmPrinter *A) : Asm(A), MMI(Asm->MMI) {}
-DwarfException::~DwarfException() {}
+EHStreamer::~EHStreamer() {}
-/// SharedTypeIds - How many leading type ids two landing pads have in common.
-unsigned DwarfException::SharedTypeIds(const LandingPadInfo *L,
- const LandingPadInfo *R) {
+/// How many leading type ids two landing pads have in common.
+unsigned EHStreamer::sharedTypeIDs(const LandingPadInfo *L,
+ const LandingPadInfo *R) {
const std::vector<int> &LIds = L->TypeIds, &RIds = R->TypeIds;
unsigned LSize = LIds.size(), RSize = RIds.size();
unsigned MinSize = LSize < RSize ? LSize : RSize;
@@ -58,10 +44,10 @@ unsigned DwarfException::SharedTypeIds(const LandingPadInfo *L,
return Count;
}
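sharedTypeIDs (the renamed SharedTypeIds) returns the length of the common prefix of two landing pads' type-id lists; for example, {1, 2, 3} and {1, 2, 7} share two leading ids. A standalone equivalent of the loop above:

    #include <algorithm>
    #include <cstddef>
    #include <vector>

    // Common-prefix length of two type-id lists, as in EHStreamer::sharedTypeIDs.
    static unsigned sharedPrefix(const std::vector<int> &L,
                                 const std::vector<int> &R) {
      std::size_t Min = std::min(L.size(), R.size());
      unsigned Count = 0;
      while (Count < Min && L[Count] == R[Count])
        ++Count;
      return Count;
    }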
-/// ComputeActionsTable - Compute the actions table and gather the first action
-/// index for each landing pad site.
-unsigned DwarfException::
-ComputeActionsTable(const SmallVectorImpl<const LandingPadInfo*> &LandingPads,
+/// Compute the actions table and gather the first action index for each landing
+/// pad site.
+unsigned EHStreamer::
+computeActionsTable(const SmallVectorImpl<const LandingPadInfo*> &LandingPads,
SmallVectorImpl<ActionEntry> &Actions,
SmallVectorImpl<unsigned> &FirstActions) {
@@ -109,7 +95,7 @@ ComputeActionsTable(const SmallVectorImpl<const LandingPadInfo*> &LandingPads,
I = LandingPads.begin(), E = LandingPads.end(); I != E; ++I) {
const LandingPadInfo *LPI = *I;
const std::vector<int> &TypeIds = LPI->TypeIds;
- unsigned NumShared = PrevLPI ? SharedTypeIds(LPI, PrevLPI) : 0;
+ unsigned NumShared = PrevLPI ? sharedTypeIDs(LPI, PrevLPI) : 0;
unsigned SizeSiteActions = 0;
if (NumShared < TypeIds.size()) {
@@ -167,9 +153,9 @@ ComputeActionsTable(const SmallVectorImpl<const LandingPadInfo*> &LandingPads,
return SizeActions;
}
-/// CallToNoUnwindFunction - Return `true' if this is a call to a function
-/// marked `nounwind'. Return `false' otherwise.
-bool DwarfException::CallToNoUnwindFunction(const MachineInstr *MI) {
+/// Return `true' if this is a call to a function marked `nounwind'. Return
+/// `false' otherwise.
+bool EHStreamer::callToNoUnwindFunction(const MachineInstr *MI) {
assert(MI->isCall() && "This should be a call instruction!");
bool MarkedNoUnwind = false;
@@ -201,15 +187,14 @@ bool DwarfException::CallToNoUnwindFunction(const MachineInstr *MI) {
return MarkedNoUnwind;
}
-/// ComputeCallSiteTable - Compute the call-site table. The entry for an invoke
-/// has a try-range containing the call, a non-zero landing pad, and an
-/// appropriate action. The entry for an ordinary call has a try-range
-/// containing the call and zero for the landing pad and the action. Calls
-/// marked 'nounwind' have no entry and must not be contained in the try-range
-/// of any entry - they form gaps in the table. Entries must be ordered by
-/// try-range address.
-void DwarfException::
-ComputeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites,
+/// Compute the call-site table. The entry for an invoke has a try-range
+/// containing the call, a non-zero landing pad, and an appropriate action. The
+/// entry for an ordinary call has a try-range containing the call and zero for
+/// the landing pad and the action. Calls marked 'nounwind' have no entry and
+/// must not be contained in the try-range of any entry - they form gaps in the
+/// table. Entries must be ordered by try-range address.
+void EHStreamer::
+computeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites,
const RangeMapType &PadMap,
const SmallVectorImpl<const LandingPadInfo *> &LandingPads,
const SmallVectorImpl<unsigned> &FirstActions) {
@@ -228,7 +213,7 @@ ComputeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites,
for (const auto &MI : MBB) {
if (!MI.isEHLabel()) {
if (MI.isCall())
- SawPotentiallyThrowing |= !CallToNoUnwindFunction(&MI);
+ SawPotentiallyThrowing |= !callToNoUnwindFunction(&MI);
continue;
}
@@ -308,7 +293,7 @@ ComputeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites,
}
}
-/// EmitExceptionTable - Emit landing pads and actions.
+/// Emit landing pads and actions.
///
/// The general organization of the table is complex, but the basic concepts are
/// easy. First there is a header which describes the location and organization
@@ -328,7 +313,7 @@ ComputeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites,
/// unwound and handling continues.
/// 3. Type ID table contains references to all the C++ typeinfo for all
/// catches in the function. This table is reverse indexed, base 1.
-void DwarfException::EmitExceptionTable() {
+void EHStreamer::emitExceptionTable() {
const std::vector<const GlobalVariable *> &TypeInfos = MMI->getTypeInfos();
const std::vector<unsigned> &FilterIds = MMI->getFilterIds();
const std::vector<LandingPadInfo> &PadInfos = MMI->getLandingPads();
@@ -350,7 +335,8 @@ void DwarfException::EmitExceptionTable() {
// landing pad site.
SmallVector<ActionEntry, 32> Actions;
SmallVector<unsigned, 64> FirstActions;
- unsigned SizeActions=ComputeActionsTable(LandingPads, Actions, FirstActions);
+ unsigned SizeActions =
+ computeActionsTable(LandingPads, Actions, FirstActions);
// Invokes and nounwind calls have entries in PadMap (due to being bracketed
// by try-range labels when lowered). Ordinary calls do not, so appropriate
@@ -368,7 +354,7 @@ void DwarfException::EmitExceptionTable() {
// Compute the call-site table.
SmallVector<CallSiteEntry, 64> CallSites;
- ComputeCallSiteTable(CallSites, PadMap, LandingPads, FirstActions);
+ computeCallSiteTable(CallSites, PadMap, LandingPads, FirstActions);
// Final tallies.
@@ -657,12 +643,12 @@ void DwarfException::EmitExceptionTable() {
Asm->EmitSLEB128(Action.NextAction);
}
- EmitTypeInfos(TTypeEncoding);
+ emitTypeInfos(TTypeEncoding);
Asm->EmitAlignment(2);
}
-void DwarfException::EmitTypeInfos(unsigned TTypeEncoding) {
+void EHStreamer::emitTypeInfos(unsigned TTypeEncoding) {
const std::vector<const GlobalVariable *> &TypeInfos = MMI->getTypeInfos();
const std::vector<unsigned> &FilterIds = MMI->getFilterIds();
@@ -703,19 +689,18 @@ void DwarfException::EmitTypeInfos(unsigned TTypeEncoding) {
}
}
-/// endModule - Emit all exception information that should come after the
-/// content.
-void DwarfException::endModule() {
+/// Emit all exception information that should come after the content.
+void EHStreamer::endModule() {
llvm_unreachable("Should be implemented");
}
-/// beginFunction - Gather pre-function exception information. Assumes it's
-/// being emitted immediately after the function entry point.
-void DwarfException::beginFunction(const MachineFunction *MF) {
+/// Gather pre-function exception information. Assumes it's being emitted
+/// immediately after the function entry point.
+void EHStreamer::beginFunction(const MachineFunction *MF) {
llvm_unreachable("Should be implemented");
}
-/// endFunction - Gather and emit post-function exception information.
-void DwarfException::endFunction(const MachineFunction *) {
+/// Gather and emit post-function exception information.
+void EHStreamer::endFunction(const MachineFunction *) {
llvm_unreachable("Should be implemented");
}
diff --git a/lib/CodeGen/AsmPrinter/EHStreamer.h b/lib/CodeGen/AsmPrinter/EHStreamer.h
new file mode 100644
index 0000000..2b6ba78
--- /dev/null
+++ b/lib/CodeGen/AsmPrinter/EHStreamer.h
@@ -0,0 +1,138 @@
+//===-- EHStreamer.h - Exception Handling Directive Streamer ---*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing exception info into assembly files.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_ASMPRINTER_EHSTREAMER_H
+#define LLVM_CODEGEN_ASMPRINTER_EHSTREAMER_H
+
+#include "AsmPrinterHandler.h"
+#include "llvm/ADT/DenseMap.h"
+
+namespace llvm {
+struct LandingPadInfo;
+class MachineModuleInfo;
+class MachineInstr;
+class MachineFunction;
+class AsmPrinter;
+
+template <typename T>
+class SmallVectorImpl;
+
+/// Emits exception handling directives.
+class EHStreamer : public AsmPrinterHandler {
+protected:
+ /// Target of directive emission.
+ AsmPrinter *Asm;
+
+ /// Collected machine module information.
+ MachineModuleInfo *MMI;
+
+ /// How many leading type ids two landing pads have in common.
+ static unsigned sharedTypeIDs(const LandingPadInfo *L,
+ const LandingPadInfo *R);
+
+ /// Structure holding a try-range and the associated landing pad.
+ struct PadRange {
+ // The index of the landing pad.
+ unsigned PadIndex;
+ // The index of the begin and end labels in the landing pad's label lists.
+ unsigned RangeIndex;
+ };
+
+ typedef DenseMap<MCSymbol *, PadRange> RangeMapType;
+
+ /// Structure describing an entry in the actions table.
+ struct ActionEntry {
+ int ValueForTypeID; // The value to write - may not be equal to the type id.
+ int NextAction;
+ unsigned Previous;
+ };
+
+ /// Structure describing an entry in the call-site table.
+ struct CallSiteEntry {
+ // The 'try-range' is BeginLabel .. EndLabel.
+ MCSymbol *BeginLabel; // zero indicates the start of the function.
+ MCSymbol *EndLabel; // zero indicates the end of the function.
+
+ // The landing pad starts at PadLabel.
+ MCSymbol *PadLabel; // zero indicates that there is no landing pad.
+ unsigned Action;
+ };
+
+ /// Compute the actions table and gather the first action index for each
+ /// landing pad site.
+ unsigned computeActionsTable(const SmallVectorImpl<const LandingPadInfo*>&LPs,
+ SmallVectorImpl<ActionEntry> &Actions,
+ SmallVectorImpl<unsigned> &FirstActions);
+
+ /// Return `true' if this is a call to a function marked `nounwind'. Return
+ /// `false' otherwise.
+ bool callToNoUnwindFunction(const MachineInstr *MI);
+
+ /// Compute the call-site table. The entry for an invoke has a try-range
+ /// containing the call, a non-zero landing pad and an appropriate action.
+ /// The entry for an ordinary call has a try-range containing the call and
+ /// zero for the landing pad and the action. Calls marked 'nounwind' have
+ /// no entry and must not be contained in the try-range of any entry - they
+ /// form gaps in the table. Entries must be ordered by try-range address.
+ void computeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites,
+ const RangeMapType &PadMap,
+ const SmallVectorImpl<const LandingPadInfo *> &LPs,
+ const SmallVectorImpl<unsigned> &FirstActions);
+
+ /// Emit landing pads and actions.
+ ///
+ /// The general organization of the table is complex, but the basic concepts
+ /// are easy. First there is a header which describes the location and
+ /// organization of the three components that follow.
+ /// 1. The landing pad site information describes the range of code covered
+ /// by the try. In our case it's an accumulation of the ranges covered
+ /// by the invokes in the try. There is also a reference to the landing
+ /// pad that handles the exception once processed. Finally an index into
+ /// the actions table.
+ /// 2. The action table, in our case, is composed of pairs of type ids
+ /// and next action offset. Starting with the action index from the
+ /// landing pad site, each type Id is checked for a match to the current
+ /// exception. If it matches then the exception and type id are passed
+ /// on to the landing pad. Otherwise the next action is looked up. This
+ /// chain is terminated with a next action of zero. If no type id is
+ /// found the frame is unwound and handling continues.
+ /// 3. Type id table contains references to all the C++ typeinfo for all
+/// catches in the function. This table is reverse indexed, base 1.
+ void emitExceptionTable();
+
+ virtual void emitTypeInfos(unsigned TTypeEncoding);
+
+public:
+ EHStreamer(AsmPrinter *A);
+ virtual ~EHStreamer();
+
+ /// Emit all exception information that should come after the content.
+ void endModule() override;
+
+ /// Gather pre-function exception information. Assumes being emitted
+ /// immediately after the function entry point.
+ void beginFunction(const MachineFunction *MF) override;
+
+ /// Gather and emit post-function exception information.
+ void endFunction(const MachineFunction *) override;
+
+ // Unused.
+ void setSymbolSize(const MCSymbol *Sym, uint64_t Size) override {}
+ void beginInstruction(const MachineInstr *MI) override {}
+ void endInstruction() override {}
+};
+}
+
+#endif
+
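The action-table traversal described in the emitExceptionTable comment above (type-id / next-action pairs, chain terminated by a next action of zero) can be sketched as follows. This uses a hypothetical index-based in-memory layout; the real LSDA encodes the records as self-relative SLEB128 offsets:

    #include <vector>

    // One action record: a type id to test and a 1-based link to the next
    // record; Next == 0 terminates the chain.
    struct Action { int TypeID; int Next; };

    // Walk the chain starting at FirstAction (1-based; 0 means "no action"),
    // returning the first matching type id, or 0 if nothing matches and the
    // unwinder should keep going past this frame.
    static int findHandler(const std::vector<Action> &Actions, int FirstAction,
                           int ThrownTypeID) {
      for (int I = FirstAction; I != 0; I = Actions[I - 1].Next)
        if (Actions[I - 1].TypeID == ThrownTypeID)
          return ThrownTypeID; // exception + type id go to the landing pad
      return 0;
    }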
diff --git a/lib/CodeGen/AsmPrinter/Win64Exception.cpp b/lib/CodeGen/AsmPrinter/Win64Exception.cpp
index 17d8bff..81285d5 100644
--- a/lib/CodeGen/AsmPrinter/Win64Exception.cpp
+++ b/lib/CodeGen/AsmPrinter/Win64Exception.cpp
@@ -38,9 +38,8 @@
using namespace llvm;
Win64Exception::Win64Exception(AsmPrinter *A)
- : DwarfException(A),
- shouldEmitPersonality(false), shouldEmitLSDA(false), shouldEmitMoves(false)
- {}
+ : EHStreamer(A), shouldEmitPersonality(false), shouldEmitLSDA(false),
+ shouldEmitMoves(false) {}
Win64Exception::~Win64Exception() {}
@@ -73,14 +72,14 @@ void Win64Exception::beginFunction(const MachineFunction *MF) {
if (!shouldEmitPersonality && !shouldEmitMoves)
return;
- Asm->OutStreamer.EmitWin64EHStartProc(Asm->CurrentFnSym);
+ Asm->OutStreamer.EmitWinCFIStartProc(Asm->CurrentFnSym);
if (!shouldEmitPersonality)
return;
- MCSymbol *GCCHandlerSym =
- Asm->GetExternalSymbolSymbol("_GCC_specific_handler");
- Asm->OutStreamer.EmitWin64EHHandler(GCCHandlerSym, true, true);
+ const MCSymbol *PersHandlerSym =
+ TLOF.getCFIPersonalitySymbol(Per, *Asm->Mang, Asm->TM, MMI);
+ Asm->OutStreamer.EmitWinEHHandler(PersHandlerSym, true, true);
Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_begin",
Asm->getFunctionNumber()));
@@ -99,17 +98,10 @@ void Win64Exception::endFunction(const MachineFunction *) {
MMI->TidyLandingPads();
if (shouldEmitPersonality) {
- const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
- const Function *Per = MMI->getPersonalities()[MMI->getPersonalityIndex()];
- const MCSymbol *Sym =
- TLOF.getCFIPersonalitySymbol(Per, *Asm->Mang, Asm->TM, MMI);
-
Asm->OutStreamer.PushSection();
- Asm->OutStreamer.EmitWin64EHHandlerData();
- Asm->OutStreamer.EmitValue(MCSymbolRefExpr::Create(Sym, Asm->OutContext),
- 4);
- EmitExceptionTable();
+ Asm->OutStreamer.EmitWinEHHandlerData();
+ emitExceptionTable();
Asm->OutStreamer.PopSection();
}
- Asm->OutStreamer.EmitWin64EHEndProc();
+ Asm->OutStreamer.EmitWinCFIEndProc();
}
diff --git a/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp b/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp
index 2212941..6a5c431 100644
--- a/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp
+++ b/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp
@@ -308,7 +308,7 @@ void WinCodeViewLineTables::endFunction(const MachineFunction *MF) {
return;
const Function *GV = MF->getFunction();
- assert(FnDebugInfo.count(GV) == true);
+ assert(FnDebugInfo.count(GV));
assert(CurFn == &FnDebugInfo[GV]);
if (CurFn->Instrs.empty()) {
diff --git a/lib/CodeGen/AtomicExpandLoadLinkedPass.cpp b/lib/CodeGen/AtomicExpandLoadLinkedPass.cpp
index d995333..421946d 100644
--- a/lib/CodeGen/AtomicExpandLoadLinkedPass.cpp
+++ b/lib/CodeGen/AtomicExpandLoadLinkedPass.cpp
@@ -21,17 +21,19 @@
#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+
using namespace llvm;
#define DEBUG_TYPE "arm-atomic-expand"
namespace {
class AtomicExpandLoadLinked : public FunctionPass {
- const TargetLowering *TLI;
+ const TargetMachine *TM;
public:
static char ID; // Pass identification, replacement for typeid
explicit AtomicExpandLoadLinked(const TargetMachine *TM = nullptr)
- : FunctionPass(ID), TLI(TM ? TM->getTargetLowering() : nullptr) {
+ : FunctionPass(ID), TM(TM) {
initializeAtomicExpandLoadLinkedPass(*PassRegistry::getPassRegistry());
}
@@ -50,29 +52,16 @@ namespace {
char AtomicExpandLoadLinked::ID = 0;
char &llvm::AtomicExpandLoadLinkedID = AtomicExpandLoadLinked::ID;
-
-static void *initializeAtomicExpandLoadLinkedPassOnce(PassRegistry &Registry) {
- PassInfo *PI = new PassInfo(
- "Expand Atomic calls in terms of load-linked & store-conditional",
- "atomic-ll-sc", &AtomicExpandLoadLinked::ID,
- PassInfo::NormalCtor_t(callDefaultCtor<AtomicExpandLoadLinked>), false,
- false, PassInfo::TargetMachineCtor_t(
- callTargetMachineCtor<AtomicExpandLoadLinked>));
- Registry.registerPass(*PI, true);
- return PI;
-}
-
-void llvm::initializeAtomicExpandLoadLinkedPass(PassRegistry &Registry) {
- CALL_ONCE_INITIALIZATION(initializeAtomicExpandLoadLinkedPassOnce)
-}
-
+INITIALIZE_TM_PASS(AtomicExpandLoadLinked, "atomic-ll-sc",
+ "Expand Atomic calls in terms of load-linked & store-conditional",
+ false, false)
FunctionPass *llvm::createAtomicExpandLoadLinkedPass(const TargetMachine *TM) {
return new AtomicExpandLoadLinked(TM);
}
bool AtomicExpandLoadLinked::runOnFunction(Function &F) {
- if (!TLI)
+ if (!TM || !TM->getSubtargetImpl()->enableAtomicExpandLoadLinked())
return false;
SmallVector<Instruction *, 1> AtomicInsts;
@@ -89,7 +78,7 @@ bool AtomicExpandLoadLinked::runOnFunction(Function &F) {
bool MadeChange = false;
for (Instruction *Inst : AtomicInsts) {
- if (!TLI->shouldExpandAtomicInIR(Inst))
+ if (!TM->getTargetLowering()->shouldExpandAtomicInIR(Inst))
continue;
if (AtomicRMWInst *AI = dyn_cast<AtomicRMWInst>(Inst))
@@ -111,13 +100,14 @@ bool AtomicExpandLoadLinked::expandAtomicLoad(LoadInst *LI) {
// Load instructions don't actually need a leading fence, even in the
// SequentiallyConsistent case.
AtomicOrdering MemOpOrder =
- TLI->getInsertFencesForAtomic() ? Monotonic : LI->getOrdering();
+ TM->getTargetLowering()->getInsertFencesForAtomic() ? Monotonic
+ : LI->getOrdering();
// The only 64-bit load guaranteed to be single-copy atomic by the ARM ARM is
// an ldrexd (A3.5.3).
IRBuilder<> Builder(LI);
- Value *Val =
- TLI->emitLoadLinked(Builder, LI->getPointerOperand(), MemOpOrder);
+ Value *Val = TM->getTargetLowering()->emitLoadLinked(
+ Builder, LI->getPointerOperand(), MemOpOrder);
insertTrailingFence(Builder, LI->getOrdering());
@@ -178,7 +168,8 @@ bool AtomicExpandLoadLinked::expandAtomicRMW(AtomicRMWInst *AI) {
// Start the main loop block now that we've taken care of the preliminaries.
Builder.SetInsertPoint(LoopBB);
- Value *Loaded = TLI->emitLoadLinked(Builder, Addr, MemOpOrder);
+ Value *Loaded =
+ TM->getTargetLowering()->emitLoadLinked(Builder, Addr, MemOpOrder);
Value *NewVal;
switch (AI->getOperation()) {
@@ -195,7 +186,7 @@ bool AtomicExpandLoadLinked::expandAtomicRMW(AtomicRMWInst *AI) {
NewVal = Builder.CreateAnd(Loaded, AI->getValOperand(), "new");
break;
case AtomicRMWInst::Nand:
- NewVal = Builder.CreateAnd(Loaded, Builder.CreateNot(AI->getValOperand()),
+ NewVal = Builder.CreateNot(Builder.CreateAnd(Loaded, AI->getValOperand()),
"new");
break;
case AtomicRMWInst::Or:
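The Nand change above is a semantic fix rather than a cleanup: atomic nand
must compute ~(loaded & val), whereas the old code computed loaded & ~val.
A self-contained plain-C++ sketch of the difference (toy values, no LLVM API):

#include <cassert>
#include <cstdint>

int main() {
  uint8_t Loaded = 0x0C, Val = 0x0A;
  uint8_t OldResult = Loaded & static_cast<uint8_t>(~Val);    // and-not: 0x04
  uint8_t NandResult = static_cast<uint8_t>(~(Loaded & Val)); // nand:    0xF7
  assert(OldResult != NandResult); // the two formulas really do disagree
  return 0;
}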
@@ -224,8 +215,8 @@ bool AtomicExpandLoadLinked::expandAtomicRMW(AtomicRMWInst *AI) {
llvm_unreachable("Unknown atomic op");
}
- Value *StoreSuccess =
- TLI->emitStoreConditional(Builder, NewVal, Addr, MemOpOrder);
+ Value *StoreSuccess = TM->getTargetLowering()->emitStoreConditional(
+ Builder, NewVal, Addr, MemOpOrder);
Value *TryAgain = Builder.CreateICmpNE(
StoreSuccess, ConstantInt::get(IntegerType::get(Ctx, 32), 0), "tryagain");
Builder.CreateCondBr(TryAgain, LoopBB, ExitBB);
@@ -256,19 +247,26 @@ bool AtomicExpandLoadLinked::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
// %loaded = @load.linked(%addr)
// %should_store = icmp eq %loaded, %desired
// br i1 %should_store, label %cmpxchg.trystore,
- // label %cmpxchg.end/%cmpxchg.barrier
+ // label %cmpxchg.failure
// cmpxchg.trystore:
// %stored = @store_conditional(%new, %addr)
- // %try_again = icmp i32 ne %stored, 0
- // br i1 %try_again, label %loop, label %cmpxchg.end
- // cmpxchg.barrier:
+ // %success = icmp eq i32 %stored, 0
+ // br i1 %success, label %cmpxchg.success, label %loop/%cmpxchg.failure
+ // cmpxchg.success:
+ // fence?
+ // br label %cmpxchg.end
+ // cmpxchg.failure:
// fence?
// br label %cmpxchg.end
// cmpxchg.end:
+ // %success = phi i1 [true, %cmpxchg.success], [false, %cmpxchg.failure]
+ // %restmp = insertvalue { iN, i1 } undef, iN %loaded, 0
+ // %res = insertvalue { iN, i1 } %restmp, i1 %success, 1
// [...]
BasicBlock *ExitBB = BB->splitBasicBlock(CI, "cmpxchg.end");
- auto BarrierBB = BasicBlock::Create(Ctx, "cmpxchg.barrier", F, ExitBB);
- auto TryStoreBB = BasicBlock::Create(Ctx, "cmpxchg.trystore", F, BarrierBB);
+ auto FailureBB = BasicBlock::Create(Ctx, "cmpxchg.failure", F, ExitBB);
+ auto SuccessBB = BasicBlock::Create(Ctx, "cmpxchg.success", F, FailureBB);
+ auto TryStoreBB = BasicBlock::Create(Ctx, "cmpxchg.trystore", F, SuccessBB);
auto LoopBB = BasicBlock::Create(Ctx, "cmpxchg.start", F, TryStoreBB);
// This grabs the DebugLoc from CI
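A plain-C++ rendering of the control flow drawn in the comment above: the
expansion yields a { loaded, success } pair, and a weak cmpxchg may fail
instead of retrying. Here ll() and sc() are hypothetical stand-ins for the
target's load-linked / store-conditional intrinsics; this sketches the shape,
not the IRBuilder code that follows:

#include <utility>

extern int ll(int *Addr);           // hypothetical load-linked
extern bool sc(int *Addr, int Val); // hypothetical store-conditional

std::pair<int, bool> cmpxchg(int *Addr, int Desired, int New, bool IsWeak) {
  for (;;) {
    int Loaded = ll(Addr);    // cmpxchg.start
    if (Loaded != Desired)
      return {Loaded, false}; // cmpxchg.failure
    if (sc(Addr, New))
      return {Loaded, true};  // cmpxchg.success
    if (IsWeak)
      return {Loaded, false}; // weak: allowed to fail spuriously
    // strong: the store-conditional failed, so retry the loop
  }
}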
@@ -284,37 +282,82 @@ bool AtomicExpandLoadLinked::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
// Start the main loop block now that we've taken care of the preliminaries.
Builder.SetInsertPoint(LoopBB);
- Value *Loaded = TLI->emitLoadLinked(Builder, Addr, MemOpOrder);
+ Value *Loaded =
+ TM->getTargetLowering()->emitLoadLinked(Builder, Addr, MemOpOrder);
Value *ShouldStore =
Builder.CreateICmpEQ(Loaded, CI->getCompareOperand(), "should_store");
  // If the cmpxchg doesn't actually need any ordering when it fails, we can
// jump straight past that fence instruction (if it exists).
- BasicBlock *FailureBB = FailureOrder == Monotonic ? ExitBB : BarrierBB;
Builder.CreateCondBr(ShouldStore, TryStoreBB, FailureBB);
Builder.SetInsertPoint(TryStoreBB);
- Value *StoreSuccess = TLI->emitStoreConditional(
+ Value *StoreSuccess = TM->getTargetLowering()->emitStoreConditional(
Builder, CI->getNewValOperand(), Addr, MemOpOrder);
- Value *TryAgain = Builder.CreateICmpNE(
+ StoreSuccess = Builder.CreateICmpEQ(
StoreSuccess, ConstantInt::get(Type::getInt32Ty(Ctx), 0), "success");
- Builder.CreateCondBr(TryAgain, LoopBB, BarrierBB);
+ Builder.CreateCondBr(StoreSuccess, SuccessBB,
+ CI->isWeak() ? FailureBB : LoopBB);
- // Finally, make sure later instructions don't get reordered with a fence if
- // necessary.
- Builder.SetInsertPoint(BarrierBB);
+ // Make sure later instructions don't get reordered with a fence if necessary.
+ Builder.SetInsertPoint(SuccessBB);
insertTrailingFence(Builder, SuccessOrder);
Builder.CreateBr(ExitBB);
- CI->replaceAllUsesWith(Loaded);
- CI->eraseFromParent();
+ Builder.SetInsertPoint(FailureBB);
+ insertTrailingFence(Builder, FailureOrder);
+ Builder.CreateBr(ExitBB);
+
+ // Finally, we have control-flow based knowledge of whether the cmpxchg
+ // succeeded or not. We expose this to later passes by converting any
+ // subsequent "icmp eq/ne %loaded, %oldval" into a use of an appropriate PHI.
+  // Set up the builder so we can create any PHIs we need.
+ Builder.SetInsertPoint(ExitBB, ExitBB->begin());
+ PHINode *Success = Builder.CreatePHI(Type::getInt1Ty(Ctx), 2);
+ Success->addIncoming(ConstantInt::getTrue(Ctx), SuccessBB);
+ Success->addIncoming(ConstantInt::getFalse(Ctx), FailureBB);
+
+ // Look for any users of the cmpxchg that are just comparing the loaded value
+ // against the desired one, and replace them with the CFG-derived version.
+ SmallVector<ExtractValueInst *, 2> PrunedInsts;
+ for (auto User : CI->users()) {
+ ExtractValueInst *EV = dyn_cast<ExtractValueInst>(User);
+ if (!EV)
+ continue;
+
+ assert(EV->getNumIndices() == 1 && EV->getIndices()[0] <= 1 &&
+ "weird extraction from { iN, i1 }");
+
+ if (EV->getIndices()[0] == 0)
+ EV->replaceAllUsesWith(Loaded);
+ else
+ EV->replaceAllUsesWith(Success);
+
+ PrunedInsts.push_back(EV);
+ }
+
+  // We can remove the instructions now that we're no longer iterating
+  // through them.
+ for (auto EV : PrunedInsts)
+ EV->eraseFromParent();
+
+ if (!CI->use_empty()) {
+ // Some use of the full struct return that we don't understand has happened,
+ // so we've got to reconstruct it properly.
+ Value *Res;
+ Res = Builder.CreateInsertValue(UndefValue::get(CI->getType()), Loaded, 0);
+ Res = Builder.CreateInsertValue(Res, Success, 1);
+
+ CI->replaceAllUsesWith(Res);
+ }
+
+ CI->eraseFromParent();
return true;
}
AtomicOrdering AtomicExpandLoadLinked::insertLeadingFence(IRBuilder<> &Builder,
AtomicOrdering Ord) {
- if (!TLI->getInsertFencesForAtomic())
+ if (!TM->getTargetLowering()->getInsertFencesForAtomic())
return Ord;
if (Ord == Release || Ord == AcquireRelease || Ord == SequentiallyConsistent)
@@ -327,7 +370,7 @@ AtomicOrdering AtomicExpandLoadLinked::insertLeadingFence(IRBuilder<> &Builder,
void AtomicExpandLoadLinked::insertTrailingFence(IRBuilder<> &Builder,
AtomicOrdering Ord) {
- if (!TLI->getInsertFencesForAtomic())
+ if (!TM->getTargetLowering()->getInsertFencesForAtomic())
return;
if (Ord == Acquire || Ord == AcquireRelease)
diff --git a/lib/CodeGen/BasicTargetTransformInfo.cpp b/lib/CodeGen/BasicTargetTransformInfo.cpp
index 7f31b1a..b2737bf 100644
--- a/lib/CodeGen/BasicTargetTransformInfo.cpp
+++ b/lib/CodeGen/BasicTargetTransformInfo.cpp
@@ -39,6 +39,9 @@ class BasicTTI final : public ImmutablePass, public TargetTransformInfo {
/// are set if the result needs to be inserted and/or extracted from vectors.
unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const;
+ /// Estimate the cost overhead of SK_Alternate shuffle.
+ unsigned getAltShuffleOverhead(Type *Ty) const;
+
const TargetLoweringBase *getTLI() const { return TM->getTargetLowering(); }
public:
@@ -327,8 +330,28 @@ unsigned BasicTTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
return OpCost;
}
+unsigned BasicTTI::getAltShuffleOverhead(Type *Ty) const {
+ assert(Ty->isVectorTy() && "Can only shuffle vectors");
+ unsigned Cost = 0;
+  // The shuffle cost is the cost of extracting the elements from the source
+  // vectors plus the cost of inserting them into the result vector.
+
+  // E.g. a <4 x float> shuffle with mask <0,5,2,7> extracts index 0 of the
+  // first vector, index 1 of the second vector, index 2 of the first vector,
+  // and index 3 of the second vector, and inserts them at indices <0,1,2,3>
+  // of the result vector.
+ for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) {
+ Cost += TopTTI->getVectorInstrCost(Instruction::InsertElement, Ty, i);
+ Cost += TopTTI->getVectorInstrCost(Instruction::ExtractElement, Ty, i);
+ }
+ return Cost;
+}
+
unsigned BasicTTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
Type *SubTp) const {
+ if (Kind == SK_Alternate) {
+ return getAltShuffleOverhead(Tp);
+ }
return 1;
}
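A minimal sketch of the SK_Alternate cost model added above. The per-lane
costs are passed in as illustrative parameters; in the real pass they come
from TopTTI->getVectorInstrCost for InsertElement and ExtractElement:

// For an N-element alternate shuffle, every result lane costs one extract
// from a source vector plus one insert into the result vector.
unsigned altShuffleCost(unsigned NumElts, unsigned ExtractCost,
                        unsigned InsertCost) {
  unsigned Cost = 0;
  for (unsigned I = 0; I != NumElts; ++I)
    Cost += ExtractCost + InsertCost;
  return Cost; // e.g. <4 x float>, mask <0,5,2,7>: 4 * (1 + 1) = 8 unit costs
}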
diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp
index f623a48..7503e57 100644
--- a/lib/CodeGen/BranchFolding.cpp
+++ b/lib/CodeGen/BranchFolding.cpp
@@ -1505,10 +1505,17 @@ MachineBasicBlock::iterator findHoistingInsertPosAndDeps(MachineBasicBlock *MBB,
if (MO.isUse()) {
for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
Uses.insert(*AI);
- } else if (!MO.isDead())
- // Don't try to hoist code in the rare case the terminator defines a
- // register that is later used.
- return MBB->end();
+ } else {
+ if (!MO.isDead())
+ // Don't try to hoist code in the rare case the terminator defines a
+ // register that is later used.
+ return MBB->end();
+
+        // If the terminator defines a register, make sure we don't hoist an
+        // instruction whose def might be clobbered by the terminator.
+ for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
+ Defs.insert(*AI);
+ }
}
if (Uses.empty())
diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt
index 0b492a9..57c24e8 100644
--- a/lib/CodeGen/CMakeLists.txt
+++ b/lib/CodeGen/CMakeLists.txt
@@ -22,11 +22,13 @@ add_llvm_library(LLVMCodeGen
GCMetadata.cpp
GCMetadataPrinter.cpp
GCStrategy.cpp
+ GlobalMerge.cpp
IfConversion.cpp
InlineSpiller.cpp
InterferenceCache.cpp
IntrinsicLowering.cpp
JITCodeEmitter.cpp
+ JumpInstrTables.cpp
LLVMTargetMachine.cpp
LatencyPriorityQueue.cpp
LexicalScopes.cpp
diff --git a/lib/CodeGen/CodeGenPrepare.cpp b/lib/CodeGen/CodeGenPrepare.cpp
index 6aa60c6..ccac40c 100644
--- a/lib/CodeGen/CodeGenPrepare.cpp
+++ b/lib/CodeGen/CodeGenPrepare.cpp
@@ -151,19 +151,8 @@ typedef DenseMap<Instruction *, Type *> InstrToOrigTy;
}
char CodeGenPrepare::ID = 0;
-static void *initializeCodeGenPreparePassOnce(PassRegistry &Registry) {
- initializeTargetLibraryInfoPass(Registry);
- PassInfo *PI = new PassInfo(
- "Optimize for code generation", "codegenprepare", &CodeGenPrepare::ID,
- PassInfo::NormalCtor_t(callDefaultCtor<CodeGenPrepare>), false, false,
- PassInfo::TargetMachineCtor_t(callTargetMachineCtor<CodeGenPrepare>));
- Registry.registerPass(*PI, true);
- return PI;
-}
-
-void llvm::initializeCodeGenPreparePass(PassRegistry &Registry) {
- CALL_ONCE_INITIALIZATION(initializeCodeGenPreparePassOnce)
-}
+INITIALIZE_TM_PASS(CodeGenPrepare, "codegenprepare",
+ "Optimize for code generation", false, false)
FunctionPass *llvm::createCodeGenPreparePass(const TargetMachine *TM) {
return new CodeGenPrepare(TM);
@@ -1078,8 +1067,11 @@ void ExtAddrMode::print(raw_ostream &OS) const {
NeedPlus = true;
}
- if (BaseOffs)
- OS << (NeedPlus ? " + " : "") << BaseOffs, NeedPlus = true;
+ if (BaseOffs) {
+ OS << (NeedPlus ? " + " : "")
+ << BaseOffs;
+ NeedPlus = true;
+ }
if (BaseReg) {
OS << (NeedPlus ? " + " : "")
diff --git a/lib/CodeGen/CriticalAntiDepBreaker.cpp b/lib/CodeGen/CriticalAntiDepBreaker.cpp
index 822636f..d3ffcc7 100644
--- a/lib/CodeGen/CriticalAntiDepBreaker.cpp
+++ b/lib/CodeGen/CriticalAntiDepBreaker.cpp
@@ -146,8 +146,8 @@ static const SDep *CriticalPathStep(const SUnit *SU) {
void CriticalAntiDepBreaker::PrescanInstruction(MachineInstr *MI) {
// It's not safe to change register allocation for source operands of
- // that have special allocation requirements. Also assume all registers
- // used in a call must not be changed (ABI).
+ // instructions that have special allocation requirements. Also assume all
+ // registers used in a call must not be changed (ABI).
// FIXME: The issue with predicated instruction is more complex. We are being
// conservative here because the kill markers cannot be trusted after
// if-conversion:
@@ -200,6 +200,28 @@ void CriticalAntiDepBreaker::PrescanInstruction(MachineInstr *MI) {
if (Classes[Reg] != reinterpret_cast<TargetRegisterClass *>(-1))
RegRefs.insert(std::make_pair(Reg, &MO));
+ // If this reg is tied and live (Classes[Reg] is set to -1), we can't change
+ // it or any of its sub or super regs. We need to use KeepRegs to mark the
+ // reg because not all uses of the same reg within an instruction are
+ // necessarily tagged as tied.
+ // Example: an x86 "xor %eax, %eax" will have one source operand tied to the
+ // def register but not the second (see PR20020 for details).
+ // FIXME: can this check be relaxed to account for undef uses
+ // of a register? In the above 'xor' example, the uses of %eax are undef, so
+ // earlier instructions could still replace %eax even though the 'xor'
+ // itself can't be changed.
+ if (MI->isRegTiedToUseOperand(i) &&
+ Classes[Reg] == reinterpret_cast<TargetRegisterClass *>(-1)) {
+ for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true);
+ SubRegs.isValid(); ++SubRegs) {
+ KeepRegs.set(*SubRegs);
+ }
+ for (MCSuperRegIterator SuperRegs(Reg, TRI);
+ SuperRegs.isValid(); ++SuperRegs) {
+ KeepRegs.set(*SuperRegs);
+ }
+ }
+
if (MO.isUse() && Special) {
if (!KeepRegs.test(Reg)) {
for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true);
@@ -236,9 +258,15 @@ void CriticalAntiDepBreaker::ScanInstruction(MachineInstr *MI,
unsigned Reg = MO.getReg();
if (Reg == 0) continue;
if (!MO.isDef()) continue;
+
+ // If we've already marked this reg as unchangeable, carry on.
+ if (KeepRegs.test(Reg)) continue;
+
// Ignore two-addr defs.
if (MI->isRegTiedToUseOperand(i)) continue;
+ // FIXME: we should use a SubRegIterator that includes self (as above), so
+ // we don't have to repeat all this code for the reg itself.
DefIndices[Reg] = Count;
KillIndices[Reg] = ~0u;
assert(((KillIndices[Reg] == ~0u) !=
@@ -281,6 +309,9 @@ void CriticalAntiDepBreaker::ScanInstruction(MachineInstr *MI,
RegRefs.insert(std::make_pair(Reg, &MO));
+ // FIXME: we should use an MCRegAliasIterator that includes self so we don't
+ // have to repeat all this code for the reg itself.
+
// It wasn't previously live but now it is, this is a kill.
if (KillIndices[Reg] == ~0u) {
KillIndices[Reg] = Count;
@@ -309,7 +340,7 @@ void CriticalAntiDepBreaker::ScanInstruction(MachineInstr *MI,
// the two-address instruction also defines NewReg, as may happen with
// pre/postincrement loads. In this case, both the use and def operands are in
// RegRefs because the def is inserted by PrescanInstruction and not erased
-// during ScanInstruction. So checking for an instructions with definitions of
+// during ScanInstruction. So checking for an instruction with definitions of
// both NewReg and AntiDepReg covers it.
bool
CriticalAntiDepBreaker::isNewRegClobberedByRefs(RegRefIter RegRefBegin,
@@ -325,7 +356,7 @@ CriticalAntiDepBreaker::isNewRegClobberedByRefs(RegRefIter RegRefBegin,
if (RefOper->isDef() && RefOper->isEarlyClobber())
return true;
- // Handle cases in which this instructions defines NewReg.
+ // Handle cases in which this instruction defines NewReg.
MachineInstr *MI = RefOper->getParent();
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
const MachineOperand &CheckOper = MI->getOperand(i);
@@ -343,11 +374,11 @@ CriticalAntiDepBreaker::isNewRegClobberedByRefs(RegRefIter RegRefBegin,
return true;
// Don't allow an instruction using AntiDepReg to be earlyclobbered by
- // NewReg
+ // NewReg.
if (CheckOper.isEarlyClobber())
return true;
- // Don't allow inline asm to define NewReg at all. Who know what it's
+ // Don't allow inline asm to define NewReg at all. Who knows what it's
// doing with it.
if (MI->isInlineAsm())
return true;
@@ -494,8 +525,7 @@ BreakAntiDependencies(const std::vector<SUnit>& SUnits,
// as we go to help determine which registers are available.
unsigned Broken = 0;
unsigned Count = InsertPosIndex - 1;
- for (MachineBasicBlock::iterator I = End, E = Begin;
- I != E; --Count) {
+ for (MachineBasicBlock::iterator I = End, E = Begin; I != E; --Count) {
MachineInstr *MI = --I;
if (MI->isDebugValue())
continue;
@@ -526,7 +556,7 @@ BreakAntiDependencies(const std::vector<SUnit>& SUnits,
// Don't break anti-dependencies on non-allocatable registers.
AntiDepReg = 0;
else if (KeepRegs.test(AntiDepReg))
- // Don't break anti-dependencies if an use down below requires
+ // Don't break anti-dependencies if a use down below requires
// this exact register.
AntiDepReg = 0;
else {
@@ -564,8 +594,7 @@ BreakAntiDependencies(const std::vector<SUnit>& SUnits,
// If MI's defs have a special allocation requirement, don't allow
// any def registers to be changed. Also assume all registers
// defined in a call must not be changed (ABI).
- if (MI->isCall() || MI->hasExtraDefRegAllocReq() ||
- TII->isPredicated(MI))
+ if (MI->isCall() || MI->hasExtraDefRegAllocReq() || TII->isPredicated(MI))
// If this instruction's defs have special allocation requirement, don't
// break this anti-dependency.
AntiDepReg = 0;
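A toy sketch of the pinning rule PrescanInstruction now applies to tied, live
registers: the register and all of its sub- and super-registers get marked in
KeepRegs. The flat alias tables below stand in for MCSubRegIterator and
MCSuperRegIterator and are an assumption for exposition:

#include <bitset>
#include <vector>

void pinRegAndAliases(unsigned Reg,
                      const std::vector<std::vector<unsigned>> &SubRegs,
                      const std::vector<std::vector<unsigned>> &SuperRegs,
                      std::bitset<256> &KeepRegs) {
  KeepRegs.set(Reg);                    // IncludeSelf
  for (unsigned Sub : SubRegs[Reg])     // e.g. AX, AL, AH for EAX
    KeepRegs.set(Sub);
  for (unsigned Super : SuperRegs[Reg]) // e.g. RAX for EAX
    KeepRegs.set(Super);
}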
diff --git a/lib/CodeGen/CriticalAntiDepBreaker.h b/lib/CodeGen/CriticalAntiDepBreaker.h
index 1949a48..45e4ff5 100644
--- a/lib/CodeGen/CriticalAntiDepBreaker.h
+++ b/lib/CodeGen/CriticalAntiDepBreaker.h
@@ -55,12 +55,12 @@ class TargetRegisterInfo;
typedef std::multimap<unsigned, MachineOperand *>::const_iterator
RegRefIter;
- /// KillIndices - The index of the most recent kill (proceding bottom-up),
+ /// KillIndices - The index of the most recent kill (proceeding bottom-up),
/// or ~0u if the register is not live.
std::vector<unsigned> KillIndices;
- /// DefIndices - The index of the most recent complete def (proceding bottom
- /// up), or ~0u if the register is live.
+ /// DefIndices - The index of the most recent complete def (proceeding
+ /// bottom up), or ~0u if the register is live.
std::vector<unsigned> DefIndices;
/// KeepRegs - A set of registers which are live and cannot be changed to
diff --git a/lib/Transforms/Scalar/GlobalMerge.cpp b/lib/CodeGen/GlobalMerge.cpp
index 990d067..027ee38 100644
--- a/lib/Transforms/Scalar/GlobalMerge.cpp
+++ b/lib/CodeGen/GlobalMerge.cpp
@@ -64,6 +64,7 @@
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
+#include "llvm/CodeGen/Passes.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
@@ -72,7 +73,7 @@ using namespace llvm;
#define DEBUG_TYPE "global-merge"
cl::opt<bool>
-EnableGlobalMerge("global-merge", cl::Hidden,
+EnableGlobalMerge("enable-global-merge", cl::Hidden,
cl::desc("Enable global merge pass"),
cl::init(true));
@@ -81,6 +82,13 @@ EnableGlobalMergeOnConst("global-merge-on-const", cl::Hidden,
cl::desc("Enable global merge pass on constants"),
cl::init(false));
+// FIXME: this could be a transitional option, and we should probably remove
+// it once we are sure this optimization always benefits all targets.
+static cl::opt<bool>
+EnableGlobalMergeOnExternal("global-merge-on-external", cl::Hidden,
+ cl::desc("Enable global merge pass on external linkage"),
+ cl::init(false));
+
STATISTIC(NumMerged , "Number of globals merged");
namespace {
class GlobalMerge : public FunctionPass {
@@ -129,9 +137,8 @@ namespace {
} // end anonymous namespace
char GlobalMerge::ID = 0;
-INITIALIZE_PASS(GlobalMerge, "global-merge",
- "Global Merge", false, false)
-
+INITIALIZE_TM_PASS(GlobalMerge, "global-merge", "Merge global variables",
+ false, false)
bool GlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
Module &M, bool isConst, unsigned AddrSpace) const {
@@ -154,11 +161,23 @@ bool GlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
Type *Int32Ty = Type::getInt32Ty(M.getContext());
+ assert(Globals.size() > 1);
+
+  // FIXME: This simple solution merges globals together as much as possible.
+  // However, it makes it hard to remove dead global symbols at link time. An
+  // alternative would be to check global symbol references function by
+  // function, merging only the symbols referenced within the same function;
+  // that would probably require a heuristic algorithm to resolve merge
+  // conflicts between different functions.
for (size_t i = 0, e = Globals.size(); i != e; ) {
size_t j = 0;
uint64_t MergedSize = 0;
std::vector<Type*> Tys;
std::vector<Constant*> Inits;
+
+ bool HasExternal = false;
+    GlobalVariable *TheFirstExternal = nullptr;
for (j = i; j != e; ++j) {
Type *Ty = Globals[j]->getType()->getElementType();
MergedSize += DL->getTypeAllocSize(Ty);
@@ -167,17 +186,35 @@ bool GlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
}
Tys.push_back(Ty);
Inits.push_back(Globals[j]->getInitializer());
+
+ if (Globals[j]->hasExternalLinkage() && !HasExternal) {
+ HasExternal = true;
+ TheFirstExternal = Globals[j];
+ }
}
+  // If the merged variables don't have external linkage, we don't need to
+  // expose the symbol after merging.
+ GlobalValue::LinkageTypes Linkage = HasExternal
+ ? GlobalValue::ExternalLinkage
+ : GlobalValue::InternalLinkage;
+
StructType *MergedTy = StructType::get(M.getContext(), Tys);
Constant *MergedInit = ConstantStruct::get(MergedTy, Inits);
- GlobalVariable *MergedGV = new GlobalVariable(M, MergedTy, isConst,
- GlobalValue::InternalLinkage,
- MergedInit, "_MergedGlobals",
- nullptr,
- GlobalVariable::NotThreadLocal,
- AddrSpace);
+
+  // If the merged variables have external linkage, we use the symbol name of
+  // the first merged variable as the suffix of the merged global's name. This
+  // avoids link-time naming conflicts for global symbols.
+ GlobalVariable *MergedGV = new GlobalVariable(
+ M, MergedTy, isConst, Linkage, MergedInit,
+ HasExternal ? "_MergedGlobals_" + TheFirstExternal->getName()
+ : "_MergedGlobals",
+ nullptr, GlobalVariable::NotThreadLocal, AddrSpace);
+
for (size_t k = i; k < j; ++k) {
+ GlobalValue::LinkageTypes Linkage = Globals[k]->getLinkage();
+ std::string Name = Globals[k]->getName();
+
Constant *Idx[2] = {
ConstantInt::get(Int32Ty, 0),
ConstantInt::get(Int32Ty, k-i)
@@ -185,6 +222,14 @@ bool GlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
Constant *GEP = ConstantExpr::getInBoundsGetElementPtr(MergedGV, Idx);
Globals[k]->replaceAllUsesWith(GEP);
Globals[k]->eraseFromParent();
+
+ if (Linkage != GlobalValue::InternalLinkage) {
+ // Generate a new alias...
+ auto *PTy = cast<PointerType>(GEP->getType());
+ GlobalAlias::create(PTy->getElementType(), PTy->getAddressSpace(),
+ Linkage, Name, GEP, &M);
+ }
+
NumMerged++;
}
i = j;
@@ -245,8 +290,12 @@ bool GlobalMerge::doInitialization(Module &M) {
// Grab all non-const globals.
for (Module::global_iterator I = M.global_begin(),
E = M.global_end(); I != E; ++I) {
- // Merge is safe for "normal" internal globals only
- if (!I->hasLocalLinkage() || I->isThreadLocal() || I->hasSection())
+ // Merge is safe for "normal" internal or external globals only
+ if (I->isDeclaration() || I->isThreadLocal() || I->hasSection())
+ continue;
+
+ if (!(EnableGlobalMergeOnExternal && I->hasExternalLinkage()) &&
+ !I->hasInternalLinkage())
continue;
PointerType *PT = dyn_cast<PointerType>(I->getType());
@@ -270,8 +319,7 @@ bool GlobalMerge::doInitialization(Module &M) {
continue;
if (DL->getTypeAllocSize(Ty) < MaxOffset) {
- if (TargetLoweringObjectFile::getKindForGlobal(I, TLI->getTargetMachine())
- .isBSSLocal())
+ if (TargetLoweringObjectFile::getKindForGlobal(I, *TM).isBSSLocal())
BSSGlobals[AddressSpace].push_back(I);
else if (I->isConstant())
ConstGlobals[AddressSpace].push_back(I);
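What the merge produces, sketched at the C++ source level rather than in IR
(the names are illustrative): several globals collapse into one struct, and
with -global-merge-on-external the externally visible names live on as
aliases at fixed offsets, analogous to the GEPs with indices {0, k-i} built
in doMerge():

// Before: int x = 1; int y = 2; int z = 3; (three globals, x external)
struct MergedTy { int x, y, z; };
static MergedTy _MergedGlobals_x = {1, 2, 3}; // suffixed with the first
                                              // external symbol's name

// Each replaced symbol becomes the equivalent of a fixed-offset alias:
int &x = _MergedGlobals_x.x;
int &y = _MergedGlobals_x.y;
int &z = _MergedGlobals_x.z;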
diff --git a/lib/CodeGen/JumpInstrTables.cpp b/lib/CodeGen/JumpInstrTables.cpp
new file mode 100644
index 0000000..61ef722
--- /dev/null
+++ b/lib/CodeGen/JumpInstrTables.cpp
@@ -0,0 +1,301 @@
+//===-- JumpInstrTables.cpp: Jump-Instruction Tables ----------------------===//
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief An implementation of jump-instruction tables.
+///
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "jt"
+
+#include "llvm/CodeGen/JumpInstrTables.h"
+
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/JumpInstrTableInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/CallSite.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Verifier.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+#include <vector>
+
+using namespace llvm;
+
+char JumpInstrTables::ID = 0;
+
+INITIALIZE_PASS_BEGIN(JumpInstrTables, "jump-instr-tables",
+ "Jump-Instruction Tables", true, true)
+INITIALIZE_PASS_DEPENDENCY(JumpInstrTableInfo);
+INITIALIZE_PASS_END(JumpInstrTables, "jump-instr-tables",
+ "Jump-Instruction Tables", true, true)
+
+STATISTIC(NumJumpTables, "Number of indirect call tables generated");
+STATISTIC(NumFuncsInJumpTables, "Number of functions in the jump tables");
+
+ModulePass *llvm::createJumpInstrTablesPass() {
+ // The default implementation uses a single table for all functions.
+ return new JumpInstrTables(JumpTable::Single);
+}
+
+ModulePass *llvm::createJumpInstrTablesPass(JumpTable::JumpTableType JTT) {
+ return new JumpInstrTables(JTT);
+}
+
+namespace {
+static const char jump_func_prefix[] = "__llvm_jump_instr_table_";
+static const char jump_section_prefix[] = ".jump.instr.table.text.";
+
+// Checks to see if a given CallSite is making an indirect call, including
+// cases where the indirect call is made through a bitcast.
+bool isIndirectCall(CallSite &CS) {
+ if (CS.getCalledFunction())
+ return false;
+
+ // Check the value to see if it is merely a bitcast of a function. In
+ // this case, it will translate to a direct function call in the resulting
+ // assembly, so we won't treat it as an indirect call here.
+ const Value *V = CS.getCalledValue();
+ if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
+ return !(CE->isCast() && isa<Function>(CE->getOperand(0)));
+ }
+
+  // Otherwise, since we know it's a call, it must be an indirect call.
+ return true;
+}
+
+// Replaces Functions and GlobalAliases with a different Value.
+bool replaceGlobalValueIndirectUse(GlobalValue *GV, Value *V, Use *U) {
+ User *Us = U->getUser();
+ if (!Us)
+ return false;
+ if (Instruction *I = dyn_cast<Instruction>(Us)) {
+ CallSite CS(I);
+
+ // Don't do the replacement if this use is a direct call to this function.
+ // If the use is not the called value, then replace it.
+ if (CS && (isIndirectCall(CS) || CS.isCallee(U))) {
+ return false;
+ }
+
+ U->set(V);
+ } else if (Constant *C = dyn_cast<Constant>(Us)) {
+ // Don't replace calls to bitcasts of function symbols, since they get
+ // translated to direct calls.
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Us)) {
+ if (CE->getOpcode() == Instruction::BitCast) {
+ // This bitcast must have exactly one user.
+ if (CE->user_begin() != CE->user_end()) {
+ User *ParentUs = *CE->user_begin();
+ if (CallInst *CI = dyn_cast<CallInst>(ParentUs)) {
+ CallSite CS(CI);
+ Use &CEU = *CE->use_begin();
+ if (CS.isCallee(&CEU)) {
+ return false;
+ }
+ }
+ }
+ }
+ }
+
+  // GlobalAlias doesn't support replaceUsesOfWithOnConstant, and the verifier
+  // requires an alias to point to a defined function. So GlobalAlias is
+  // handled as a separate case in runOnModule.
+ if (!isa<GlobalAlias>(C))
+ C->replaceUsesOfWithOnConstant(GV, V, U);
+ } else {
+ assert(false && "The Use of a Function symbol is neither an instruction nor"
+ " a constant");
+ }
+
+ return true;
+}
+
+// Replaces all replaceable address-taken uses of GV with a pointer to a
+// jump-instruction table entry.
+void replaceValueWithFunction(GlobalValue *GV, Function *F) {
+  // Go through all uses of this function and replace the uses of GV with the
+  // jump-table version of the function. Advance the iterator before each
+  // replacement, since replacing a use changes the use list and would
+  // otherwise invalidate the iterator.
+ for (Value::use_iterator I = GV->use_begin(), E = GV->use_end(); I != E;) {
+ Use &U = *I++;
+
+ // Replacement of constants replaces all instances in the constant. So, some
+ // uses might have already been handled by the time we reach them here.
+ if (U.get() == GV)
+ replaceGlobalValueIndirectUse(GV, F, &U);
+ }
+
+ return;
+}
+} // end anonymous namespace
+
+JumpInstrTables::JumpInstrTables()
+ : ModulePass(ID), Metadata(), JITI(nullptr), TableCount(0),
+ JTType(JumpTable::Single) {
+ initializeJumpInstrTablesPass(*PassRegistry::getPassRegistry());
+}
+
+JumpInstrTables::JumpInstrTables(JumpTable::JumpTableType JTT)
+ : ModulePass(ID), Metadata(), JITI(nullptr), TableCount(0), JTType(JTT) {
+ initializeJumpInstrTablesPass(*PassRegistry::getPassRegistry());
+}
+
+JumpInstrTables::~JumpInstrTables() {}
+
+void JumpInstrTables::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<JumpInstrTableInfo>();
+}
+
+Function *JumpInstrTables::insertEntry(Module &M, Function *Target) {
+ FunctionType *OrigFunTy = Target->getFunctionType();
+ FunctionType *FunTy = transformType(OrigFunTy);
+
+ JumpMap::iterator it = Metadata.find(FunTy);
+ if (Metadata.end() == it) {
+ struct TableMeta Meta;
+ Meta.TableNum = TableCount;
+ Meta.Count = 0;
+ Metadata[FunTy] = Meta;
+ it = Metadata.find(FunTy);
+ ++NumJumpTables;
+ ++TableCount;
+ }
+
+ it->second.Count++;
+
+ std::string NewName(jump_func_prefix);
+ NewName += (Twine(it->second.TableNum) + "_" + Twine(it->second.Count)).str();
+ Function *JumpFun =
+ Function::Create(OrigFunTy, GlobalValue::ExternalLinkage, NewName, &M);
+ // The section for this table
+ JumpFun->setSection((jump_section_prefix + Twine(it->second.TableNum)).str());
+ JITI->insertEntry(FunTy, Target, JumpFun);
+
+ ++NumFuncsInJumpTables;
+ return JumpFun;
+}
+
+bool JumpInstrTables::hasTable(FunctionType *FunTy) {
+ FunctionType *TransTy = transformType(FunTy);
+ return Metadata.end() != Metadata.find(TransTy);
+}
+
+FunctionType *JumpInstrTables::transformType(FunctionType *FunTy) {
+  // Returning nullptr forces all types into the same table, since all types
+  // map to the same type.
+ Type *VoidPtrTy = Type::getInt8PtrTy(FunTy->getContext());
+
+ // Ignore the return type.
+ Type *RetTy = VoidPtrTy;
+ bool IsVarArg = FunTy->isVarArg();
+ std::vector<Type *> ParamTys(FunTy->getNumParams());
+ FunctionType::param_iterator PI, PE;
+ int i = 0;
+
+ std::vector<Type *> EmptyParams;
+ Type *Int32Ty = Type::getInt32Ty(FunTy->getContext());
+ FunctionType *VoidFnTy = FunctionType::get(
+ Type::getVoidTy(FunTy->getContext()), EmptyParams, false);
+ switch (JTType) {
+ case JumpTable::Single:
+
+ return FunctionType::get(RetTy, EmptyParams, false);
+ case JumpTable::Arity:
+ // Transform all types to void* so that all functions with the same arity
+ // end up in the same table.
+ for (PI = FunTy->param_begin(), PE = FunTy->param_end(); PI != PE;
+ PI++, i++) {
+ ParamTys[i] = VoidPtrTy;
+ }
+
+ return FunctionType::get(RetTy, ParamTys, IsVarArg);
+ case JumpTable::Simplified:
+  // Project all parameter types to one of 3 types: composite, integer, and
+  // function, matching the three subclasses of Type.
+ for (PI = FunTy->param_begin(), PE = FunTy->param_end(); PI != PE;
+ ++PI, ++i) {
+ assert((isa<IntegerType>(*PI) || isa<FunctionType>(*PI) ||
+ isa<CompositeType>(*PI)) &&
+ "This type is not an Integer or a Composite or a Function");
+ if (isa<CompositeType>(*PI)) {
+ ParamTys[i] = VoidPtrTy;
+ } else if (isa<FunctionType>(*PI)) {
+ ParamTys[i] = VoidFnTy;
+ } else if (isa<IntegerType>(*PI)) {
+ ParamTys[i] = Int32Ty;
+ }
+ }
+
+ return FunctionType::get(RetTy, ParamTys, IsVarArg);
+ case JumpTable::Full:
+ // Don't transform this type at all.
+ return FunTy;
+ }
+
+ return nullptr;
+}
+
+bool JumpInstrTables::runOnModule(Module &M) {
+ // Make sure the module is well-formed, especially with respect to jumptable.
+ if (verifyModule(M))
+ return false;
+
+ JITI = &getAnalysis<JumpInstrTableInfo>();
+
+ // Get the set of jumptable-annotated functions.
+ DenseMap<Function *, Function *> Functions;
+ for (Function &F : M) {
+ if (F.hasFnAttribute(Attribute::JumpTable)) {
+ assert(F.hasUnnamedAddr() &&
+ "Attribute 'jumptable' requires 'unnamed_addr'");
+ Functions[&F] = nullptr;
+ }
+ }
+
+ // Create the jump-table functions.
+ for (auto &KV : Functions) {
+ Function *F = KV.first;
+ KV.second = insertEntry(M, F);
+ }
+
+ // GlobalAlias is a special case, because the target of an alias statement
+ // must be a defined function. So, instead of replacing a given function in
+ // the alias, we replace all uses of aliases that target jumptable functions.
+ // Note that there's no need to create these functions, since only aliases
+ // that target known jumptable functions are replaced, and there's no way to
+ // put the jumptable annotation on a global alias.
+ DenseMap<GlobalAlias *, Function *> Aliases;
+ for (GlobalAlias &GA : M.aliases()) {
+ Constant *Aliasee = GA.getAliasee();
+ if (Function *F = dyn_cast<Function>(Aliasee)) {
+ auto it = Functions.find(F);
+ if (it != Functions.end()) {
+ Aliases[&GA] = it->second;
+ }
+ }
+ }
+
+  // Replace each address-taken function with its jump-instruction table entry.
+ for (auto &KV : Functions)
+ replaceValueWithFunction(KV.first, KV.second);
+
+ for (auto &KV : Aliases)
+ replaceValueWithFunction(KV.first, KV.second);
+
+ return !Functions.empty();
+}
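A compact restatement of the grouping rule transformType() implements: two
functions share a jump table exactly when their transformed signatures
compare equal. The string encodings below show an original signature of
i32 (i8*, i32) under each strategy and are an illustrative assumption:

#include <cstdio>

enum class JumpTableType { Single, Arity, Simplified, Full };

const char *transformedSig(JumpTableType JT) {
  switch (JT) {
  case JumpTableType::Single:     return "i8* ()";         // one table for all
  case JumpTableType::Arity:      return "i8* (i8*, i8*)"; // by parameter count
  case JumpTableType::Simplified: return "i8* (i8*, i32)"; // 3 type classes
  case JumpTableType::Full:       return "i32 (i8*, i32)"; // exact type kept
  }
  return nullptr;
}

int main() {
  std::printf("Arity grouping: %s\n", transformedSig(JumpTableType::Arity));
  return 0;
}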
diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp
index a5ac057..df96b94 100644
--- a/lib/CodeGen/LLVMTargetMachine.cpp
+++ b/lib/CodeGen/LLVMTargetMachine.cpp
@@ -12,11 +12,15 @@
//===----------------------------------------------------------------------===//
#include "llvm/Target/TargetMachine.h"
+
+#include "llvm/Analysis/Passes.h"
#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/JumpInstrTables.h"
#include "llvm/CodeGen/MachineFunctionAnalysis.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/IRPrintingPasses.h"
+#include "llvm/IR/Verifier.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCInstrInfo.h"
@@ -82,6 +86,7 @@ static MCContext *addPassesToGenerateCode(LLVMTargetMachine *TM,
bool DisableVerify,
AnalysisID StartAfter,
AnalysisID StopAfter) {
+
// Add internal analysis passes from the target machine.
TM->addAnalysisPasses(PM);
@@ -136,6 +141,11 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
bool DisableVerify,
AnalysisID StartAfter,
AnalysisID StopAfter) {
+ // Passes to handle jumptable function annotations. These can't be handled at
+ // JIT time, so we don't add them directly to addPassesToGenerateCode.
+ PM.add(createJumpInstrTableInfoPass());
+ PM.add(createJumpInstrTablesPass(Options.JTType));
+
// Add common CodeGen passes.
MCContext *Context = addPassesToGenerateCode(this, PM, DisableVerify,
StartAfter, StopAfter);
@@ -199,7 +209,7 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
case CGFT_Null:
// The Null output is intended for use for performance analysis and testing,
// not real users.
- AsmStreamer.reset(createNullStreamer(*Context));
+ AsmStreamer.reset(getTarget().createNullStreamer(*Context));
break;
}
diff --git a/lib/CodeGen/LiveDebugVariables.cpp b/lib/CodeGen/LiveDebugVariables.cpp
index 388f58f..7d5646b 100644
--- a/lib/CodeGen/LiveDebugVariables.cpp
+++ b/lib/CodeGen/LiveDebugVariables.cpp
@@ -329,12 +329,13 @@ class LDVImpl {
void computeIntervals();
public:
- LDVImpl(LiveDebugVariables *ps) : pass(*ps), EmitDone(false),
- ModifiedMF(false) {}
+ LDVImpl(LiveDebugVariables *ps)
+ : pass(*ps), MF(nullptr), EmitDone(false), ModifiedMF(false) {}
bool runOnMachineFunction(MachineFunction &mf);
/// clear - Release all memory.
void clear() {
+ MF = nullptr;
userValues.clear();
virtRegToEqClass.clear();
userVarMap.clear();
@@ -693,11 +694,11 @@ void LDVImpl::computeIntervals() {
}
bool LDVImpl::runOnMachineFunction(MachineFunction &mf) {
+ clear();
MF = &mf;
LIS = &pass.getAnalysis<LiveIntervals>();
MDT = &pass.getAnalysis<MachineDominatorTree>();
TRI = mf.getTarget().getRegisterInfo();
- clear();
LS.initialize(mf);
DEBUG(dbgs() << "********** COMPUTING LIVE DEBUG VARIABLES: "
<< mf.getName() << " **********\n");
@@ -712,6 +713,8 @@ bool LDVImpl::runOnMachineFunction(MachineFunction &mf) {
bool LiveDebugVariables::runOnMachineFunction(MachineFunction &mf) {
if (!EnableLDV)
return false;
+ if (!FunctionDIs.count(mf.getFunction()))
+ return false;
if (!pImpl)
pImpl = new LDVImpl(this);
return static_cast<LDVImpl*>(pImpl)->runOnMachineFunction(mf);
@@ -974,6 +977,8 @@ void UserValue::emitDebugValues(VirtRegMap *VRM, LiveIntervals &LIS,
void LDVImpl::emitDebugValues(VirtRegMap *VRM) {
DEBUG(dbgs() << "********** EMITTING LIVE DEBUG VARIABLES **********\n");
+ if (!MF)
+ return;
const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
for (unsigned i = 0, e = userValues.size(); i != e; ++i) {
DEBUG(userValues[i]->print(dbgs(), &MF->getTarget()));
@@ -988,6 +993,10 @@ void LiveDebugVariables::emitDebugValues(VirtRegMap *VRM) {
static_cast<LDVImpl*>(pImpl)->emitDebugValues(VRM);
}
+bool LiveDebugVariables::doInitialization(Module &M) {
+ FunctionDIs = makeSubprogramMap(M);
+ return Pass::doInitialization(M);
+}
#ifndef NDEBUG
void LiveDebugVariables::dump() {
diff --git a/lib/CodeGen/LiveDebugVariables.h b/lib/CodeGen/LiveDebugVariables.h
index bb67435..7ec0d17 100644
--- a/lib/CodeGen/LiveDebugVariables.h
+++ b/lib/CodeGen/LiveDebugVariables.h
@@ -22,6 +22,7 @@
#define LLVM_CODEGEN_LIVEDEBUGVARIABLES_H
#include "llvm/ADT/ArrayRef.h"
+#include "llvm/IR/DebugInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
namespace llvm {
@@ -32,6 +33,7 @@ class VirtRegMap;
class LiveDebugVariables : public MachineFunctionPass {
void *pImpl;
+ DenseMap<const Function*, DISubprogram> FunctionDIs;
public:
static char ID; // Pass identification, replacement for typeid
@@ -64,6 +66,7 @@ private:
bool runOnMachineFunction(MachineFunction &) override;
void releaseMemory() override;
void getAnalysisUsage(AnalysisUsage &) const override;
+ bool doInitialization(Module &) override;
};
diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp
index 3563f8e..1559560 100644
--- a/lib/CodeGen/LiveIntervalAnalysis.cpp
+++ b/lib/CodeGen/LiveIntervalAnalysis.cpp
@@ -186,6 +186,7 @@ void LiveIntervals::computeVirtRegInterval(LiveInterval &LI) {
LRCalc->reset(MF, getSlotIndexes(), DomTree, &getVNInfoAllocator());
LRCalc->createDeadDefs(LI);
LRCalc->extendToUses(LI);
+ computeDeadValues(&LI, LI, nullptr, nullptr);
}
void LiveIntervals::computeVirtRegs() {
@@ -412,21 +413,34 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li,
// Handle dead values.
bool CanSeparate = false;
+ computeDeadValues(li, NewLR, &CanSeparate, dead);
+
+ // Move the trimmed segments back.
+ li->segments.swap(NewLR.segments);
+ DEBUG(dbgs() << "Shrunk: " << *li << '\n');
+ return CanSeparate;
+}
+
+void LiveIntervals::computeDeadValues(LiveInterval *li,
+ LiveRange &LR,
+ bool *CanSeparate,
+ SmallVectorImpl<MachineInstr*> *dead) {
for (LiveInterval::vni_iterator I = li->vni_begin(), E = li->vni_end();
I != E; ++I) {
VNInfo *VNI = *I;
if (VNI->isUnused())
continue;
- LiveRange::iterator LRI = NewLR.FindSegmentContaining(VNI->def);
- assert(LRI != NewLR.end() && "Missing segment for PHI");
+ LiveRange::iterator LRI = LR.FindSegmentContaining(VNI->def);
+ assert(LRI != LR.end() && "Missing segment for PHI");
if (LRI->end != VNI->def.getDeadSlot())
continue;
if (VNI->isPHIDef()) {
// This is a dead PHI. Remove it.
VNI->markUnused();
- NewLR.removeSegment(LRI->start, LRI->end);
+ LR.removeSegment(LRI->start, LRI->end);
DEBUG(dbgs() << "Dead PHI at " << VNI->def << " may separate interval\n");
- CanSeparate = true;
+ if (CanSeparate)
+ *CanSeparate = true;
} else {
// This is a dead def. Make sure the instruction knows.
MachineInstr *MI = getInstructionFromIndex(VNI->def);
@@ -438,11 +452,6 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li,
}
}
}
-
- // Move the trimmed segments back.
- li->segments.swap(NewLR.segments);
- DEBUG(dbgs() << "Shrunk: " << *li << '\n');
- return CanSeparate;
}
void LiveIntervals::extendToIndices(LiveRange &LR,
diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp
index 0ec5c33..08fef5f 100644
--- a/lib/CodeGen/MachineBasicBlock.cpp
+++ b/lib/CodeGen/MachineBasicBlock.cpp
@@ -332,7 +332,7 @@ void MachineBasicBlock::print(raw_ostream &OS, SlotIndexes *Indexes) const {
}
}
-void MachineBasicBlock::printAsOperand(raw_ostream &OS, bool /*PrintType*/) {
+void MachineBasicBlock::printAsOperand(raw_ostream &OS, bool /*PrintType*/) const {
OS << "BB#" << getNumber();
}
diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp
index eb3d71f..6138aef 100644
--- a/lib/CodeGen/MachineFunction.cpp
+++ b/lib/CodeGen/MachineFunction.cpp
@@ -457,7 +457,7 @@ unsigned MachineFunction::addLiveIn(unsigned PReg,
/// getJTISymbol - Return the MCSymbol for the specified non-empty jump table.
/// If isLinkerPrivate is specified, an 'l' label is returned, otherwise a
/// normal 'L' label is returned.
-MCSymbol *MachineFunction::getJTISymbol(unsigned JTI, MCContext &Ctx,
+MCSymbol *MachineFunction::getJTISymbol(unsigned JTI, MCContext &Ctx,
bool isLinkerPrivate) const {
const DataLayout *DL = getTarget().getDataLayout();
assert(JumpTableInfo && "No jump tables");
@@ -530,10 +530,9 @@ int MachineFrameInfo::CreateStackObject(uint64_t Size, unsigned Alignment,
///
int MachineFrameInfo::CreateSpillStackObject(uint64_t Size,
unsigned Alignment) {
- Alignment =
- clampStackAlignment(!getFrameLowering()->isStackRealignable() ||
- !RealignOption,
- Alignment, getFrameLowering()->getStackAlignment());
+ Alignment = clampStackAlignment(
+ !getFrameLowering()->isStackRealignable() || !RealignOption, Alignment,
+ getFrameLowering()->getStackAlignment());
CreateStackObject(Size, Alignment, true);
int Index = (int)Objects.size() - NumFixedObjects - 1;
ensureMaxAlignment(Alignment);
@@ -548,10 +547,9 @@ int MachineFrameInfo::CreateSpillStackObject(uint64_t Size,
int MachineFrameInfo::CreateVariableSizedObject(unsigned Alignment,
const AllocaInst *Alloca) {
HasVarSizedObjects = true;
- Alignment =
- clampStackAlignment(!getFrameLowering()->isStackRealignable() ||
- !RealignOption,
- Alignment, getFrameLowering()->getStackAlignment());
+ Alignment = clampStackAlignment(
+ !getFrameLowering()->isStackRealignable() || !RealignOption, Alignment,
+ getFrameLowering()->getStackAlignment());
Objects.push_back(StackObject(0, Alignment, 0, false, false, Alloca));
ensureMaxAlignment(Alignment);
return (int)Objects.size()-NumFixedObjects-1;
@@ -571,16 +569,30 @@ int MachineFrameInfo::CreateFixedObject(uint64_t Size, int64_t SPOffset,
// object is 16-byte aligned.
unsigned StackAlign = getFrameLowering()->getStackAlignment();
unsigned Align = MinAlign(SPOffset, StackAlign);
- Align =
- clampStackAlignment(!getFrameLowering()->isStackRealignable() ||
- !RealignOption,
- Align, getFrameLowering()->getStackAlignment());
+ Align = clampStackAlignment(!getFrameLowering()->isStackRealignable() ||
+ !RealignOption,
+ Align, getFrameLowering()->getStackAlignment());
Objects.insert(Objects.begin(), StackObject(Size, Align, SPOffset, Immutable,
/*isSS*/ false,
/*Alloca*/ nullptr));
return -++NumFixedObjects;
}
+/// CreateFixedSpillStackObject - Create a spill slot at a fixed location
+/// on the stack. Returns an index with a negative value.
+int MachineFrameInfo::CreateFixedSpillStackObject(uint64_t Size,
+ int64_t SPOffset) {
+ unsigned StackAlign = getFrameLowering()->getStackAlignment();
+ unsigned Align = MinAlign(SPOffset, StackAlign);
+ Align = clampStackAlignment(!getFrameLowering()->isStackRealignable() ||
+ !RealignOption,
+ Align, getFrameLowering()->getStackAlignment());
+ Objects.insert(Objects.begin(), StackObject(Size, Align, SPOffset,
+ /*Immutable*/ true,
+ /*isSS*/ true,
+ /*Alloca*/ nullptr));
+ return -++NumFixedObjects;
+}
BitVector
MachineFrameInfo::getPristineRegs(const MachineBasicBlock *MBB) const {
@@ -849,11 +861,10 @@ static bool CanShareConstantPoolEntry(const Constant *A, const Constant *B,
if (isa<StructType>(A->getType()) || isa<ArrayType>(A->getType()) ||
isa<StructType>(B->getType()) || isa<ArrayType>(B->getType()))
return false;
-
+
// For now, only support constants with the same size.
uint64_t StoreSize = TD->getTypeStoreSize(A->getType());
- if (StoreSize != TD->getTypeStoreSize(B->getType()) ||
- StoreSize > 128)
+ if (StoreSize != TD->getTypeStoreSize(B->getType()) || StoreSize > 128)
return false;
Type *IntTy = IntegerType::get(A->getContext(), StoreSize*8);
@@ -882,7 +893,7 @@ static bool CanShareConstantPoolEntry(const Constant *A, const Constant *B,
/// an existing one. User must specify the log2 of the minimum required
/// alignment for the object.
///
-unsigned MachineConstantPool::getConstantPoolIndex(const Constant *C,
+unsigned MachineConstantPool::getConstantPoolIndex(const Constant *C,
unsigned Alignment) {
assert(Alignment && "Alignment must be specified!");
if (Alignment > PoolAlignment) PoolAlignment = Alignment;
diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp
index 23847d6..44191f7 100644
--- a/lib/CodeGen/MachineScheduler.cpp
+++ b/lib/CodeGen/MachineScheduler.cpp
@@ -333,6 +333,12 @@ bool PostMachineScheduler::runOnMachineFunction(MachineFunction &mf) {
if (skipOptnoneFunction(*mf.getFunction()))
return false;
+ const TargetSubtargetInfo &ST =
+ mf.getTarget().getSubtarget<TargetSubtargetInfo>();
+ if (!ST.enablePostMachineScheduler()) {
+ DEBUG(dbgs() << "Subtarget disables post-MI-sched.\n");
+ return false;
+ }
DEBUG(dbgs() << "Before post-MI-sched:\n"; mf.print(dbgs()));
// Initialize the context of the pass.
@@ -472,14 +478,13 @@ void MachineSchedulerBase::print(raw_ostream &O, const Module* m) const {
// unimplemented
}
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD
void ReadyQueue::dump() {
dbgs() << Name << ": ";
for (unsigned i = 0, e = Queue.size(); i < e; ++i)
dbgs() << Queue[i]->NodeNum << " ";
dbgs() << "\n";
}
-#endif
//===----------------------------------------------------------------------===//
// ScheduleDAGMI - Basic machine instruction scheduling. This is
@@ -529,6 +534,11 @@ void ScheduleDAGMI::releaseSucc(SUnit *SU, SDep *SuccEdge) {
llvm_unreachable(nullptr);
}
#endif
+ // SU->TopReadyCycle was set to CurrCycle when it was scheduled. However,
+ // CurrCycle may have advanced since then.
+ if (SuccSU->TopReadyCycle < SU->TopReadyCycle + SuccEdge->getLatency())
+ SuccSU->TopReadyCycle = SU->TopReadyCycle + SuccEdge->getLatency();
+
--SuccSU->NumPredsLeft;
if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU)
SchedImpl->releaseTopNode(SuccSU);
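The hunks above and below move edge-latency propagation to release time: a
successor's ready cycle is raised to the scheduled node's ready cycle plus
the edge latency. A one-function sketch with toy cycle numbers (not the
SUnit API):

// A successor cannot become ready before pred's ready cycle + edge latency.
unsigned propagateReadyCycle(unsigned PredReady, unsigned Latency,
                             unsigned SuccReady) {
  unsigned Earliest = PredReady + Latency;
  return SuccReady < Earliest ? Earliest : SuccReady;
}
// e.g. propagateReadyCycle(3, 2, 4) == 5: the edge defers readiness to cycle 5.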
@@ -563,6 +573,11 @@ void ScheduleDAGMI::releasePred(SUnit *SU, SDep *PredEdge) {
llvm_unreachable(nullptr);
}
#endif
+ // SU->BotReadyCycle was set to CurrCycle when it was scheduled. However,
+ // CurrCycle may have advanced since then.
+ if (PredSU->BotReadyCycle < SU->BotReadyCycle + PredEdge->getLatency())
+ PredSU->BotReadyCycle = SU->BotReadyCycle + PredEdge->getLatency();
+
--PredSU->NumSuccsLeft;
if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU)
SchedImpl->releaseBottomNode(PredSU);
@@ -674,10 +689,13 @@ void ScheduleDAGMI::schedule() {
CurrentBottom = MI;
}
}
- updateQueues(SU, IsTopNode);
-
- // Notify the scheduling strategy after updating the DAG.
+ // Notify the scheduling strategy before updating the DAG.
+ // This sets the scheduled node's ReadyCycle to CurrCycle. When updateQueues
+ // runs, it can then use the accurate ReadyCycle time to determine whether
+ // newly released nodes can move to the readyQ.
SchedImpl->schedNode(SU, IsTopNode);
+
+ updateQueues(SU, IsTopNode);
}
assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
@@ -1568,7 +1586,7 @@ void SchedBoundary::reset() {
// Track the maximum number of stall cycles that could arise either from the
// latency of a DAG edge or the number of cycles that a processor resource is
// reserved (SchedBoundary::ReservedCycles).
- MaxObservedLatency = 0;
+ MaxObservedStall = 0;
#endif
// Reserve a zero-count for invalid CritResIdx.
ExecutedResCounts.resize(1);
@@ -1668,8 +1686,16 @@ bool SchedBoundary::checkHazard(SUnit *SU) {
for (TargetSchedModel::ProcResIter
PI = SchedModel->getWriteProcResBegin(SC),
PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
- if (getNextResourceCycle(PI->ProcResourceIdx, PI->Cycles) > CurrCycle)
+ unsigned NRCycle = getNextResourceCycle(PI->ProcResourceIdx, PI->Cycles);
+ if (NRCycle > CurrCycle) {
+#ifndef NDEBUG
+ MaxObservedStall = std::max(PI->Cycles, MaxObservedStall);
+#endif
+ DEBUG(dbgs() << " SU(" << SU->NodeNum << ") "
+ << SchedModel->getResourceName(PI->ProcResourceIdx)
+ << "=" << NRCycle << "c\n");
return true;
+ }
}
}
return false;
@@ -1725,6 +1751,16 @@ getOtherResourceCount(unsigned &OtherCritIdx) {
}
void SchedBoundary::releaseNode(SUnit *SU, unsigned ReadyCycle) {
+ assert(SU->getInstr() && "Scheduled SUnit must have instr");
+
+#ifndef NDEBUG
+  // ReadyCycle was bumped up to CurrCycle when this node was scheduled, but
+  // CurrCycle may have been eagerly advanced immediately after scheduling, so
+  // it may now be greater than ReadyCycle.
+ if (ReadyCycle > CurrCycle)
+ MaxObservedStall = std::max(ReadyCycle - CurrCycle, MaxObservedStall);
+#endif
+
if (ReadyCycle < MinReadyCycle)
MinReadyCycle = ReadyCycle;
@@ -1744,18 +1780,6 @@ void SchedBoundary::releaseTopNode(SUnit *SU) {
if (SU->isScheduled)
return;
- for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
- I != E; ++I) {
- if (I->isWeak())
- continue;
- unsigned PredReadyCycle = I->getSUnit()->TopReadyCycle;
- unsigned Latency = I->getLatency();
-#ifndef NDEBUG
- MaxObservedLatency = std::max(Latency, MaxObservedLatency);
-#endif
- if (SU->TopReadyCycle < PredReadyCycle + Latency)
- SU->TopReadyCycle = PredReadyCycle + Latency;
- }
releaseNode(SU, SU->TopReadyCycle);
}
@@ -1763,20 +1787,6 @@ void SchedBoundary::releaseBottomNode(SUnit *SU) {
if (SU->isScheduled)
return;
- assert(SU->getInstr() && "Scheduled SUnit must have instr");
-
- for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
- I != E; ++I) {
- if (I->isWeak())
- continue;
- unsigned SuccReadyCycle = I->getSUnit()->BotReadyCycle;
- unsigned Latency = I->getLatency();
-#ifndef NDEBUG
- MaxObservedLatency = std::max(Latency, MaxObservedLatency);
-#endif
- if (SU->BotReadyCycle < SuccReadyCycle + Latency)
- SU->BotReadyCycle = SuccReadyCycle + Latency;
- }
releaseNode(SU, SU->BotReadyCycle);
}
@@ -1943,10 +1953,12 @@ void SchedBoundary::bumpNode(SUnit *SU) {
PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
unsigned PIdx = PI->ProcResourceIdx;
if (SchedModel->getProcResource(PIdx)->BufferSize == 0) {
- ReservedCycles[PIdx] = isTop() ? NextCycle + PI->Cycles : NextCycle;
-#ifndef NDEBUG
- MaxObservedLatency = std::max(PI->Cycles, MaxObservedLatency);
-#endif
+ if (isTop()) {
+ ReservedCycles[PIdx] =
+ std::max(getNextResourceCycle(PIdx, 0), NextCycle + PI->Cycles);
+ }
+ else
+ ReservedCycles[PIdx] = NextCycle;
}
}
}
@@ -2049,8 +2061,10 @@ SUnit *SchedBoundary::pickOnlyChoice() {
}
}
for (unsigned i = 0; Available.empty(); ++i) {
- assert(i <= (HazardRec->getMaxLookAhead() + MaxObservedLatency) &&
- "permanent hazard"); (void)i;
+// FIXME: Re-enable assert once PR20057 is resolved.
+// assert(i <= (HazardRec->getMaxLookAhead() + MaxObservedStall) &&
+// "permanent hazard");
+ (void)i;
bumpCycle(CurrCycle + 1);
releasePending();
}
@@ -2090,111 +2104,6 @@ void SchedBoundary::dumpScheduledState() {
// GenericScheduler - Generic implementation of MachineSchedStrategy.
//===----------------------------------------------------------------------===//
-namespace {
-/// Base class for GenericScheduler. This class maintains information about
-/// scheduling candidates based on TargetSchedModel making it easy to implement
-/// heuristics for either preRA or postRA scheduling.
-class GenericSchedulerBase : public MachineSchedStrategy {
-public:
- /// Represent the type of SchedCandidate found within a single queue.
- /// pickNodeBidirectional depends on these listed by decreasing priority.
- enum CandReason {
- NoCand, PhysRegCopy, RegExcess, RegCritical, Stall, Cluster, Weak, RegMax,
- ResourceReduce, ResourceDemand, BotHeightReduce, BotPathReduce,
- TopDepthReduce, TopPathReduce, NextDefUse, NodeOrder};
-
-#ifndef NDEBUG
- static const char *getReasonStr(GenericSchedulerBase::CandReason Reason);
-#endif
-
- /// Policy for scheduling the next instruction in the candidate's zone.
- struct CandPolicy {
- bool ReduceLatency;
- unsigned ReduceResIdx;
- unsigned DemandResIdx;
-
- CandPolicy(): ReduceLatency(false), ReduceResIdx(0), DemandResIdx(0) {}
- };
-
- /// Status of an instruction's critical resource consumption.
- struct SchedResourceDelta {
- // Count critical resources in the scheduled region required by SU.
- unsigned CritResources;
-
- // Count critical resources from another region consumed by SU.
- unsigned DemandedResources;
-
- SchedResourceDelta(): CritResources(0), DemandedResources(0) {}
-
- bool operator==(const SchedResourceDelta &RHS) const {
- return CritResources == RHS.CritResources
- && DemandedResources == RHS.DemandedResources;
- }
- bool operator!=(const SchedResourceDelta &RHS) const {
- return !operator==(RHS);
- }
- };
-
- /// Store the state used by GenericScheduler heuristics, required for the
- /// lifetime of one invocation of pickNode().
- struct SchedCandidate {
- CandPolicy Policy;
-
- // The best SUnit candidate.
- SUnit *SU;
-
- // The reason for this candidate.
- CandReason Reason;
-
- // Set of reasons that apply to multiple candidates.
- uint32_t RepeatReasonSet;
-
- // Register pressure values for the best candidate.
- RegPressureDelta RPDelta;
-
- // Critical resource consumption of the best candidate.
- SchedResourceDelta ResDelta;
-
- SchedCandidate(const CandPolicy &policy)
- : Policy(policy), SU(nullptr), Reason(NoCand), RepeatReasonSet(0) {}
-
- bool isValid() const { return SU; }
-
- // Copy the status of another candidate without changing policy.
- void setBest(SchedCandidate &Best) {
- assert(Best.Reason != NoCand && "uninitialized Sched candidate");
- SU = Best.SU;
- Reason = Best.Reason;
- RPDelta = Best.RPDelta;
- ResDelta = Best.ResDelta;
- }
-
- bool isRepeat(CandReason R) { return RepeatReasonSet & (1 << R); }
- void setRepeat(CandReason R) { RepeatReasonSet |= (1 << R); }
-
- void initResourceDelta(const ScheduleDAGMI *DAG,
- const TargetSchedModel *SchedModel);
- };
-
-protected:
- const MachineSchedContext *Context;
- const TargetSchedModel *SchedModel;
- const TargetRegisterInfo *TRI;
-
- SchedRemainder Rem;
-protected:
- GenericSchedulerBase(const MachineSchedContext *C):
- Context(C), SchedModel(nullptr), TRI(nullptr) {}
-
- void setPolicy(CandPolicy &Policy, bool IsPostRA, SchedBoundary &CurrZone,
- SchedBoundary *OtherZone);
-
-#ifndef NDEBUG
- void traceCandidate(const SchedCandidate &Cand);
-#endif
-};
-} // namespace
-
void GenericSchedulerBase::SchedCandidate::
initResourceDelta(const ScheduleDAGMI *DAG,
const TargetSchedModel *SchedModel) {
@@ -2430,65 +2339,6 @@ static void tracePick(const GenericSchedulerBase::SchedCandidate &Cand,
<< GenericSchedulerBase::getReasonStr(Cand.Reason) << '\n');
}
-namespace {
-/// GenericScheduler shrinks the unscheduled zone using heuristics to balance
-/// the schedule.
-class GenericScheduler : public GenericSchedulerBase {
- ScheduleDAGMILive *DAG;
-
- // State of the top and bottom scheduled instruction boundaries.
- SchedBoundary Top;
- SchedBoundary Bot;
-
- MachineSchedPolicy RegionPolicy;
-public:
- GenericScheduler(const MachineSchedContext *C):
- GenericSchedulerBase(C), DAG(nullptr), Top(SchedBoundary::TopQID, "TopQ"),
- Bot(SchedBoundary::BotQID, "BotQ") {}
-
- void initPolicy(MachineBasicBlock::iterator Begin,
- MachineBasicBlock::iterator End,
- unsigned NumRegionInstrs) override;
-
- bool shouldTrackPressure() const override {
- return RegionPolicy.ShouldTrackPressure;
- }
-
- void initialize(ScheduleDAGMI *dag) override;
-
- SUnit *pickNode(bool &IsTopNode) override;
-
- void schedNode(SUnit *SU, bool IsTopNode) override;
-
- void releaseTopNode(SUnit *SU) override {
- Top.releaseTopNode(SU);
- }
-
- void releaseBottomNode(SUnit *SU) override {
- Bot.releaseBottomNode(SU);
- }
-
- void registerRoots() override;
-
-protected:
- void checkAcyclicLatency();
-
- void tryCandidate(SchedCandidate &Cand,
- SchedCandidate &TryCand,
- SchedBoundary &Zone,
- const RegPressureTracker &RPTracker,
- RegPressureTracker &TempTracker);
-
- SUnit *pickNodeBidirectional(bool &IsTopNode);
-
- void pickNodeFromQueue(SchedBoundary &Zone,
- const RegPressureTracker &RPTracker,
- SchedCandidate &Candidate);
-
- void reschedulePhysRegCopies(SUnit *SU, bool isTop);
-};
-} // namespace
-
void GenericScheduler::initialize(ScheduleDAGMI *dag) {
assert(dag->hasVRegLiveness() &&
"(PreRA)GenericScheduler needs vreg liveness");
@@ -3023,75 +2873,25 @@ GenericSchedRegistry("converge", "Standard converging scheduler.",
// PostGenericScheduler - Generic PostRA implementation of MachineSchedStrategy.
//===----------------------------------------------------------------------===//
-namespace {
-/// PostGenericScheduler - Interface to the scheduling algorithm used by
-/// ScheduleDAGMI.
-///
-/// Callbacks from ScheduleDAGMI:
-/// initPolicy -> initialize(DAG) -> registerRoots -> pickNode ...
-class PostGenericScheduler : public GenericSchedulerBase {
- ScheduleDAGMI *DAG;
- SchedBoundary Top;
- SmallVector<SUnit*, 8> BotRoots;
-public:
- PostGenericScheduler(const MachineSchedContext *C):
- GenericSchedulerBase(C), Top(SchedBoundary::TopQID, "TopQ") {}
-
- virtual ~PostGenericScheduler() {}
-
- void initPolicy(MachineBasicBlock::iterator Begin,
- MachineBasicBlock::iterator End,
- unsigned NumRegionInstrs) override {
- /* no configurable policy */
- };
-
- /// PostRA scheduling does not track pressure.
- bool shouldTrackPressure() const override { return false; }
-
- void initialize(ScheduleDAGMI *Dag) override {
- DAG = Dag;
- SchedModel = DAG->getSchedModel();
- TRI = DAG->TRI;
-
- Rem.init(DAG, SchedModel);
- Top.init(DAG, SchedModel, &Rem);
- BotRoots.clear();
-
- // Initialize the HazardRecognizers. If itineraries don't exist, are empty,
- // or are disabled, then these HazardRecs will be disabled.
- const InstrItineraryData *Itin = SchedModel->getInstrItineraries();
- const TargetMachine &TM = DAG->MF.getTarget();
- if (!Top.HazardRec) {
- Top.HazardRec =
- TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG);
- }
- }
-
- void registerRoots() override;
-
- SUnit *pickNode(bool &IsTopNode) override;
-
- void scheduleTree(unsigned SubtreeID) override {
- llvm_unreachable("PostRA scheduler does not support subtree analysis.");
- }
-
- void schedNode(SUnit *SU, bool IsTopNode) override;
+void PostGenericScheduler::initialize(ScheduleDAGMI *Dag) {
+ DAG = Dag;
+ SchedModel = DAG->getSchedModel();
+ TRI = DAG->TRI;
- void releaseTopNode(SUnit *SU) override {
- Top.releaseTopNode(SU);
- }
+ Rem.init(DAG, SchedModel);
+ Top.init(DAG, SchedModel, &Rem);
+ BotRoots.clear();
- // Only called for roots.
- void releaseBottomNode(SUnit *SU) override {
- BotRoots.push_back(SU);
+ // Initialize the HazardRecognizers. If itineraries don't exist, are empty,
+ // or are disabled, then these HazardRecs will be disabled.
+ const InstrItineraryData *Itin = SchedModel->getInstrItineraries();
+ const TargetMachine &TM = DAG->MF.getTarget();
+ if (!Top.HazardRec) {
+ Top.HazardRec =
+ TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG);
}
+}
-protected:
- void tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand);
-
- void pickNodeFromQueue(SchedCandidate &Cand);
-};
-} // namespace
void PostGenericScheduler::registerRoots() {
Rem.CriticalPath = DAG->ExitSU.getDepth();
diff --git a/lib/CodeGen/Passes.cpp b/lib/CodeGen/Passes.cpp
index b3f7198..249b2d0 100644
--- a/lib/CodeGen/Passes.cpp
+++ b/lib/CodeGen/Passes.cpp
@@ -30,11 +30,6 @@
using namespace llvm;
-namespace llvm {
-extern cl::opt<bool> EnableStackMapLiveness;
-extern cl::opt<bool> EnablePatchPointLiveness;
-}
-
static cl::opt<bool> DisablePostRA("disable-post-ra", cl::Hidden,
cl::desc("Disable Post Regalloc"));
static cl::opt<bool> DisableBranchFold("disable-branch-fold", cl::Hidden,
@@ -92,9 +87,9 @@ PrintMachineInstrs("print-machineinstrs", cl::ValueOptional,
// Temporary option to allow experimenting with MachineScheduler as a post-RA
// scheduler. Targets can "properly" enable this with
-// substitutePass(&PostRASchedulerID, &MachineSchedulerID); Ideally it wouldn't
-// be part of the standard pass pipeline, and the target would just add a PostRA
-// scheduling pass wherever it wants.
+// substitutePass(&PostRASchedulerID, &PostMachineSchedulerID); Ideally it
+// wouldn't be part of the standard pass pipeline, and the target would just add
+// a PostRA scheduling pass wherever it wants.
static cl::opt<bool> MISchedPostRA("misched-postra", cl::Hidden,
cl::desc("Run MachineScheduler post regalloc (independent of preRA sched)"));
@@ -421,7 +416,7 @@ void TargetPassConfig::addPassesToHandleExceptions() {
// FALLTHROUGH
case ExceptionHandling::DwarfCFI:
case ExceptionHandling::ARM:
- case ExceptionHandling::Win64:
+ case ExceptionHandling::WinEH:
addPass(createDwarfEHPass(TM));
break;
case ExceptionHandling::None:
@@ -566,8 +561,7 @@ void TargetPassConfig::addMachinePasses() {
if (addPreEmitPass())
printAndVerify("After PreEmit passes");
- if (EnableStackMapLiveness || EnablePatchPointLiveness)
- addPass(&StackMapLivenessID);
+ addPass(&StackMapLivenessID);
}
/// Add passes that optimize machine instructions in SSA form.
diff --git a/lib/CodeGen/PeepholeOptimizer.cpp b/lib/CodeGen/PeepholeOptimizer.cpp
index eeee93a..716cb1f 100644
--- a/lib/CodeGen/PeepholeOptimizer.cpp
+++ b/lib/CodeGen/PeepholeOptimizer.cpp
@@ -91,6 +91,10 @@ static cl::opt<bool>
DisablePeephole("disable-peephole", cl::Hidden, cl::init(false),
cl::desc("Disable the peephole optimizer"));
+static cl::opt<bool>
+DisableAdvCopyOpt("disable-adv-copy-opt", cl::Hidden, cl::init(true),
+ cl::desc("Disable advanced copy optimization"));
+
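Usage sketch (hypothetical invocation, not from the patch): the flag is init(true), so the advanced copy optimization stays off by default and must be enabled explicitly, e.g.

  llc -disable-adv-copy-opt=false foo.ll

where foo.ll stands for any input module.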
STATISTIC(NumReuse, "Number of extension results reused");
STATISTIC(NumCmps, "Number of compares eliminated");
STATISTIC(NumImmFold, "Number of move immediate folded");
@@ -137,6 +141,105 @@ namespace {
bool isLoadFoldable(MachineInstr *MI,
SmallSet<unsigned, 16> &FoldAsLoadDefCandidates);
};
+
+ /// \brief Helper class to track the possible sources of a value defined by
+ /// a (chain of) copy related instructions.
+ /// Given a definition (instruction and definition index), this class
+ /// follows the use-def chain to find successive suitable sources.
+ /// The given source can be used to rewrite the definition into
+ /// def = COPY src.
+ ///
+ /// For instance, let us consider the following snippet:
+ /// v0 =
+ /// v2 = INSERT_SUBREG v1, v0, sub0
+ /// def = COPY v2.sub0
+ ///
+ /// Using a ValueTracker for def = COPY v2.sub0 will give the following
+ /// suitable sources:
+ /// v2.sub0 and v0.
+ /// Then, def can be rewritten into def = COPY v0.
+ class ValueTracker {
+ private:
+ /// The current point into the use-def chain.
+ const MachineInstr *Def;
+ /// The index of the definition in Def.
+ unsigned DefIdx;
+ /// The sub register index of the definition.
+ unsigned DefSubReg;
+ /// The register where the value can be found.
+ unsigned Reg;
+ /// Specify whether or not the value tracking looks through
+ /// complex instructions. When this is false, the value tracker
+ /// bails on everything that is not a copy or a bitcast.
+ ///
+ /// Note: This could have been implemented as a specialized version of
+ /// the ValueTracker class but that would have complicated the code of
+ /// the users of this class.
+ bool UseAdvancedTracking;
+ /// Optional MachineRegisterInfo used to perform some complex
+ /// tracking.
+ const MachineRegisterInfo *MRI;
+
+ /// \brief Dispatcher to the right underlying implementation of
+ /// getNextSource.
+ bool getNextSourceImpl(unsigned &SrcIdx, unsigned &SrcSubReg);
+ /// \brief Specialized version of getNextSource for Copy instructions.
+ bool getNextSourceFromCopy(unsigned &SrcIdx, unsigned &SrcSubReg);
+ /// \brief Specialized version of getNextSource for Bitcast instructions.
+ bool getNextSourceFromBitcast(unsigned &SrcIdx, unsigned &SrcSubReg);
+ /// \brief Specialized version of getNextSource for RegSequence
+ /// instructions.
+ bool getNextSourceFromRegSequence(unsigned &SrcIdx, unsigned &SrcSubReg);
+ /// \brief Specialized version of getNextSource for InsertSubreg
+ /// instructions.
+ bool getNextSourceFromInsertSubreg(unsigned &SrcIdx, unsigned &SrcSubReg);
+ /// \brief Specialized version of getNextSource for ExtractSubreg
+ /// instructions.
+ bool getNextSourceFromExtractSubreg(unsigned &SrcIdx, unsigned &SrcSubReg);
+ /// \brief Specialized version of getNextSource for SubregToReg
+ /// instructions.
+ bool getNextSourceFromSubregToReg(unsigned &SrcIdx, unsigned &SrcSubReg);
+
+ public:
+ /// \brief Create a ValueTracker instance for the value defined by \p MI
+ /// at the operand index \p DefIdx.
+ /// \p DefSubReg represents the sub register index the value tracker will
+ /// track. It does not need to match the sub register index used in \p MI.
+ /// \p UseAdvancedTracking specifies whether or not the value tracker looks
+ /// through complex instructions. By default (false), it handles only copy
+ /// and bitcast instructions.
+ /// \p MRI is used to perform some complex checks.
+ ValueTracker(const MachineInstr &MI, unsigned DefIdx, unsigned DefSubReg,
+ bool UseAdvancedTracking = false,
+ const MachineRegisterInfo *MRI = nullptr)
+ : Def(&MI), DefIdx(DefIdx), DefSubReg(DefSubReg),
+ UseAdvancedTracking(UseAdvancedTracking), MRI(MRI) {
+ assert(Def->getOperand(DefIdx).isDef() &&
+ Def->getOperand(DefIdx).isReg() &&
+ "Definition does not match machine instruction");
+ // Initially the value is in the defined register.
+ Reg = Def->getOperand(DefIdx).getReg();
+ }
+
+ /// \brief Following the use-def chain, get the next available source
+ /// for the tracked value.
+ /// When the returned value is not nullptr, getReg() gives the register
+ /// that contains the tracked value.
+ /// \note The sub register index returned in \p SrcSubReg must be used
+ /// with the register returned by getReg() to access the actual value.
+ /// \return Unless the returned value is nullptr (i.e., no source found),
+ /// \p SrcIdx gives the index of the next source in the returned
+ /// instruction and \p SrcSubReg the index to be used on that source to
+ /// get the tracked value. When nullptr is returned, no alternative source
+ /// has been found.
+ const MachineInstr *getNextSource(unsigned &SrcIdx, unsigned &SrcSubReg);
+
+ /// \brief Get the last register where the initial value can be found.
+ /// Initially this is the register of the definition.
+ /// Then, after each successful call to getNextSource, this is the
+ /// register of the last source.
+ unsigned getReg() const { return Reg; }
+ };
}
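A minimal calling sketch for the ValueTracker API declared above (illustration only; MI, DefIdx, DefSubReg, and MRI are assumed in scope, mirroring the rewritten loop in optimizeCopyOrBitcast below):

  ValueTracker Tracker(*MI, DefIdx, DefSubReg,
                       /*UseAdvancedTracking=*/true, MRI);
  unsigned SrcIdx, SrcSubReg;
  while (const MachineInstr *Src = Tracker.getNextSource(SrcIdx, SrcSubReg)) {
    // Tracker.getReg() now names the register holding the tracked value,
    // SrcSubReg is the subregister index to read it through, and SrcIdx is
    // the operand index of that source within Src. Break as soon as a
    // satisfactory source is found.
  }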
char PeepholeOptimizer::ID = 0;
@@ -443,31 +546,32 @@ bool PeepholeOptimizer::optimizeCopyOrBitcast(MachineInstr *MI) {
unsigned Src;
unsigned SrcSubReg;
bool ShouldRewrite = false;
- MachineInstr *Copy = MI;
const TargetRegisterInfo &TRI = *TM->getRegisterInfo();
- // Follow the chain of copies until we reach the top or find a
- // more suitable source.
+ // Follow the chain of copies until we reach the top of the use-def chain
+ // or find a more suitable source.
+ ValueTracker ValTracker(*MI, DefIdx, DefSubReg, !DisableAdvCopyOpt, MRI);
do {
- unsigned CopyDefIdx, CopySrcIdx;
- if (!getCopyOrBitcastDefUseIdx(*Copy, CopyDefIdx, CopySrcIdx))
+ unsigned CopySrcIdx, CopySrcSubReg;
+ if (!ValTracker.getNextSource(CopySrcIdx, CopySrcSubReg))
break;
- const MachineOperand &MO = Copy->getOperand(CopySrcIdx);
- assert(MO.isReg() && "Copies must be between registers.");
- Src = MO.getReg();
-
+ Src = ValTracker.getReg();
+ SrcSubReg = CopySrcSubReg;
+
+ // Do not extend the live-ranges of physical registers as they add
+ // constraints to the register allocator.
+ // Moreover, to extend the live-range of a physical register, unlike an
+ // SSA virtual register, we would have to check that it is not redefined
+ // before the related use.
if (TargetRegisterInfo::isPhysicalRegister(Src))
break;
const TargetRegisterClass *SrcRC = MRI->getRegClass(Src);
- SrcSubReg = MO.getSubReg();
// If this source does not incur a cross register bank copy, use it.
ShouldRewrite = shareSameRegisterFile(TRI, DefRC, DefSubReg, SrcRC,
SrcSubReg);
- // Follow the chain of copies: get the definition of Src.
- Copy = MRI->getVRegDef(Src);
- } while (!ShouldRewrite && Copy && (Copy->isCopy() || Copy->isBitcast()));
+ } while (!ShouldRewrite);
// If we did not find a more suitable source, there is nothing to optimize.
if (!ShouldRewrite || Src == MI->getOperand(SrcIdx).getReg())
@@ -483,6 +587,9 @@ bool PeepholeOptimizer::optimizeCopyOrBitcast(MachineInstr *MI) {
MRI->replaceRegWith(Def, NewVR);
MRI->clearKillFlags(NewVR);
+ // We extended the lifetime of Src.
+ // Clear the kill flags to account for that.
+ MRI->clearKillFlags(Src);
MI->eraseFromParent();
++NumCopiesBitcasts;
return true;
@@ -673,3 +780,251 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
return Changed;
}
+
+bool ValueTracker::getNextSourceFromCopy(unsigned &SrcIdx,
+ unsigned &SrcSubReg) {
+ assert(Def->isCopy() && "Invalid definition");
+ // Copy instructions are supposed to be: Def = Src.
+ // If someone breaks this assumption, bad things will happen everywhere.
+ assert(Def->getDesc().getNumOperands() == 2 && "Invalid number of operands");
+
+ if (Def->getOperand(DefIdx).getSubReg() != DefSubReg)
+ // If we look for a different subreg, it means we want a subreg of src.
+ // Bail as we do not support composing subregs yet.
+ return false;
+ // Otherwise, we want the whole source.
+ SrcIdx = 1;
+ SrcSubReg = Def->getOperand(SrcIdx).getSubReg();
+ return true;
+}
+
+bool ValueTracker::getNextSourceFromBitcast(unsigned &SrcIdx,
+ unsigned &SrcSubReg) {
+ assert(Def->isBitcast() && "Invalid definition");
+
+ // Bail if there are effects that a plain copy will not expose.
+ if (Def->hasUnmodeledSideEffects())
+ return false;
+
+ // Bitcasts with more than one def are not supported.
+ if (Def->getDesc().getNumDefs() != 1)
+ return false;
+ if (Def->getOperand(DefIdx).getSubReg() != DefSubReg)
+ // If we look for a different subreg, it means we want a subreg of the src.
+ // Bail as we do not support composing subregs yet.
+ return false;
+
+ SrcIdx = Def->getDesc().getNumOperands();
+ for (unsigned OpIdx = DefIdx + 1, EndOpIdx = SrcIdx; OpIdx != EndOpIdx;
+ ++OpIdx) {
+ const MachineOperand &MO = Def->getOperand(OpIdx);
+ if (!MO.isReg() || !MO.getReg())
+ continue;
+ assert(!MO.isDef() && "We should have skipped all the definitions by now");
+ if (SrcIdx != EndOpIdx)
+ // Multiple sources?
+ return false;
+ SrcIdx = OpIdx;
+ }
+ SrcSubReg = Def->getOperand(SrcIdx).getSubReg();
+ return true;
+}
+
+bool ValueTracker::getNextSourceFromRegSequence(unsigned &SrcIdx,
+ unsigned &SrcSubReg) {
+ assert(Def->isRegSequence() && "Invalid definition");
+
+ if (Def->getOperand(DefIdx).getSubReg())
+ // If we are composing subregs, bail out.
+ // The case we are checking is Def.<subreg> = REG_SEQUENCE.
+ // This should almost never happen as the SSA property is tracked at
+ // the register level (as opposed to the subreg level).
+ // I.e.,
+ // Def.sub0 =
+ // Def.sub1 =
+ // is a valid SSA representation for Def.sub0 and Def.sub1, but not for
+ // Def. Thus, it must not be generated.
+ // However, some code could theoretically generate a single
+ // Def.sub0 (i.e., not defining the other subregs) and we would
+ // have this case.
+ // If we can ascertain (or force) that this never happens, we could
+ // turn that into an assertion.
+ return false;
+
+ // We are looking at:
+ // Def = REG_SEQUENCE v0, sub0, v1, sub1, ...
+ // Check if one of the operand defines the subreg we are interested in.
+ for (unsigned OpIdx = DefIdx + 1, EndOpIdx = Def->getNumOperands();
+ OpIdx != EndOpIdx; OpIdx += 2) {
+ const MachineOperand &MOSubIdx = Def->getOperand(OpIdx + 1);
+ assert(MOSubIdx.isImm() &&
+ "One of the subindex of the reg_sequence is not an immediate");
+ if (MOSubIdx.getImm() == DefSubReg) {
+ assert(Def->getOperand(OpIdx).isReg() &&
+ "One of the source of the reg_sequence is not a register");
+ SrcIdx = OpIdx;
+ SrcSubReg = Def->getOperand(SrcIdx).getSubReg();
+ return true;
+ }
+ }
+
+ // If the subreg we are tracking is super-defined by another subreg,
+ // we could follow this value. However, this would require composing
+ // the subregs and we do not do that for now.
+ return false;
+}
+
+bool ValueTracker::getNextSourceFromInsertSubreg(unsigned &SrcIdx,
+ unsigned &SrcSubReg) {
+ assert(Def->isInsertSubreg() && "Invalid definition");
+ if (Def->getOperand(DefIdx).getSubReg())
+ // If we are composing subregs, bail out.
+ // Same remark as getNextSourceFromRegSequence.
+ // I.e., this may be turned into an assert.
+ return false;
+
+ // We are looking at:
+ // Def = INSERT_SUBREG v0, v1, sub1
+ // There are two cases:
+ // 1. DefSubReg == sub1, get v1.
+ // 2. DefSubReg != sub1, the value may be available through v0.
+
+ // #1 Check if the inserted register matches the required sub index.
+ unsigned InsertedSubReg = Def->getOperand(3).getImm();
+ if (InsertedSubReg == DefSubReg) {
+ SrcIdx = 2;
+ SrcSubReg = Def->getOperand(SrcIdx).getSubReg();
+ return true;
+ }
+ // #2 Otherwise, if the sub register we are looking for is not partially
+ // defined by the inserted element, we can look through the main
+ // register (v0).
+ // To check the overlapping we need a MRI and a TRI.
+ if (!MRI)
+ return false;
+
+ const MachineOperand &MODef = Def->getOperand(DefIdx);
+ const MachineOperand &MOBase = Def->getOperand(1);
+ // If the result register (Def) and the base register (v0) do not
+ // have the same register class or if we have to compose
+ // subregisters, bail out.
+ if (MRI->getRegClass(MODef.getReg()) != MRI->getRegClass(MOBase.getReg()) ||
+ MOBase.getSubReg())
+ return false;
+
+ // Get the TRI and check if inserted sub register overlaps with the
+ // sub register we are tracking.
+ const TargetRegisterInfo *TRI = MRI->getTargetRegisterInfo();
+ if (!TRI ||
+ (TRI->getSubRegIndexLaneMask(DefSubReg) &
+ TRI->getSubRegIndexLaneMask(InsertedSubReg)) != 0)
+ return false;
+ // At this point, the value is available in v0 via the same subreg
+ // we used for Def.
+ SrcIdx = 1;
+ SrcSubReg = DefSubReg;
+ return true;
+}
+
+bool ValueTracker::getNextSourceFromExtractSubreg(unsigned &SrcIdx,
+ unsigned &SrcSubReg) {
+ assert(Def->isExtractSubreg() && "Invalid definition");
+ // We are looking at:
+ // Def = EXTRACT_SUBREG v0, sub0
+
+ // Bail if we have to compose sub registers.
+ // Indeed, if DefSubReg != 0, we would have to compose it with sub0.
+ if (DefSubReg)
+ return false;
+
+ // Bail if we have to compose sub registers.
+ // Likewise, if v0.subreg != 0, we would have to compose v0.subreg with sub0.
+ if (Def->getOperand(1).getSubReg())
+ return false;
+ // Otherwise, the value is available in v0.sub0.
+ SrcIdx = 1;
+ SrcSubReg = Def->getOperand(2).getImm();
+ return true;
+}
+
+bool ValueTracker::getNextSourceFromSubregToReg(unsigned &SrcIdx,
+ unsigned &SrcSubReg) {
+ assert(Def->isSubregToReg() && "Invalid definition");
+ // We are looking at:
+ // Def = SUBREG_TO_REG Imm, v0, sub0
+
+ // Bail if we have to compose sub registers.
+ // If DefSubReg != sub0, we would have to check that all the bits
+ // we track are included in sub0 and, if so, we would have to
+ // determine the right subreg in v0.
+ if (DefSubReg != Def->getOperand(3).getImm())
+ return false;
+ // Bail if we have to compose sub registers.
+ // Likewise, if v0.subreg != 0, we would have to compose it with sub0.
+ if (Def->getOperand(2).getSubReg())
+ return false;
+
+ SrcIdx = 2;
+ SrcSubReg = Def->getOperand(3).getImm();
+ return true;
+}
+
+bool ValueTracker::getNextSourceImpl(unsigned &SrcIdx, unsigned &SrcSubReg) {
+ assert(Def && "This method needs a valid definition");
+
+ assert(
+ (DefIdx < Def->getDesc().getNumDefs() || Def->getDesc().isVariadic()) &&
+ Def->getOperand(DefIdx).isDef() && "Invalid DefIdx");
+ if (Def->isCopy())
+ return getNextSourceFromCopy(SrcIdx, SrcSubReg);
+ if (Def->isBitcast())
+ return getNextSourceFromBitcast(SrcIdx, SrcSubReg);
+ // All the remaining cases involve "complex" instructions.
+ // Bail if we did not ask for advanced tracking.
+ if (!UseAdvancedTracking)
+ return false;
+ if (Def->isRegSequence())
+ return getNextSourceFromRegSequence(SrcIdx, SrcSubReg);
+ if (Def->isInsertSubreg())
+ return getNextSourceFromInsertSubreg(SrcIdx, SrcSubReg);
+ if (Def->isExtractSubreg())
+ return getNextSourceFromExtractSubreg(SrcIdx, SrcSubReg);
+ if (Def->isSubregToReg())
+ return getNextSourceFromSubregToReg(SrcIdx, SrcSubReg);
+ return false;
+}
+
+const MachineInstr *ValueTracker::getNextSource(unsigned &SrcIdx,
+ unsigned &SrcSubReg) {
+ // If we reach a point where we cannot move up in the use-def chain,
+ // there is nothing we can get.
+ if (!Def)
+ return nullptr;
+
+ const MachineInstr *PrevDef = nullptr;
+ // Try to find the next source.
+ if (getNextSourceImpl(SrcIdx, SrcSubReg)) {
+ // Update definition, definition index, and subregister for the
+ // next call of getNextSource.
+ const MachineOperand &MO = Def->getOperand(SrcIdx);
+ assert(MO.isReg() && !MO.isDef() && "Source is invalid");
+ // Update the current register.
+ Reg = MO.getReg();
+ // Update the return value before moving up in the use-def chain.
+ PrevDef = Def;
+ // If we can still move up in the use-def chain, move to the next
+ // definition.
+ if (!TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ Def = MRI->getVRegDef(Reg);
+ DefIdx = MRI->def_begin(Reg).getOperandNo();
+ DefSubReg = SrcSubReg;
+ return PrevDef;
+ }
+ }
+ // If we end up here, this means we will not be able to find another source
+ // for the next iteration.
+ // Make sure any new call to getNextSource bails out early by cutting the
+ // use-def chain.
+ Def = nullptr;
+ return PrevDef;
+}
diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp
index c74a42f..b98d210 100644
--- a/lib/CodeGen/PrologEpilogInserter.cpp
+++ b/lib/CodeGen/PrologEpilogInserter.cpp
@@ -160,7 +160,7 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) {
replaceFrameIndices(Fn);
// If register scavenging is needed, as we've enabled doing it as a
- // post-pass, scavenge the virtual registers that frame index elimiation
+ // post-pass, scavenge the virtual registers that frame index elimination
// inserted.
if (TRI->requiresRegisterScavenging(Fn) && FrameIndexVirtualScavenging)
scavengeFrameVirtualRegs(Fn);
@@ -268,51 +268,56 @@ void PEI::calculateCalleeSavedRegisters(MachineFunction &F) {
}
}
- if (CSI.empty())
- return; // Early exit if no callee saved registers are modified!
+ if (!TFI->assignCalleeSavedSpillSlots(F, RegInfo, CSI)) {
+ // If the target doesn't implement this, use generic code.
- unsigned NumFixedSpillSlots;
- const TargetFrameLowering::SpillSlot *FixedSpillSlots =
- TFI->getCalleeSavedSpillSlots(NumFixedSpillSlots);
+ if (CSI.empty())
+ return; // Early exit if no callee saved registers are modified!
- // Now that we know which registers need to be saved and restored, allocate
- // stack slots for them.
- for (std::vector<CalleeSavedInfo>::iterator
- I = CSI.begin(), E = CSI.end(); I != E; ++I) {
- unsigned Reg = I->getReg();
- const TargetRegisterClass *RC = RegInfo->getMinimalPhysRegClass(Reg);
+ unsigned NumFixedSpillSlots;
+ const TargetFrameLowering::SpillSlot *FixedSpillSlots =
+ TFI->getCalleeSavedSpillSlots(NumFixedSpillSlots);
- int FrameIdx;
- if (RegInfo->hasReservedSpillSlot(F, Reg, FrameIdx)) {
- I->setFrameIdx(FrameIdx);
- continue;
- }
+ // Now that we know which registers need to be saved and restored, allocate
+ // stack slots for them.
+ for (std::vector<CalleeSavedInfo>::iterator I = CSI.begin(), E = CSI.end();
+ I != E; ++I) {
+ unsigned Reg = I->getReg();
+ const TargetRegisterClass *RC = RegInfo->getMinimalPhysRegClass(Reg);
- // Check to see if this physreg must be spilled to a particular stack slot
- // on this target.
- const TargetFrameLowering::SpillSlot *FixedSlot = FixedSpillSlots;
- while (FixedSlot != FixedSpillSlots+NumFixedSpillSlots &&
- FixedSlot->Reg != Reg)
- ++FixedSlot;
-
- if (FixedSlot == FixedSpillSlots + NumFixedSpillSlots) {
- // Nope, just spill it anywhere convenient.
- unsigned Align = RC->getAlignment();
- unsigned StackAlign = TFI->getStackAlignment();
-
- // We may not be able to satisfy the desired alignment specification of
- // the TargetRegisterClass if the stack alignment is smaller. Use the
- // min.
- Align = std::min(Align, StackAlign);
- FrameIdx = MFI->CreateStackObject(RC->getSize(), Align, true);
- if ((unsigned)FrameIdx < MinCSFrameIndex) MinCSFrameIndex = FrameIdx;
- if ((unsigned)FrameIdx > MaxCSFrameIndex) MaxCSFrameIndex = FrameIdx;
- } else {
- // Spill it to the stack where we must.
- FrameIdx = MFI->CreateFixedObject(RC->getSize(), FixedSlot->Offset, true);
- }
+ int FrameIdx;
+ if (RegInfo->hasReservedSpillSlot(F, Reg, FrameIdx)) {
+ I->setFrameIdx(FrameIdx);
+ continue;
+ }
+
+ // Check to see if this physreg must be spilled to a particular stack slot
+ // on this target.
+ const TargetFrameLowering::SpillSlot *FixedSlot = FixedSpillSlots;
+ while (FixedSlot != FixedSpillSlots + NumFixedSpillSlots &&
+ FixedSlot->Reg != Reg)
+ ++FixedSlot;
+
+ if (FixedSlot == FixedSpillSlots + NumFixedSpillSlots) {
+ // Nope, just spill it anywhere convenient.
+ unsigned Align = RC->getAlignment();
+ unsigned StackAlign = TFI->getStackAlignment();
+
+ // We may not be able to satisfy the desired alignment specification of
+ // the TargetRegisterClass if the stack alignment is smaller. Use the
+ // min.
+ Align = std::min(Align, StackAlign);
+ FrameIdx = MFI->CreateStackObject(RC->getSize(), Align, true);
+ if ((unsigned)FrameIdx < MinCSFrameIndex) MinCSFrameIndex = FrameIdx;
+ if ((unsigned)FrameIdx > MaxCSFrameIndex) MaxCSFrameIndex = FrameIdx;
+ } else {
+ // Spill it to the stack where we must.
+ FrameIdx =
+ MFI->CreateFixedSpillStackObject(RC->getSize(), FixedSlot->Offset);
+ }
- I->setFrameIdx(FrameIdx);
+ I->setFrameIdx(FrameIdx);
+ }
}
MFI->setCalleeSavedInfo(CSI);
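A hypothetical target-side override (illustration only; the hook's signature is inferred from the call site above): a target that wants full control over CSR slot assignment implements the TargetFrameLowering hook and returns true, bypassing the generic path:

  bool MyTargetFrameLowering::assignCalleeSavedSpillSlots(
      MachineFunction &MF, const TargetRegisterInfo *TRI,
      std::vector<CalleeSavedInfo> &CSI) const {
    // Pick a frame index for each entry with CSI[i].setFrameIdx(...) per
    // target-specific layout rules, then report the work as done.
    return true;
  }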
diff --git a/lib/CodeGen/RegAllocGreedy.cpp b/lib/CodeGen/RegAllocGreedy.cpp
index aa7c178..901b993 100644
--- a/lib/CodeGen/RegAllocGreedy.cpp
+++ b/lib/CodeGen/RegAllocGreedy.cpp
@@ -44,6 +44,7 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Timer.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
#include <queue>
using namespace llvm;
@@ -79,6 +80,12 @@ ExhaustiveSearch("exhaustive-register-search", cl::NotHidden,
cl::desc("Exhaustive Search for registers bypassing the depth "
"and interference cutoffs of last chance recoloring"));
+static cl::opt<bool> EnableLocalReassignment(
+ "enable-local-reassign", cl::Hidden,
+ cl::desc("Local reassignment can yield better allocation decisions, but "
+ "may be compile time intensive"),
+ cl::init(false));
+
// FIXME: Find a good default for this flag and remove the flag.
static cl::opt<unsigned>
CSRFirstTimeCost("regalloc-csr-first-time-cost",
@@ -285,6 +292,10 @@ class RAGreedy : public MachineFunctionPass,
/// Callee-save register cost, calculated once per machine function.
BlockFrequency CSRCost;
+ /// Run or not the local reassignment heuristic. This information is
+ /// obtained from the TargetSubtargetInfo.
+ bool EnableLocalReassign;
+
public:
RAGreedy();
@@ -731,7 +742,7 @@ bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, unsigned PhysReg,
// Evicting another local live range in this case could lead to suboptimal
// coloring.
if (!MaxCost.isMax() && IsLocal && LIS->intervalIsInOneMBB(*Intf) &&
- !canReassign(*Intf, PhysReg)) {
+ (!EnableLocalReassign || !canReassign(*Intf, PhysReg))) {
return false;
}
}
@@ -2308,9 +2319,14 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) {
<< "********** Function: " << mf.getName() << '\n');
MF = &mf;
- TRI = MF->getTarget().getRegisterInfo();
- TII = MF->getTarget().getInstrInfo();
+ const TargetMachine &TM = MF->getTarget();
+ TRI = TM.getRegisterInfo();
+ TII = TM.getInstrInfo();
RCI.runOnMachineFunction(mf);
+
+ EnableLocalReassign = EnableLocalReassignment ||
+ TM.getSubtargetImpl()->enableRALocalReassignment(TM.getOptLevel());
+
if (VerifyEnabled)
MF->verify(this, "Before greedy register allocator");
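A hypothetical subtarget override (illustration only; the signature is inferred from the call above, which passes TM.getOptLevel()):

  bool MyTargetSubtargetInfo::enableRALocalReassignment(
      CodeGenOpt::Level OptLevel) const {
    // Pay the extra compile time only at higher optimization levels.
    return OptLevel >= CodeGenOpt::Default;
  }

The heuristic can also be forced on for any target with llc -enable-local-reassign.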
diff --git a/lib/CodeGen/RegisterPressure.cpp b/lib/CodeGen/RegisterPressure.cpp
index b2909e0..617e459 100644
--- a/lib/CodeGen/RegisterPressure.cpp
+++ b/lib/CodeGen/RegisterPressure.cpp
@@ -41,7 +41,7 @@ static void decreaseSetPressure(std::vector<unsigned> &CurrSetPressure,
}
}
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD
void llvm::dumpRegSetPressure(ArrayRef<unsigned> SetPressure,
const TargetRegisterInfo *TRI) {
bool Empty = true;
@@ -55,6 +55,7 @@ void llvm::dumpRegSetPressure(ArrayRef<unsigned> SetPressure,
dbgs() << "\n";
}
+LLVM_DUMP_METHOD
void RegisterPressure::dump(const TargetRegisterInfo *TRI) const {
dbgs() << "Max Pressure: ";
dumpRegSetPressure(MaxSetPressure, TRI);
@@ -68,6 +69,7 @@ void RegisterPressure::dump(const TargetRegisterInfo *TRI) const {
dbgs() << '\n';
}
+LLVM_DUMP_METHOD
void RegPressureTracker::dump() const {
if (!isTopClosed() || !isBottomClosed()) {
dbgs() << "Curr Pressure: ";
@@ -75,7 +77,6 @@ void RegPressureTracker::dump() const {
}
P.dump(TRI);
}
-#endif
/// Increase the current pressure as impacted by these registers and bump
/// the high water mark if needed.
diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp
index 92a9a30..0f8b21c 100644
--- a/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -1508,7 +1508,7 @@ void SchedDFSResult::scheduleTree(unsigned SubtreeID) {
}
}
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD
void ILPValue::print(raw_ostream &OS) const {
OS << InstrCount << " / " << Length << " = ";
if (!Length)
@@ -1517,16 +1517,17 @@ void ILPValue::print(raw_ostream &OS) const {
OS << format("%g", ((double)InstrCount / Length));
}
+LLVM_DUMP_METHOD
void ILPValue::dump() const {
dbgs() << *this << '\n';
}
namespace llvm {
+LLVM_DUMP_METHOD
raw_ostream &operator<<(raw_ostream &OS, const ILPValue &Val) {
Val.print(OS);
return OS;
}
} // namespace llvm
-#endif // !NDEBUG || LLVM_ENABLE_DUMP
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 2d2fd53..7c42e4d 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -167,9 +167,18 @@ namespace {
bool CombineToPreIndexedLoadStore(SDNode *N);
bool CombineToPostIndexedLoadStore(SDNode *N);
- SDValue SplitIndexingFromLoad(LoadSDNode *LD);
bool SliceUpLoad(SDNode *N);
+ /// \brief Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed
+ /// load.
+ ///
+ /// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced.
+ /// \param InVecVT type of the input vector to EVE with bitcasts resolved.
+ /// \param EltNo index of the vector element to load.
+ /// \param OriginalLoad load that EVE came from to be replaced.
+ /// \returns EVE on success, SDValue() on failure.
+ SDValue ReplaceExtractVectorEltOfLoadWithNarrowedLoad(
+ SDNode *EVE, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad);
void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
@@ -646,10 +655,14 @@ static ConstantSDNode *isConstOrConstSplat(SDValue N) {
return CN;
if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N)) {
- ConstantSDNode *CN = BV->getConstantSplatValue();
+ BitVector UndefElements;
+ ConstantSDNode *CN = BV->getConstantSplatNode(&UndefElements);
// BuildVectors can truncate their operands. Ignore that case here.
- if (CN && CN->getValueType(0) == N.getValueType().getScalarType())
+ // FIXME: We blindly ignore splats which include undefs, which is overly
+ // pessimistic.
+ if (CN && UndefElements.none() &&
+ CN->getValueType(0) == N.getValueType().getScalarType())
return CN;
}
@@ -762,14 +775,10 @@ CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
// If the operands of this node are only used by the node, they will now
// be dead. Make sure to visit them first to delete dead nodes early.
- for (unsigned i = 0, e = TLO.Old.getNode()->getNumOperands(); i != e; ++i) {
- SDNode *Op = TLO.Old.getNode()->getOperand(i).getNode();
- // For an operand generating multiple values, one of the values may
- // become dead allowing further simplification (e.g. split index
- // arithmetic from an indexed load).
- if (Op->hasOneUse() || Op->getNumValues() > 1)
- AddToWorkList(Op);
- }
+ for (unsigned i = 0, e = TLO.Old.getNode()->getNumOperands(); i != e; ++i)
+ if (TLO.Old.getNode()->getOperand(i).getNode()->hasOneUse())
+ AddToWorkList(TLO.Old.getNode()->getOperand(i).getNode());
+
DAG.DeleteNode(TLO.Old.getNode());
}
}
@@ -1320,9 +1329,16 @@ SDValue DAGCombiner::combine(SDNode *N) {
// Constant operands are canonicalized to RHS.
if (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1)) {
- SDValue Ops[] = { N1, N0 };
- SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(),
- Ops);
+ SDValue Ops[] = {N1, N0};
+ SDNode *CSENode;
+ if (const BinaryWithFlagsSDNode *BinNode =
+ dyn_cast<BinaryWithFlagsSDNode>(N)) {
+ CSENode = DAG.getNodeIfExists(
+ N->getOpcode(), N->getVTList(), Ops, BinNode->hasNoUnsignedWrap(),
+ BinNode->hasNoSignedWrap(), BinNode->isExact());
+ } else {
+ CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops);
+ }
if (CSENode)
return SDValue(CSENode, 0);
}
@@ -3942,14 +3958,14 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
// If setcc produces all-one true value then:
// (shl (and (setcc) N01CV) N1CV) -> (and (setcc) N01CV<<N1CV)
if (N1CV && N1CV->isConstant()) {
- if (N0.getOpcode() == ISD::AND &&
- TLI.getBooleanContents(true) ==
- TargetLowering::ZeroOrNegativeOneBooleanContent) {
+ if (N0.getOpcode() == ISD::AND) {
SDValue N00 = N0->getOperand(0);
SDValue N01 = N0->getOperand(1);
BuildVectorSDNode *N01CV = dyn_cast<BuildVectorSDNode>(N01);
- if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC) {
+ if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC &&
+ TLI.getBooleanContents(N00.getOperand(0).getValueType()) ==
+ TargetLowering::ZeroOrNegativeOneBooleanContent) {
SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, VT, N01CV, N1CV);
if (C.getNode())
return DAG.getNode(ISD::AND, SDLoc(N), VT, N00, C);
@@ -4508,11 +4524,20 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
if (VT == MVT::i1 && N1C && N1C->getAPIntValue() == 1)
return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N2);
// fold (select C, 0, 1) -> (xor C, 1)
+ // We can't do this reliably if integer-based booleans have different contents
+ // from floating-point-based booleans. This is because we can't tell whether we
+ // have an integer-based boolean or a floating-point-based boolean unless we
+ // can find the SETCC that produced it and inspect its operands. This is
+ // fairly easy if C is the SETCC node, but it can potentially be
+ // undiscoverable (or not reasonably discoverable). For example, it could be
+ // in another basic block or it could require searching a complicated
+ // expression.
if (VT.isInteger() &&
- (VT0 == MVT::i1 ||
- (VT0.isInteger() &&
- TLI.getBooleanContents(false) ==
- TargetLowering::ZeroOrOneBooleanContent)) &&
+ (VT0 == MVT::i1 || (VT0.isInteger() &&
+ TLI.getBooleanContents(false, false) ==
+ TLI.getBooleanContents(false, true) &&
+ TLI.getBooleanContents(false, false) ==
+ TargetLowering::ZeroOrOneBooleanContent)) &&
N1C && N2C && N1C->isNullValue() && N2C->getAPIntValue() == 1) {
SDValue XORNode;
if (VT == VT0)
@@ -4555,12 +4580,9 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
// fold selects based on a setcc into other things, such as min/max/abs
if (N0.getOpcode() == ISD::SETCC) {
- // FIXME:
- // Check against MVT::Other for SELECT_CC, which is a workaround for targets
- // having to say they don't support SELECT_CC on every type the DAG knows
- // about, since there is no way to mark an opcode illegal at all value types
- if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, MVT::Other) &&
- TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT))
+ if ((!LegalOperations &&
+ TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT)) ||
+ TLI.isOperationLegal(ISD::SELECT_CC, VT))
return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT,
N0.getOperand(0), N0.getOperand(1),
N1, N2, N0.getOperand(2));
@@ -4587,6 +4609,56 @@ std::pair<SDValue, SDValue> SplitVSETCC(const SDNode *N, SelectionDAG &DAG) {
return std::make_pair(Lo, Hi);
}
+// This function assumes all the vselect's arguments are CONCAT_VECTORS
+// nodes and that the condition is a BV of ConstantSDNodes (or undefs).
+static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
+ SDLoc dl(N);
+ SDValue Cond = N->getOperand(0);
+ SDValue LHS = N->getOperand(1);
+ SDValue RHS = N->getOperand(2);
+ MVT VT = N->getSimpleValueType(0);
+ int NumElems = VT.getVectorNumElements();
+ assert(LHS.getOpcode() == ISD::CONCAT_VECTORS &&
+ RHS.getOpcode() == ISD::CONCAT_VECTORS &&
+ Cond.getOpcode() == ISD::BUILD_VECTOR);
+
+ // We're sure we have an even number of elements due to the
+ // concat_vectors we have as arguments to vselect.
+ // Skip BV elements until we find one that's not an UNDEF.
+ // After we find a non-UNDEF element, keep looping until we get to half the
+ // length of the BV and see if all the non-undef nodes are the same.
+ ConstantSDNode *BottomHalf = nullptr;
+ for (int i = 0; i < NumElems / 2; ++i) {
+ if (Cond->getOperand(i)->getOpcode() == ISD::UNDEF)
+ continue;
+
+ if (BottomHalf == nullptr)
+ BottomHalf = cast<ConstantSDNode>(Cond.getOperand(i));
+ else if (Cond->getOperand(i).getNode() != BottomHalf)
+ return SDValue();
+ }
+
+ // Do the same for the second half of the BuildVector
+ ConstantSDNode *TopHalf = nullptr;
+ for (int i = NumElems / 2; i < NumElems; ++i) {
+ if (Cond->getOperand(i)->getOpcode() == ISD::UNDEF)
+ continue;
+
+ if (TopHalf == nullptr)
+ TopHalf = cast<ConstantSDNode>(Cond.getOperand(i));
+ else if (Cond->getOperand(i).getNode() != TopHalf)
+ return SDValue();
+ }
+
+ assert(TopHalf && BottomHalf &&
+ "One half of the selector was all UNDEFs and the other was all the "
+ "same value. This should have been addressed before this function.");
+ return DAG.getNode(
+ ISD::CONCAT_VECTORS, dl, VT,
+ BottomHalf->isNullValue() ? RHS->getOperand(0) : LHS->getOperand(0),
+ TopHalf->isNullValue() ? RHS->getOperand(1) : LHS->getOperand(1));
+}
+
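Schematically, the fold implemented above does the following (hand-written illustration, not taken from the patch):

  (vselect <0,0,0,0,-1,-1,-1,-1>, (concat_vectors A, B), (concat_vectors C, D))
    --> (concat_vectors C, B)

The all-zeros bottom half of the condition selects the corresponding RHS piece, the all-ones top half selects the LHS piece, and undef condition elements are free to match either half.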
SDValue DAGCombiner::visitVSELECT(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -4659,6 +4731,17 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) {
if (ISD::isBuildVectorAllZeros(N0.getNode()))
return N2;
+ // The ConvertSelectToConcatVector function is assuming both the above
+ // checks for (vselect (build_vector all{ones,zeros}) ...) have been made
+ // and addressed.
+ if (N1.getOpcode() == ISD::CONCAT_VECTORS &&
+ N2.getOpcode() == ISD::CONCAT_VECTORS &&
+ ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) {
+ SDValue CV = ConvertSelectToConcatVector(N, DAG);
+ if (CV.getNode())
+ return CV;
+ }
+
return SDValue();
}
@@ -5003,12 +5086,12 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
}
if (N0.getOpcode() == ISD::SETCC) {
+ EVT N0VT = N0.getOperand(0).getValueType();
// sext(setcc) -> sext_in_reg(vsetcc) for vectors.
// Only do this before legalize for now.
if (VT.isVector() && !LegalOperations &&
- TLI.getBooleanContents(true) ==
- TargetLowering::ZeroOrNegativeOneBooleanContent) {
- EVT N0VT = N0.getOperand(0).getValueType();
+ TLI.getBooleanContents(N0VT) ==
+ TargetLowering::ZeroOrNegativeOneBooleanContent) {
// On some architectures (such as SSE/NEON/etc) the SETCC result type is
// of the same size as the compared operands. Only optimize sext(setcc())
// if this is the case.
@@ -6140,6 +6223,9 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
// Do not change the width of a volatile load.
!cast<LoadSDNode>(N0)->isVolatile() &&
+ // Do not remove the cast if the types differ in endian layout.
+ TLI.hasBigEndianPartOrdering(N0.getValueType()) ==
+ TLI.hasBigEndianPartOrdering(VT) &&
(!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)) &&
TLI.isLoadBitCastBeneficial(N0.getValueType(), VT)) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
@@ -6955,11 +7041,7 @@ SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
}
// The next optimizations are desirable only if SELECT_CC can be lowered.
- // Check against MVT::Other for SELECT_CC, which is a workaround for targets
- // having to say they don't support SELECT_CC on every type the DAG knows
- // about, since there is no way to mark an opcode illegal at all value types
- // (See also visitSELECT)
- if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, MVT::Other)) {
+ if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || !LegalOperations) {
// fold (sint_to_fp (setcc x, y, cc)) -> (select_cc x, y, -1.0, 0.0,, cc)
if (N0.getOpcode() == ISD::SETCC && N0.getValueType() == MVT::i1 &&
!VT.isVector() &&
@@ -7012,11 +7094,7 @@ SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
}
// The next optimizations are desirable only if SELECT_CC can be lowered.
- // Check against MVT::Other for SELECT_CC, which is a workaround for targets
- // having to say they don't support SELECT_CC on every type the DAG knows
- // about, since there is no way to mark an opcode illegal at all value types
- // (See also visitSELECT)
- if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, MVT::Other)) {
+ if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || !LegalOperations) {
// fold (uint_to_fp (setcc x, y, cc)) -> (select_cc x, y, -1.0, 0.0,, cc)
if (N0.getOpcode() == ISD::SETCC && !VT.isVector() &&
@@ -7849,17 +7927,6 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
return false;
}
-/// \brief Return the base-pointer arithmetic from an indexed \p LD.
-SDValue DAGCombiner::SplitIndexingFromLoad(LoadSDNode *LD) {
- ISD::MemIndexedMode AM = LD->getAddressingMode();
- assert(AM != ISD::UNINDEXED);
- SDValue BP = LD->getOperand(1);
- SDValue Inc = LD->getOperand(2);
- unsigned Opc =
- (AM == ISD::PRE_INC || AM == ISD::POST_INC ? ISD::ADD : ISD::SUB);
- return DAG.getNode(Opc, SDLoc(LD), BP.getSimpleValueType(), BP, Inc);
-}
-
SDValue DAGCombiner::visitLOAD(SDNode *N) {
LoadSDNode *LD = cast<LoadSDNode>(N);
SDValue Chain = LD->getChain();
@@ -7896,16 +7963,8 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
} else {
// Indexed loads.
assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?");
- if (!N->hasAnyUseOfValue(0)) {
+ if (!N->hasAnyUseOfValue(0) && !N->hasAnyUseOfValue(1)) {
SDValue Undef = DAG.getUNDEF(N->getValueType(0));
- SDValue Index;
- if (N->hasAnyUseOfValue(1)) {
- Index = SplitIndexingFromLoad(LD);
- // Try to fold the base pointer arithmetic into subsequent loads and
- // stores.
- AddUsersToWorkList(N);
- } else
- Index = DAG.getUNDEF(N->getValueType(1));
DEBUG(dbgs() << "\nReplacing.7 ";
N->dump(&DAG);
dbgs() << "\nWith: ";
@@ -7913,7 +7972,8 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
dbgs() << " and 2 other values\n");
WorkListRemover DeadNodes(*this);
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef);
- DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Index);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1),
+ DAG.getUNDEF(N->getValueType(1)));
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain);
removeFromWorkList(N);
DAG.DeleteNode(N);
@@ -9666,6 +9726,27 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
return SDValue();
unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
+ // Canonicalize insert_vector_elt dag nodes.
+ // Example:
+ // (insert_vector_elt (insert_vector_elt A, Idx0), Idx1)
+ // -> (insert_vector_elt (insert_vector_elt A, Idx1), Idx0)
+ //
+ // Do this only if the child insert_vector node has one use; also
+ // do this only if indices are both constants and Idx1 < Idx0.
+ if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT && InVec.hasOneUse()
+ && isa<ConstantSDNode>(InVec.getOperand(2))) {
+ unsigned OtherElt =
+ cast<ConstantSDNode>(InVec.getOperand(2))->getZExtValue();
+ if (Elt < OtherElt) {
+ // Swap nodes.
+ SDValue NewOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), VT,
+ InVec.getOperand(0), InVal, EltNo);
+ AddToWorkList(NewOp.getNode());
+ return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(InVec.getNode()),
+ VT, NewOp, InVec.getOperand(1), InVec.getOperand(2));
+ }
+ }
+
// Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
// be converted to a BUILD_VECTOR). Fill in the Ops vector with the
// vector elements.
@@ -9698,6 +9779,86 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);
}
+SDValue DAGCombiner::ReplaceExtractVectorEltOfLoadWithNarrowedLoad(
+ SDNode *EVE, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad) {
+ EVT ResultVT = EVE->getValueType(0);
+ EVT VecEltVT = InVecVT.getVectorElementType();
+ unsigned Align = OriginalLoad->getAlignment();
+ unsigned NewAlign = TLI.getDataLayout()->getABITypeAlignment(
+ VecEltVT.getTypeForEVT(*DAG.getContext()));
+
+ if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT))
+ return SDValue();
+
+ Align = NewAlign;
+
+ SDValue NewPtr = OriginalLoad->getBasePtr();
+ SDValue Offset;
+ EVT PtrType = NewPtr.getValueType();
+ MachinePointerInfo MPI;
+ if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) {
+ int Elt = ConstEltNo->getZExtValue();
+ unsigned PtrOff = VecEltVT.getSizeInBits() * Elt / 8;
+ if (TLI.isBigEndian())
+ PtrOff = InVecVT.getSizeInBits() / 8 - PtrOff;
+ Offset = DAG.getConstant(PtrOff, PtrType);
+ MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff);
+ } else {
+ Offset = DAG.getNode(
+ ISD::MUL, SDLoc(EVE), EltNo.getValueType(), EltNo,
+ DAG.getConstant(VecEltVT.getStoreSize(), EltNo.getValueType()));
+ if (TLI.isBigEndian())
+ Offset = DAG.getNode(
+ ISD::SUB, SDLoc(EVE), EltNo.getValueType(),
+ DAG.getConstant(InVecVT.getStoreSize(), EltNo.getValueType()), Offset);
+ MPI = OriginalLoad->getPointerInfo();
+ }
+ NewPtr = DAG.getNode(ISD::ADD, SDLoc(EVE), PtrType, NewPtr, Offset);
+
+ // The replacement we need to do here is a little tricky: we need to
+ // replace an extractelement of a load with a load.
+ // Use ReplaceAllUsesOfValuesWith to do the replacement.
+ // Note that this replacement assumes that the extractelement is the only
+ // use of the load; that's okay because we don't want to perform this
+ // transformation in other cases anyway.
+ SDValue Load;
+ SDValue Chain;
+ if (ResultVT.bitsGT(VecEltVT)) {
+ // If the result type of vextract is wider than the load, then issue an
+ // extending load instead.
+ ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, VecEltVT)
+ ? ISD::ZEXTLOAD
+ : ISD::EXTLOAD;
+ Load = DAG.getExtLoad(ExtType, SDLoc(EVE), ResultVT, OriginalLoad->getChain(),
+ NewPtr, MPI, VecEltVT, OriginalLoad->isVolatile(),
+ OriginalLoad->isNonTemporal(), Align,
+ OriginalLoad->getTBAAInfo());
+ Chain = Load.getValue(1);
+ } else {
+ Load = DAG.getLoad(
+ VecEltVT, SDLoc(EVE), OriginalLoad->getChain(), NewPtr, MPI,
+ OriginalLoad->isVolatile(), OriginalLoad->isNonTemporal(),
+ OriginalLoad->isInvariant(), Align, OriginalLoad->getTBAAInfo());
+ Chain = Load.getValue(1);
+ if (ResultVT.bitsLT(VecEltVT))
+ Load = DAG.getNode(ISD::TRUNCATE, SDLoc(EVE), ResultVT, Load);
+ else
+ Load = DAG.getNode(ISD::BITCAST, SDLoc(EVE), ResultVT, Load);
+ }
+ WorkListRemover DeadNodes(*this);
+ SDValue From[] = { SDValue(EVE, 0), SDValue(OriginalLoad, 1) };
+ SDValue To[] = { Load, Chain };
+ DAG.ReplaceAllUsesOfValuesWith(From, To, 2);
+ // Since we're explicitly calling ReplaceAllUses, add the new node to the
+ // worklist explicitly as well.
+ AddToWorkList(Load.getNode());
+ AddUsersToWorkList(Load.getNode()); // Add users too
+ // Make sure to revisit this node to clean it up; it will usually be dead.
+ AddToWorkList(EVE);
+ ++OpsNarrowed;
+ return SDValue(EVE, 0);
+}
+
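For intuition, with a constant index the rewrite above is, schematically (illustrative types, little-endian):

  (extract_vector_elt (load <4 x i32>, [P]), 2)  -->  (load i32, [P + 8])

The byte offset is mirrored on big-endian targets, a variable index is handled by materializing the scaled ISD::MUL/ISD::ADD address computation, and an extending load is issued instead when the extract's result type is wider than the vector element.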
SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
// (vextract (scalar_to_vector val, 0) -> val
SDValue InVec = N->getOperand(0);
@@ -9766,6 +9927,38 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
}
}
+ bool BCNumEltsChanged = false;
+ EVT ExtVT = VT.getVectorElementType();
+ EVT LVT = ExtVT;
+
+ // If the result of load has to be truncated, then it's not necessarily
+ // profitable.
+ if (NVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, NVT))
+ return SDValue();
+
+ if (InVec.getOpcode() == ISD::BITCAST) {
+ // Don't duplicate a load with other uses.
+ if (!InVec.hasOneUse())
+ return SDValue();
+
+ EVT BCVT = InVec.getOperand(0).getValueType();
+ if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
+ return SDValue();
+ if (VT.getVectorNumElements() != BCVT.getVectorNumElements())
+ BCNumEltsChanged = true;
+ InVec = InVec.getOperand(0);
+ ExtVT = BCVT.getVectorElementType();
+ }
+
+ // (vextract (vN[if]M load $addr), i) -> ([if]M load $addr + i * size)
+ if (!LegalOperations && !ConstEltNo && InVec.hasOneUse() &&
+ ISD::isNormalLoad(InVec.getNode())) {
+ SDValue Index = N->getOperand(1);
+ if (LoadSDNode *OrigLoad = dyn_cast<LoadSDNode>(InVec))
+ return ReplaceExtractVectorEltOfLoadWithNarrowedLoad(N, VT, Index,
+ OrigLoad);
+ }
+
// Perform only after legalization to ensure build_vector / vector_shuffle
// optimizations have already been done.
if (!LegalOperations) return SDValue();
@@ -9776,30 +9969,6 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
if (ConstEltNo) {
int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
- bool NewLoad = false;
- bool BCNumEltsChanged = false;
- EVT ExtVT = VT.getVectorElementType();
- EVT LVT = ExtVT;
-
- // If the result of load has to be truncated, then it's not necessarily
- // profitable.
- if (NVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, NVT))
- return SDValue();
-
- if (InVec.getOpcode() == ISD::BITCAST) {
- // Don't duplicate a load with other uses.
- if (!InVec.hasOneUse())
- return SDValue();
-
- EVT BCVT = InVec.getOperand(0).getValueType();
- if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
- return SDValue();
- if (VT.getVectorNumElements() != BCVT.getVectorNumElements())
- BCNumEltsChanged = true;
- InVec = InVec.getOperand(0);
- ExtVT = BCVT.getVectorElementType();
- NewLoad = true;
- }
LoadSDNode *LN0 = nullptr;
const ShuffleVectorSDNode *SVN = nullptr;
@@ -9842,6 +10011,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
if (ISD::isNormalLoad(InVec.getNode())) {
LN0 = cast<LoadSDNode>(InVec);
Elt = (Idx < (int)NumElems) ? Idx : Idx - (int)NumElems;
+ EltNo = DAG.getConstant(Elt, EltNo.getValueType());
}
}
@@ -9854,72 +10024,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
if (Elt == -1)
return DAG.getUNDEF(LVT);
- unsigned Align = LN0->getAlignment();
- if (NewLoad) {
- // Check the resultant load doesn't need a higher alignment than the
- // original load.
- unsigned NewAlign =
- TLI.getDataLayout()
- ->getABITypeAlignment(LVT.getTypeForEVT(*DAG.getContext()));
-
- if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, LVT))
- return SDValue();
-
- Align = NewAlign;
- }
-
- SDValue NewPtr = LN0->getBasePtr();
- unsigned PtrOff = 0;
-
- if (Elt) {
- PtrOff = LVT.getSizeInBits() * Elt / 8;
- EVT PtrType = NewPtr.getValueType();
- if (TLI.isBigEndian())
- PtrOff = VT.getSizeInBits() / 8 - PtrOff;
- NewPtr = DAG.getNode(ISD::ADD, SDLoc(N), PtrType, NewPtr,
- DAG.getConstant(PtrOff, PtrType));
- }
-
- // The replacement we need to do here is a little tricky: we need to
- // replace an extractelement of a load with a load.
- // Use ReplaceAllUsesOfValuesWith to do the replacement.
- // Note that this replacement assumes that the extractvalue is the only
- // use of the load; that's okay because we don't want to perform this
- // transformation in other cases anyway.
- SDValue Load;
- SDValue Chain;
- if (NVT.bitsGT(LVT)) {
- // If the result type of vextract is wider than the load, then issue an
- // extending load instead.
- ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, LVT)
- ? ISD::ZEXTLOAD : ISD::EXTLOAD;
- Load = DAG.getExtLoad(ExtType, SDLoc(N), NVT, LN0->getChain(),
- NewPtr, LN0->getPointerInfo().getWithOffset(PtrOff),
- LVT, LN0->isVolatile(), LN0->isNonTemporal(),
- Align, LN0->getTBAAInfo());
- Chain = Load.getValue(1);
- } else {
- Load = DAG.getLoad(LVT, SDLoc(N), LN0->getChain(), NewPtr,
- LN0->getPointerInfo().getWithOffset(PtrOff),
- LN0->isVolatile(), LN0->isNonTemporal(),
- LN0->isInvariant(), Align, LN0->getTBAAInfo());
- Chain = Load.getValue(1);
- if (NVT.bitsLT(LVT))
- Load = DAG.getNode(ISD::TRUNCATE, SDLoc(N), NVT, Load);
- else
- Load = DAG.getNode(ISD::BITCAST, SDLoc(N), NVT, Load);
- }
- WorkListRemover DeadNodes(*this);
- SDValue From[] = { SDValue(N, 0), SDValue(LN0,1) };
- SDValue To[] = { Load, Chain };
- DAG.ReplaceAllUsesOfValuesWith(From, To, 2);
- // Since we're explcitly calling ReplaceAllUses, add the new node to the
- // worklist explicitly as well.
- AddToWorkList(Load.getNode());
- AddUsersToWorkList(Load.getNode()); // Add users too
- // Make sure to revisit this node to clean it up; it will usually be dead.
- AddToWorkList(N);
- return SDValue(N, 0);
+ return ReplaceExtractVectorEltOfLoadWithNarrowedLoad(N, VT, EltNo, LN0);
}
return SDValue();
@@ -10280,10 +10385,24 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
SmallVector<SDValue, 8> Opnds;
unsigned BuildVecNumElts = N0.getNumOperands();
- for (unsigned i = 0; i != BuildVecNumElts; ++i)
- Opnds.push_back(N0.getOperand(i));
- for (unsigned i = 0; i != BuildVecNumElts; ++i)
- Opnds.push_back(N1.getOperand(i));
+ EVT SclTy0 = N0.getOperand(0)->getValueType(0);
+ EVT SclTy1 = N1.getOperand(0)->getValueType(0);
+ if (SclTy0.isFloatingPoint()) {
+ for (unsigned i = 0; i != BuildVecNumElts; ++i)
+ Opnds.push_back(N0.getOperand(i));
+ for (unsigned i = 0; i != BuildVecNumElts; ++i)
+ Opnds.push_back(N1.getOperand(i));
+ } else {
+ // If the BUILD_VECTORs are built from integers, they may have different
+ // operand types. Get the smaller type and truncate all operands to it.
+ EVT MinTy = SclTy0.bitsLE(SclTy1) ? SclTy0 : SclTy1;
+ for (unsigned i = 0; i != BuildVecNumElts; ++i)
+ Opnds.push_back(DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinTy,
+ N0.getOperand(i)));
+ for (unsigned i = 0; i != BuildVecNumElts; ++i)
+ Opnds.push_back(DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinTy,
+ N1.getOperand(i)));
+ }
return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, Opnds);
}
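
For reference, a minimal standalone sketch (plain C++, not the SelectionDAG API; the widths and values are invented for illustration) of the truncate-to-the-smaller-type rule in the hunk above:

    // Sketch: truncate all BUILD_VECTOR operands to the smaller scalar type
    // before concatenating, mimicking the ISD::TRUNCATE nodes built above.
    #include <algorithm>
    #include <cstdint>
    #include <iostream>
    #include <vector>

    uint64_t truncateTo(uint64_t V, unsigned Bits) {
      return Bits >= 64 ? V : (V & ((1ULL << Bits) - 1));
    }

    int main() {
      unsigned SclTy0 = 32, SclTy1 = 16; // operand widths of the two vectors
      std::vector<uint64_t> N0 = {0x1234ABCD, 0x0000FFFF};
      std::vector<uint64_t> N1 = {0xBEEF, 0x7777};

      unsigned MinTy = std::min(SclTy0, SclTy1); // the common, smaller type
      std::vector<uint64_t> Opnds;
      for (uint64_t V : N0) Opnds.push_back(truncateTo(V, MinTy));
      for (uint64_t V : N1) Opnds.push_back(truncateTo(V, MinTy));

      for (uint64_t V : Opnds) std::cout << std::hex << V << ' ';
      std::cout << '\n'; // abcd ffff beef 7777
    }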
@@ -10558,22 +10677,19 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
}
// If this shuffle node is simply a swizzle of another shuffle node,
- // and it reverses the swizzle of the previous shuffle then we can
- // optimize shuffle(shuffle(x, undef), undef) -> x.
+ // then try to simplify it.
if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG &&
N1.getOpcode() == ISD::UNDEF) {
ShuffleVectorSDNode *OtherSV = cast<ShuffleVectorSDNode>(N0);
- // Shuffle nodes can only reverse shuffles with a single non-undef value.
- if (N0.getOperand(1).getOpcode() != ISD::UNDEF)
- return SDValue();
-
// The incoming shuffle must be of the same type as the result of the
// current shuffle.
assert(OtherSV->getOperand(0).getValueType() == VT &&
"Shuffle types don't match");
+ SmallVector<int, 4> Mask;
+ // Compute the combined shuffle mask.
for (unsigned i = 0; i != NumElts; ++i) {
int Idx = SVN->getMaskElt(i);
assert(Idx < (int)NumElts && "Index references undef operand");
@@ -10581,13 +10697,71 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
// shuffle. Adopt the incoming index.
if (Idx >= 0)
Idx = OtherSV->getMaskElt(Idx);
+ Mask.push_back(Idx);
+ }
+
+ bool CommuteOperands = false;
+ if (N0.getOperand(1).getOpcode() != ISD::UNDEF) {
+ // To be valid, the combined shuffle mask should only reference elements
+ // from one of the two input vectors of the inner shufflevector.
+ bool IsValidMask = true;
+ for (unsigned i = 0; i != NumElts && IsValidMask; ++i)
+ // See if the combined mask only references undefs or elements coming
+ // from the first shufflevector operand.
+ IsValidMask = Mask[i] < 0 || (unsigned)Mask[i] < NumElts;
+
+ if (!IsValidMask) {
+ IsValidMask = true;
+ for (unsigned i = 0; i != NumElts && IsValidMask; ++i)
+ // Check that all the elements come from the second shuffle operand.
+ IsValidMask = Mask[i] < 0 || (unsigned)Mask[i] >= NumElts;
+ CommuteOperands = IsValidMask;
+ }
- // The combined shuffle must map each index to itself.
- if (Idx >= 0 && (unsigned)Idx != i)
+ // Early exit if the combined shuffle mask is not valid.
+ if (!IsValidMask)
return SDValue();
}
- return OtherSV->getOperand(0);
+ // See if this pair of shuffles can be safely folded according to either
+ // of the following rules:
+ // shuffle(shuffle(x, y), undef) -> x
+ // shuffle(shuffle(x, undef), undef) -> x
+ // shuffle(shuffle(x, y), undef) -> y
+ bool IsIdentityMask = true;
+ unsigned BaseMaskIndex = CommuteOperands ? NumElts : 0;
+ for (unsigned i = 0; i != NumElts && IsIdentityMask; ++i) {
+ // Skip Undefs.
+ if (Mask[i] < 0)
+ continue;
+
+ // The combined shuffle must map each index to itself.
+ IsIdentityMask = (unsigned)Mask[i] == i + BaseMaskIndex;
+ }
+
+ if (IsIdentityMask) {
+ if (CommuteOperands)
+ // optimize shuffle(shuffle(x, y), undef) -> y.
+ return OtherSV->getOperand(1);
+
+ // optimize shuffle(shuffle(x, undef), undef) -> x
+ // optimize shuffle(shuffle(x, y), undef) -> x
+ return OtherSV->getOperand(0);
+ }
+
+ // It may still be beneficial to combine the two shuffles if the
+ // resulting shuffle is legal.
+ if (TLI.isShuffleMaskLegal(Mask, VT)) {
+ if (!CommuteOperands)
+ // shuffle(shuffle(x, undef, M1), undef, M2) -> shuffle(x, undef, M3).
+ // shuffle(shuffle(x, y, M1), undef, M2) -> shuffle(x, undef, M3)
+ return DAG.getVectorShuffle(VT, SDLoc(N), N0->getOperand(0), N1,
+ &Mask[0]);
+
+ // shuffle(shuffle(x, y, M1), undef, M2) -> shuffle(undef, y, M3)
+ return DAG.getVectorShuffle(VT, SDLoc(N), N1, N0->getOperand(1),
+ &Mask[0]);
+ }
}
return SDValue();
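
The mask composition and identity test above can be modelled in isolation. The following sketch (plain C++; the helper names are invented) folds shuffle(shuffle(x, y, Inner), undef, Outer) by composing the two masks, then tests whether the result is the identity on either inner operand:

    #include <cstddef>
    #include <vector>

    // Compose the outer mask with the inner one; -1 encodes an undef lane.
    std::vector<int> combineMasks(const std::vector<int> &Outer,
                                  const std::vector<int> &Inner) {
      std::vector<int> Mask;
      for (int Idx : Outer)
        Mask.push_back(Idx < 0 ? -1 : Inner[Idx]);
      return Mask;
    }

    // True if every defined element maps to itself, offset by Base
    // (0 selects x, NumElts selects y).
    bool isIdentity(const std::vector<int> &Mask, size_t Base) {
      for (size_t i = 0; i != Mask.size(); ++i)
        if (Mask[i] >= 0 && (size_t)Mask[i] != i + Base)
          return false;
      return true;
    }

    int main() {
      std::vector<int> Inner = {3, 2, 1, 0}; // reverse x
      std::vector<int> Outer = {3, 2, 1, 0}; // reverse again
      std::vector<int> M = combineMasks(Outer, Inner);
      // M == {0, 1, 2, 3}: identity on x, so the pair folds to x.
      return isIdentity(M, 0) ? 0 : 1;
    }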
@@ -10729,6 +10903,27 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), LHS.getValueType(), Ops);
}
+ // Type legalization might introduce new shuffles in the DAG.
+ // Fold (VBinOp (shuffle (A, Undef, Mask)), (shuffle (B, Undef, Mask)))
+ // -> (shuffle (VBinOp (A, B)), Undef, Mask).
+ if (LegalTypes && isa<ShuffleVectorSDNode>(LHS) &&
+ isa<ShuffleVectorSDNode>(RHS) && LHS.hasOneUse() && RHS.hasOneUse() &&
+ LHS.getOperand(1).getOpcode() == ISD::UNDEF &&
+ RHS.getOperand(1).getOpcode() == ISD::UNDEF) {
+ ShuffleVectorSDNode *SVN0 = cast<ShuffleVectorSDNode>(LHS);
+ ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(RHS);
+
+ if (SVN0->getMask().equals(SVN1->getMask())) {
+ EVT VT = N->getValueType(0);
+ SDValue UndefVector = LHS.getOperand(1);
+ SDValue NewBinOp = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
+ LHS.getOperand(0), RHS.getOperand(0));
+ AddUsersToWorkList(N);
+ return DAG.getVectorShuffle(VT, SDLoc(N), NewBinOp, UndefVector,
+ &SVN0->getMask()[0]);
+ }
+ }
+
return SDValue();
}
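
A quick numeric check of the fold above, assuming plain arrays stand in for vector registers (the helper names are invented for the sketch): an elementwise binop commutes with a permutation applied identically to both operands.

    #include <cassert>
    #include <cstddef>
    #include <vector>

    std::vector<int> shuffle(const std::vector<int> &V,
                             const std::vector<int> &M) {
      std::vector<int> R;
      for (int Idx : M)
        R.push_back(V[Idx]); // pure permutation, no undef lanes
      return R;
    }

    std::vector<int> add(const std::vector<int> &A, const std::vector<int> &B) {
      std::vector<int> R;
      for (size_t i = 0; i != A.size(); ++i)
        R.push_back(A[i] + B[i]);
      return R;
    }

    int main() {
      std::vector<int> A = {1, 2, 3, 4}, B = {10, 20, 30, 40};
      std::vector<int> M = {2, 0, 3, 1}; // identical mask on both operands
      // (add (shuffle A, M), (shuffle B, M)) == (shuffle (add A, B), M)
      assert(add(shuffle(A, M), shuffle(B, M)) == shuffle(add(A, B), M));
    }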
@@ -11080,8 +11275,8 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1,
// fold select C, 16, 0 -> shl C, 4
if (N2C && N3C && N3C->isNullValue() && N2C->getAPIntValue().isPowerOf2() &&
- TLI.getBooleanContents(N0.getValueType().isVector()) ==
- TargetLowering::ZeroOrOneBooleanContent) {
+ TLI.getBooleanContents(N0.getValueType()) ==
+ TargetLowering::ZeroOrOneBooleanContent) {
// If the caller doesn't want us to simplify this into a zext of a compare,
// don't do it.
diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp
index 99931c1..445572a 100644
--- a/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -42,12 +42,15 @@
#include "llvm/CodeGen/FastISel.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/StackMaps.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/Function.h"
@@ -558,6 +561,107 @@ bool FastISel::SelectGetElementPtr(const User *I) {
return true;
}
+/// \brief Add a stackmap or patchpoint intrinsic call's live variable operands
+/// to a stackmap or patchpoint machine instruction.
+bool FastISel::addStackMapLiveVars(SmallVectorImpl<MachineOperand> &Ops,
+ const CallInst *CI, unsigned StartIdx) {
+ for (unsigned i = StartIdx, e = CI->getNumArgOperands(); i != e; ++i) {
+ Value *Val = CI->getArgOperand(i);
+ // Check for constants and encode them with a StackMaps::ConstantOp prefix.
+ if (auto *C = dyn_cast<ConstantInt>(Val)) {
+ Ops.push_back(MachineOperand::CreateImm(StackMaps::ConstantOp));
+ Ops.push_back(MachineOperand::CreateImm(C->getSExtValue()));
+ } else if (isa<ConstantPointerNull>(Val)) {
+ Ops.push_back(MachineOperand::CreateImm(StackMaps::ConstantOp));
+ Ops.push_back(MachineOperand::CreateImm(0));
+ } else if (auto *AI = dyn_cast<AllocaInst>(Val)) {
+ // Values coming from a stack location also require a special encoding,
+ // but that is added later on by the target specific frame index
+ // elimination implementation.
+ auto SI = FuncInfo.StaticAllocaMap.find(AI);
+ if (SI != FuncInfo.StaticAllocaMap.end())
+ Ops.push_back(MachineOperand::CreateFI(SI->second));
+ else
+ return false;
+ } else {
+ unsigned Reg = getRegForValue(Val);
+ if (Reg == 0)
+ return false;
+ Ops.push_back(MachineOperand::CreateReg(Reg, /*IsDef=*/false));
+ }
+ }
+
+ return true;
+}
+
+bool FastISel::SelectStackmap(const CallInst *I) {
+ // void @llvm.experimental.stackmap(i64 <id>, i32 <numShadowBytes>,
+ // [live variables...])
+ assert(I->getCalledFunction()->getReturnType()->isVoidTy() &&
+ "Stackmap cannot return a value.");
+
+ // The stackmap intrinsic only records the live variables (the arguments
+ // passed to it) and emits NOPs (if requested). Unlike the patchpoint
+ // intrinsic, this won't be lowered to a function call. This means we don't
+ // have to worry about calling conventions and target-specific lowering code.
+ // Instead we perform the call lowering right here.
+ //
+ // CALLSEQ_START(0)
+ // STACKMAP(id, nbytes, ...)
+ // CALLSEQ_END(0, 0)
+ //
+ SmallVector<MachineOperand, 32> Ops;
+
+ // Add the <id> and <numBytes> constants.
+ assert(isa<ConstantInt>(I->getOperand(PatchPointOpers::IDPos)) &&
+ "Expected a constant integer.");
+ const auto *ID = cast<ConstantInt>(I->getOperand(PatchPointOpers::IDPos));
+ Ops.push_back(MachineOperand::CreateImm(ID->getZExtValue()));
+
+ assert(isa<ConstantInt>(I->getOperand(PatchPointOpers::NBytesPos)) &&
+ "Expected a constant integer.");
+ const auto *NumBytes =
+ cast<ConstantInt>(I->getOperand(PatchPointOpers::NBytesPos));
+ Ops.push_back(MachineOperand::CreateImm(NumBytes->getZExtValue()));
+
+ // Push live variables for the stack map (skipping the first two arguments
+ // <id> and <numBytes>).
+ if (!addStackMapLiveVars(Ops, I, 2))
+ return false;
+
+ // We are not adding any register mask info here, because the stackmap doesn't
+ // clobber anything.
+
+ // Add scratch registers as implicit def and early clobber.
+ CallingConv::ID CC = I->getCallingConv();
+ const MCPhysReg *ScratchRegs = TLI.getScratchRegisters(CC);
+ for (unsigned i = 0; ScratchRegs[i]; ++i)
+ Ops.push_back(MachineOperand::CreateReg(
+ ScratchRegs[i], /*IsDef=*/true, /*IsImp=*/true, /*IsKill=*/false,
+ /*IsDead=*/false, /*IsUndef=*/false, /*IsEarlyClobber=*/true));
+
+ // Issue CALLSEQ_START
+ unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown))
+ .addImm(0);
+
+ // Issue STACKMAP.
+ MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::STACKMAP));
+ for (auto const &MO : Ops)
+ MIB.addOperand(MO);
+
+ // Issue CALLSEQ_END
+ unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackUp))
+ .addImm(0).addImm(0);
+
+ // Inform the Frame Information that we have a stackmap in this function.
+ FuncInfo.MF->getFrameInfo()->setHasStackMap();
+
+ return true;
+}
+
bool FastISel::SelectCall(const User *I) {
const CallInst *Call = cast<CallInst>(I);
@@ -713,6 +817,8 @@ bool FastISel::SelectCall(const User *I) {
UpdateValueMap(Call, ResultReg);
return true;
}
+ case Intrinsic::experimental_stackmap:
+ return SelectStackmap(Call);
}
// Usually, it does not make sense to initialize a value,
@@ -879,7 +985,6 @@ FastISel::SelectInstruction(const Instruction *I) {
/// the CFG.
void
FastISel::FastEmitBranch(MachineBasicBlock *MSucc, DebugLoc DbgLoc) {
-
if (FuncInfo.MBB->getBasicBlock()->size() > 1 &&
FuncInfo.MBB->isLayoutSuccessor(MSucc)) {
// For more accurate line information if this is the only instruction
@@ -890,7 +995,11 @@ FastISel::FastEmitBranch(MachineBasicBlock *MSucc, DebugLoc DbgLoc) {
TII.InsertBranch(*FuncInfo.MBB, MSucc, nullptr,
SmallVector<MachineOperand, 0>(), DbgLoc);
}
- FuncInfo.MBB->addSuccessor(MSucc);
+ uint32_t BranchWeight = 0;
+ if (FuncInfo.BPI)
+ BranchWeight = FuncInfo.BPI->getEdgeWeight(FuncInfo.MBB->getBasicBlock(),
+ MSucc->getBasicBlock());
+ FuncInfo.MBB->addSuccessor(MSucc, BranchWeight);
}
/// SelectFNeg - Emit an FNeg operation.
@@ -1101,6 +1210,7 @@ FastISel::SelectOperator(const User *I, unsigned Opcode) {
FastISel::FastISel(FunctionLoweringInfo &funcInfo,
const TargetLibraryInfo *libInfo)
: FuncInfo(funcInfo),
+ MF(funcInfo.MF),
MRI(FuncInfo.MF->getRegInfo()),
MFI(*FuncInfo.MF->getFrameInfo()),
MCP(*FuncInfo.MF->getConstantPool()),
@@ -1635,3 +1745,47 @@ bool FastISel::canFoldAddIntoGEP(const User *GEP, const Value *Add) {
return isa<ConstantInt>(cast<AddOperator>(Add)->getOperand(1));
}
+MachineMemOperand *
+FastISel::createMachineMemOperandFor(const Instruction *I) const {
+ const Value *Ptr;
+ Type *ValTy;
+ unsigned Alignment;
+ unsigned Flags;
+ bool IsVolatile;
+
+ if (const auto *LI = dyn_cast<LoadInst>(I)) {
+ Alignment = LI->getAlignment();
+ IsVolatile = LI->isVolatile();
+ Flags = MachineMemOperand::MOLoad;
+ Ptr = LI->getPointerOperand();
+ ValTy = LI->getType();
+ } else if (const auto *SI = dyn_cast<StoreInst>(I)) {
+ Alignment = SI->getAlignment();
+ IsVolatile = SI->isVolatile();
+ Flags = MachineMemOperand::MOStore;
+ Ptr = SI->getPointerOperand();
+ ValTy = SI->getValueOperand()->getType();
+ } else {
+ return nullptr;
+ }
+
+ bool IsNonTemporal = I->getMetadata("nontemporal") != nullptr;
+ bool IsInvariant = I->getMetadata("invariant.load") != nullptr;
+ const MDNode *TBAAInfo = I->getMetadata(LLVMContext::MD_tbaa);
+ const MDNode *Ranges = I->getMetadata(LLVMContext::MD_range);
+
+ if (Alignment == 0) // Ensure that codegen never sees alignment 0.
+ Alignment = DL.getABITypeAlignment(ValTy);
+
+ unsigned Size = TM.getDataLayout()->getTypeStoreSize(ValTy);
+
+ if (IsVolatile)
+ Flags |= MachineMemOperand::MOVolatile;
+ if (IsNonTemporal)
+ Flags |= MachineMemOperand::MONonTemporal;
+ if (IsInvariant)
+ Flags |= MachineMemOperand::MOInvariant;
+
+ return FuncInfo.MF->getMachineMemOperand(MachinePointerInfo(Ptr), Flags, Size,
+ Alignment, TBAAInfo, Ranges);
+}
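
A small flag-assembly sketch mirroring the logic of createMachineMemOperandFor; the enum values here are illustrative stand-ins, not LLVM's actual MachineMemOperand encoding:

    #include <cstdio>

    enum MemFlags : unsigned {
      MOLoad = 1u << 0,
      MOStore = 1u << 1,
      MOVolatile = 1u << 2,
      MONonTemporal = 1u << 3,
      MOInvariant = 1u << 4,
    };

    // Start from the load/store kind, then OR in the metadata-derived flags.
    unsigned buildFlags(bool IsLoad, bool IsVolatile, bool IsNonTemporal,
                        bool IsInvariant) {
      unsigned Flags = IsLoad ? MOLoad : MOStore;
      if (IsVolatile) Flags |= MOVolatile;
      if (IsNonTemporal) Flags |= MONonTemporal;
      if (IsInvariant) Flags |= MOInvariant;
      return Flags;
    }

    int main() {
      std::printf("%#x\n", buildFlags(true, true, false, true)); // 0x15
    }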
diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index a59e895..c0e8c8c 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -2060,7 +2060,7 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node,
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(SDLoc(Node)).setChain(InChain)
- .setCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee, &Args, 0)
+ .setCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee, std::move(Args), 0)
.setTailCall(isTailCall).setSExtResult(isSigned).setZExtResult(!isSigned);
std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI);
@@ -2095,7 +2095,7 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, EVT RetVT,
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(dl).setChain(DAG.getEntryNode())
- .setCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee, &Args, 0)
+ .setCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee, std::move(Args), 0)
.setSExtResult(isSigned).setZExtResult(!isSigned);
std::pair<SDValue,SDValue> CallInfo = TLI.LowerCallTo(CLI);
@@ -2129,7 +2129,7 @@ SelectionDAGLegalize::ExpandChainLibCall(RTLIB::Libcall LC,
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(SDLoc(Node)).setChain(InChain)
- .setCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee, &Args, 0)
+ .setCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee, std::move(Args), 0)
.setSExtResult(isSigned).setZExtResult(!isSigned);
std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI);
@@ -2266,7 +2266,7 @@ SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node,
SDLoc dl(Node);
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(dl).setChain(InChain)
- .setCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee, &Args, 0)
+ .setCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee, std::move(Args), 0)
.setSExtResult(isSigned).setZExtResult(!isSigned);
std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI);
@@ -2381,7 +2381,7 @@ SelectionDAGLegalize::ExpandSinCosLibCall(SDNode *Node,
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(dl).setChain(InChain)
.setCallee(TLI.getLibcallCallingConv(LC),
- Type::getVoidTy(*DAG.getContext()), Callee, &Args, 0);
+ Type::getVoidTy(*DAG.getContext()), Callee, std::move(Args), 0);
std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI);
@@ -2650,12 +2650,15 @@ SDValue SelectionDAGLegalize::PromoteLegalFP_TO_INT(SDValue LegalOp,
NewOutTy = (MVT::SimpleValueType)(NewOutTy.getSimpleVT().SimpleTy+1);
assert(NewOutTy.isInteger() && "Ran out of possibilities!");
+ // A larger signed type can hold all unsigned values of the requested type,
+ // so using FP_TO_SINT is valid
if (TLI.isOperationLegalOrCustom(ISD::FP_TO_SINT, NewOutTy)) {
OpToUse = ISD::FP_TO_SINT;
break;
}
- if (TLI.isOperationLegalOrCustom(ISD::FP_TO_UINT, NewOutTy)) {
+ // However, if the value may be < 0.0, we *must* use some FP_TO_SINT.
+ if (!isSigned && TLI.isOperationLegalOrCustom(ISD::FP_TO_UINT, NewOutTy)) {
OpToUse = ISD::FP_TO_UINT;
break;
}
@@ -2996,8 +2999,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(dl).setChain(Node->getOperand(0))
.setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()),
- DAG.getExternalSymbol("__sync_synchronize", TLI.getPointerTy()),
- &Args, 0);
+ DAG.getExternalSymbol("__sync_synchronize",
+ TLI.getPointerTy()), std::move(Args), 0);
std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI);
@@ -3007,14 +3010,14 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
case ISD::ATOMIC_LOAD: {
// There is no libcall for atomic load; fake it with ATOMIC_CMP_SWAP.
SDValue Zero = DAG.getConstant(0, Node->getValueType(0));
- SDValue Swap = DAG.getAtomic(ISD::ATOMIC_CMP_SWAP, dl,
- cast<AtomicSDNode>(Node)->getMemoryVT(),
- Node->getOperand(0),
- Node->getOperand(1), Zero, Zero,
- cast<AtomicSDNode>(Node)->getMemOperand(),
- cast<AtomicSDNode>(Node)->getOrdering(),
- cast<AtomicSDNode>(Node)->getOrdering(),
- cast<AtomicSDNode>(Node)->getSynchScope());
+ SDVTList VTs = DAG.getVTList(Node->getValueType(0), MVT::Other);
+ SDValue Swap = DAG.getAtomicCmpSwap(
+ ISD::ATOMIC_CMP_SWAP, dl, cast<AtomicSDNode>(Node)->getMemoryVT(), VTs,
+ Node->getOperand(0), Node->getOperand(1), Zero, Zero,
+ cast<AtomicSDNode>(Node)->getMemOperand(),
+ cast<AtomicSDNode>(Node)->getOrdering(),
+ cast<AtomicSDNode>(Node)->getOrdering(),
+ cast<AtomicSDNode>(Node)->getSynchScope());
Results.push_back(Swap.getValue(0));
Results.push_back(Swap.getValue(1));
break;
@@ -3051,6 +3054,27 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
Results.push_back(Tmp.second);
break;
}
+ case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: {
+ // Expanding an ATOMIC_CMP_SWAP_WITH_SUCCESS produces an ATOMIC_CMP_SWAP and
+ // splits out the success value as a comparison. Expanding the resulting
+ // ATOMIC_CMP_SWAP will produce a libcall.
+ SDVTList VTs = DAG.getVTList(Node->getValueType(0), MVT::Other);
+ SDValue Res = DAG.getAtomicCmpSwap(
+ ISD::ATOMIC_CMP_SWAP, dl, cast<AtomicSDNode>(Node)->getMemoryVT(), VTs,
+ Node->getOperand(0), Node->getOperand(1), Node->getOperand(2),
+ Node->getOperand(3), cast<MemSDNode>(Node)->getMemOperand(),
+ cast<AtomicSDNode>(Node)->getSuccessOrdering(),
+ cast<AtomicSDNode>(Node)->getFailureOrdering(),
+ cast<AtomicSDNode>(Node)->getSynchScope());
+
+ SDValue Success = DAG.getSetCC(SDLoc(Node), Node->getValueType(1),
+ Res, Node->getOperand(2), ISD::SETEQ);
+
+ Results.push_back(Res.getValue(0));
+ Results.push_back(Success);
+ Results.push_back(Res.getValue(1));
+ break;
+ }
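
The comparison-based success recovery can be sanity-checked with std::atomic; this is only an analogy, and it holds for the strong compare-and-swap case the expansion above targets (a strong CAS fails exactly when the observed value differs from the expected one):

    #include <atomic>
    #include <iostream>

    int main() {
      std::atomic<int> Mem{42};
      int Expected = 42, Desired = 7;

      int Loaded = Expected; // compare_exchange writes the observed value here
      Mem.compare_exchange_strong(Loaded, Desired);
      bool Success = (Loaded == Expected); // the SETEQ node built above

      std::cout << Success << ' ' << Mem.load() << '\n'; // prints: 1 7
    }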
case ISD::DYNAMIC_STACKALLOC:
ExpandDYNAMIC_STACKALLOC(Node, Results);
break;
@@ -3074,7 +3098,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(dl).setChain(Node->getOperand(0))
.setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()),
- DAG.getExternalSymbol("abort", TLI.getPointerTy()), &Args, 0);
+ DAG.getExternalSymbol("abort", TLI.getPointerTy()),
+ std::move(Args), 0);
std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI);
Results.push_back(CallResult.second);
@@ -3128,6 +3153,65 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
Node->getOperand(0), Node->getValueType(0), dl);
Results.push_back(Tmp1);
break;
+ case ISD::FP_TO_SINT: {
+ EVT VT = Node->getOperand(0).getValueType();
+ EVT NVT = Node->getValueType(0);
+
+ // FIXME: Only f32 to i64 conversions are supported.
+ if (VT != MVT::f32 || NVT != MVT::i64)
+ break;
+
+ // Expand f32 -> i64 conversion
+ // This algorithm comes from compiler-rt's implementation of fixsfdi:
+ // https://github.com/llvm-mirror/compiler-rt/blob/master/lib/builtins/fixsfdi.c
+ EVT IntVT = EVT::getIntegerVT(*DAG.getContext(),
+ VT.getSizeInBits());
+ SDValue ExponentMask = DAG.getConstant(0x7F800000, IntVT);
+ SDValue ExponentLoBit = DAG.getConstant(23, IntVT);
+ SDValue Bias = DAG.getConstant(127, IntVT);
+ SDValue SignMask = DAG.getConstant(APInt::getSignBit(VT.getSizeInBits()),
+ IntVT);
+ SDValue SignLowBit = DAG.getConstant(VT.getSizeInBits() - 1, IntVT);
+ SDValue MantissaMask = DAG.getConstant(0x007FFFFF, IntVT);
+
+ SDValue Bits = DAG.getNode(ISD::BITCAST, dl, IntVT, Node->getOperand(0));
+
+ SDValue ExponentBits = DAG.getNode(ISD::SRL, dl, IntVT,
+ DAG.getNode(ISD::AND, dl, IntVT, Bits, ExponentMask),
+ DAG.getZExtOrTrunc(ExponentLoBit, dl, TLI.getShiftAmountTy(IntVT)));
+ SDValue Exponent = DAG.getNode(ISD::SUB, dl, IntVT, ExponentBits, Bias);
+
+ SDValue Sign = DAG.getNode(ISD::SRA, dl, IntVT,
+ DAG.getNode(ISD::AND, dl, IntVT, Bits, SignMask),
+ DAG.getZExtOrTrunc(SignLowBit, dl, TLI.getShiftAmountTy(IntVT)));
+ Sign = DAG.getSExtOrTrunc(Sign, dl, NVT);
+
+ SDValue R = DAG.getNode(ISD::OR, dl, IntVT,
+ DAG.getNode(ISD::AND, dl, IntVT, Bits, MantissaMask),
+ DAG.getConstant(0x00800000, IntVT));
+
+ R = DAG.getZExtOrTrunc(R, dl, NVT);
+
+
+ R = DAG.getSelectCC(dl, Exponent, ExponentLoBit,
+ DAG.getNode(ISD::SHL, dl, NVT, R,
+ DAG.getZExtOrTrunc(
+ DAG.getNode(ISD::SUB, dl, IntVT, Exponent, ExponentLoBit),
+ dl, TLI.getShiftAmountTy(IntVT))),
+ DAG.getNode(ISD::SRL, dl, NVT, R,
+ DAG.getZExtOrTrunc(
+ DAG.getNode(ISD::SUB, dl, IntVT, ExponentLoBit, Exponent),
+ dl, TLI.getShiftAmountTy(IntVT))),
+ ISD::SETGT);
+
+ SDValue Ret = DAG.getNode(ISD::SUB, dl, NVT,
+ DAG.getNode(ISD::XOR, dl, NVT, R, Sign),
+ Sign);
+
+ Results.push_back(DAG.getSelectCC(dl, Exponent, DAG.getConstant(0, IntVT),
+ DAG.getConstant(0, NVT), Ret, ISD::SETLT));
+ break;
+ }
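
For reference, the same fixsfdi-style algorithm the case above builds out of DAG nodes, written as a standalone scalar function (assumes IEEE-754 binary32; like the DAG sequence, it ignores NaN and out-of-range inputs):

    #include <cstdint>
    #include <cstring>
    #include <iostream>

    int64_t fixsfdi(float F) {
      uint32_t Bits;
      std::memcpy(&Bits, &F, sizeof(Bits)); // the ISD::BITCAST step

      int32_t Exponent = (int32_t)((Bits & 0x7F800000u) >> 23) - 127;
      int64_t Sign = (int32_t)Bits >> 31;             // 0 or -1 (ISD::SRA)
      int64_t R = (Bits & 0x007FFFFFu) | 0x00800000u; // implicit leading one

      if (Exponent < 0) // |F| < 1.0 truncates to 0 (the SETLT select)
        return 0;
      // Shift left or right depending on how Exponent compares to the
      // mantissa width, matching the SETGT select above.
      R = Exponent > 23 ? R << (Exponent - 23) : R >> (23 - Exponent);
      return (R ^ Sign) - Sign; // conditional negation via XOR/SUB
    }

    int main() { std::cout << fixsfdi(-123.75f) << '\n'; } // -123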
case ISD::FP_TO_UINT: {
SDValue True, False;
EVT VT = Node->getOperand(0).getValueType();
@@ -3653,7 +3737,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
ISD::ADD : ISD::SUB, dl, LHS.getValueType(),
LHS, RHS);
Results.push_back(Sum);
- EVT OType = Node->getValueType(1);
+ EVT ResultType = Node->getValueType(1);
+ EVT OType = getSetCCResultType(Node->getValueType(0));
SDValue Zero = DAG.getConstant(0, LHS.getValueType());
@@ -3676,7 +3761,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
SDValue SumSignNE = DAG.getSetCC(dl, OType, LHSSign, SumSign, ISD::SETNE);
SDValue Cmp = DAG.getNode(ISD::AND, dl, OType, SignsMatch, SumSignNE);
- Results.push_back(Cmp);
+ Results.push_back(DAG.getBoolExtOrTrunc(Cmp, dl, ResultType, ResultType));
break;
}
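
As a scalar sketch, the signed-overflow test being encoded with SETCC/AND nodes above: overflow occurs iff the operands have matching signs and the sum's sign differs from them.

    #include <cstdint>
    #include <iostream>

    bool saddOverflows(int32_t LHS, int32_t RHS) {
      // Wrap like ISD::ADD by computing in unsigned arithmetic.
      int32_t Sum = (int32_t)((uint32_t)LHS + (uint32_t)RHS);
      bool LHSSign = LHS < 0, RHSSign = RHS < 0, SumSign = Sum < 0;
      return LHSSign == RHSSign && SumSign != LHSSign;
    }

    int main() {
      std::cout << saddOverflows(INT32_MAX, 1) << ' ' // 1: positive overflow
                << saddOverflows(-5, 3) << '\n';      // 0: no overflow
    }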
case ISD::UADDO:
@@ -3687,9 +3772,14 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
ISD::ADD : ISD::SUB, dl, LHS.getValueType(),
LHS, RHS);
Results.push_back(Sum);
- Results.push_back(DAG.getSetCC(dl, Node->getValueType(1), Sum, LHS,
- Node->getOpcode () == ISD::UADDO ?
- ISD::SETULT : ISD::SETUGT));
+
+ EVT ResultType = Node->getValueType(1);
+ EVT SetCCType = getSetCCResultType(Node->getValueType(0));
+ ISD::CondCode CC
+ = Node->getOpcode() == ISD::UADDO ? ISD::SETULT : ISD::SETUGT;
+ SDValue SetCC = DAG.getSetCC(dl, SetCCType, Sum, LHS, CC);
+
+ Results.push_back(DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType));
break;
}
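
And the unsigned counterpart from the UADDO path: after a wrapping add, overflow occurred iff the sum compares SETULT against an operand.

    #include <cstdint>
    #include <iostream>

    bool uaddOverflows(uint32_t LHS, uint32_t RHS) {
      uint32_t Sum = LHS + RHS; // unsigned wrap-around
      return Sum < LHS;         // the ISD::SETULT comparison against LHS
    }

    int main() { std::cout << uaddOverflows(0xFFFFFFFFu, 2u) << '\n'; } // 1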
case ISD::UMULO:
@@ -3879,7 +3969,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
// illegal; expand it into a SELECT_CC.
EVT VT = Node->getValueType(0);
int TrueValue;
- switch (TLI.getBooleanContents(VT.isVector())) {
+ switch (TLI.getBooleanContents(Tmp1->getValueType(0))) {
case TargetLowering::ZeroOrOneBooleanContent:
case TargetLowering::UndefinedBooleanContent:
TrueValue = 1;
@@ -3899,13 +3989,29 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
Tmp2 = Node->getOperand(1); // RHS
Tmp3 = Node->getOperand(2); // True
Tmp4 = Node->getOperand(3); // False
+ EVT VT = Node->getValueType(0);
SDValue CC = Node->getOperand(4);
+ ISD::CondCode CCOp = cast<CondCodeSDNode>(CC)->get();
+
+ if (TLI.isCondCodeLegal(CCOp, Tmp1.getSimpleValueType())) {
+ // If the condition code is legal, then we need to expand this
+ // node using SETCC and SELECT.
+ EVT CmpVT = Tmp1.getValueType();
+ assert(!TLI.isOperationExpand(ISD::SELECT, VT) &&
+ "Cannot expand ISD::SELECT_CC when ISD::SELECT also needs to be "
+ "expanded.");
+ EVT CCVT = TLI.getSetCCResultType(*DAG.getContext(), CmpVT);
+ SDValue Cond = DAG.getNode(ISD::SETCC, dl, CCVT, Tmp1, Tmp2, CC);
+ Results.push_back(DAG.getSelect(dl, VT, Cond, Tmp3, Tmp4));
+ break;
+ }
+ // SELECT_CC is legal, so the condition code must not be.
bool Legalized = false;
// Try to legalize by inverting the condition. This is for targets that
// might support an ordered version of a condition, but not the unordered
// version (or vice versa).
- ISD::CondCode InvCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
+ ISD::CondCode InvCC = ISD::getSetCCInverse(CCOp,
Tmp1.getValueType().isInteger());
if (TLI.isCondCodeLegal(InvCC, Tmp1.getSimpleValueType())) {
// Use the new condition code and swap true and false
diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 2483184..6feac0d 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -138,7 +138,9 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
Res = PromoteIntRes_Atomic1(cast<AtomicSDNode>(N)); break;
case ISD::ATOMIC_CMP_SWAP:
- Res = PromoteIntRes_Atomic2(cast<AtomicSDNode>(N)); break;
+ case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS:
+ Res = PromoteIntRes_AtomicCmpSwap(cast<AtomicSDNode>(N), ResNo);
+ break;
}
// If the result is null then the sub-method took care of registering it.
@@ -192,16 +194,41 @@ SDValue DAGTypeLegalizer::PromoteIntRes_Atomic1(AtomicSDNode *N) {
return Res;
}
-SDValue DAGTypeLegalizer::PromoteIntRes_Atomic2(AtomicSDNode *N) {
+SDValue DAGTypeLegalizer::PromoteIntRes_AtomicCmpSwap(AtomicSDNode *N,
+ unsigned ResNo) {
+ if (ResNo == 1) {
+ assert(N->getOpcode() == ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS);
+ EVT SVT = getSetCCResultType(N->getOperand(2).getValueType());
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(1));
+
+ // Only use the result of getSetCCResultType if it is legal,
+ // otherwise just use the promoted result type (NVT).
+ if (!TLI.isTypeLegal(SVT))
+ SVT = NVT;
+
+ SDVTList VTs = DAG.getVTList(N->getValueType(0), SVT, MVT::Other);
+ SDValue Res = DAG.getAtomicCmpSwap(
+ ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, SDLoc(N), N->getMemoryVT(), VTs,
+ N->getChain(), N->getBasePtr(), N->getOperand(2), N->getOperand(3),
+ N->getMemOperand(), N->getSuccessOrdering(), N->getFailureOrdering(),
+ N->getSynchScope());
+ ReplaceValueWith(SDValue(N, 0), Res.getValue(0));
+ ReplaceValueWith(SDValue(N, 2), Res.getValue(2));
+ return Res.getValue(1);
+ }
+
SDValue Op2 = GetPromotedInteger(N->getOperand(2));
SDValue Op3 = GetPromotedInteger(N->getOperand(3));
- SDValue Res = DAG.getAtomic(N->getOpcode(), SDLoc(N), N->getMemoryVT(),
- N->getChain(), N->getBasePtr(), Op2, Op3,
- N->getMemOperand(), N->getSuccessOrdering(),
- N->getFailureOrdering(), N->getSynchScope());
+ SDVTList VTs =
+ DAG.getVTList(Op2.getValueType(), N->getValueType(1), MVT::Other);
+ SDValue Res = DAG.getAtomicCmpSwap(
+ N->getOpcode(), SDLoc(N), N->getMemoryVT(), VTs, N->getChain(),
+ N->getBasePtr(), Op2, Op3, N->getMemOperand(), N->getSuccessOrdering(),
+ N->getFailureOrdering(), N->getSynchScope());
// Legalized the chain result - switch anything that used the old chain to
// use the new one.
- ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
+ unsigned ChainOp = N->getNumValues() - 1;
+ ReplaceValueWith(SDValue(N, ChainOp), Res.getValue(ChainOp));
return Res;
}
@@ -492,7 +519,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_VSELECT(SDNode *N) {
EVT OpTy = N->getOperand(1).getValueType();
// Promote all the way up to the canonical SetCC type.
- Mask = PromoteTargetBoolean(Mask, getSetCCResultType(OpTy));
+ Mask = PromoteTargetBoolean(Mask, OpTy);
SDValue LHS = GetPromotedInteger(N->getOperand(1));
SDValue RHS = GetPromotedInteger(N->getOperand(2));
return DAG.getNode(ISD::VSELECT, SDLoc(N),
@@ -892,8 +919,7 @@ SDValue DAGTypeLegalizer::PromoteIntOp_BRCOND(SDNode *N, unsigned OpNo) {
assert(OpNo == 1 && "only know how to promote condition");
// Promote all the way up to the canonical SetCC type.
- EVT SVT = getSetCCResultType(MVT::Other);
- SDValue Cond = PromoteTargetBoolean(N->getOperand(1), SVT);
+ SDValue Cond = PromoteTargetBoolean(N->getOperand(1), MVT::Other);
// The chain (Op#0) and basic block destination (Op#2) are always legal types.
return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), Cond,
@@ -986,9 +1012,8 @@ SDValue DAGTypeLegalizer::PromoteIntOp_SELECT(SDNode *N, unsigned OpNo) {
EVT OpTy = N->getOperand(1).getValueType();
// Promote all the way up to the canonical SetCC type.
- EVT SVT = getSetCCResultType(N->getOpcode() == ISD::SELECT ?
- OpTy.getScalarType() : OpTy);
- Cond = PromoteTargetBoolean(Cond, SVT);
+ EVT OpVT = N->getOpcode() == ISD::SELECT ? OpTy.getScalarType() : OpTy;
+ Cond = PromoteTargetBoolean(Cond, OpVT);
return SDValue(DAG.UpdateNodeOperands(N, Cond, N->getOperand(1),
N->getOperand(2)), 0);
@@ -1143,6 +1168,26 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
ReplaceValueWith(SDValue(N, 1), Tmp.second);
break;
}
+ case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: {
+ AtomicSDNode *AN = cast<AtomicSDNode>(N);
+ SDVTList VTs = DAG.getVTList(N->getValueType(0), MVT::Other);
+ SDValue Tmp = DAG.getAtomicCmpSwap(
+ ISD::ATOMIC_CMP_SWAP, SDLoc(N), AN->getMemoryVT(), VTs,
+ N->getOperand(0), N->getOperand(1), N->getOperand(2), N->getOperand(3),
+ AN->getMemOperand(), AN->getSuccessOrdering(), AN->getFailureOrdering(),
+ AN->getSynchScope());
+
+ // Expanding to the strong ATOMIC_CMP_SWAP node means we can determine
+ // success simply by comparing the loaded value against the incoming
+ // comparison.
+ SDValue Success = DAG.getSetCC(SDLoc(N), N->getValueType(1), Tmp,
+ N->getOperand(2), ISD::SETEQ);
+
+ SplitInteger(Tmp, Lo, Hi);
+ ReplaceValueWith(SDValue(N, 1), Success);
+ ReplaceValueWith(SDValue(N, 2), Tmp.getValue(1));
+ break;
+ }
case ISD::AND:
case ISD::OR:
@@ -2301,7 +2346,7 @@ void DAGTypeLegalizer::ExpandIntRes_XMULO(SDNode *N,
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(dl).setChain(Chain)
- .setCallee(TLI.getLibcallCallingConv(LC), RetTy, Func, &Args, 0)
+ .setCallee(TLI.getLibcallCallingConv(LC), RetTy, Func, std::move(Args), 0)
.setSExtResult();
std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI);
@@ -2388,16 +2433,18 @@ void DAGTypeLegalizer::ExpandIntRes_ATOMIC_LOAD(SDNode *N,
SDValue &Lo, SDValue &Hi) {
SDLoc dl(N);
EVT VT = cast<AtomicSDNode>(N)->getMemoryVT();
+ SDVTList VTs = DAG.getVTList(VT, MVT::i1, MVT::Other);
SDValue Zero = DAG.getConstant(0, VT);
- SDValue Swap = DAG.getAtomic(ISD::ATOMIC_CMP_SWAP, dl, VT,
- N->getOperand(0),
- N->getOperand(1), Zero, Zero,
- cast<AtomicSDNode>(N)->getMemOperand(),
- cast<AtomicSDNode>(N)->getOrdering(),
- cast<AtomicSDNode>(N)->getOrdering(),
- cast<AtomicSDNode>(N)->getSynchScope());
+ SDValue Swap = DAG.getAtomicCmpSwap(
+ ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, dl,
+ cast<AtomicSDNode>(N)->getMemoryVT(), VTs, N->getOperand(0),
+ N->getOperand(1), Zero, Zero, cast<AtomicSDNode>(N)->getMemOperand(),
+ cast<AtomicSDNode>(N)->getOrdering(),
+ cast<AtomicSDNode>(N)->getOrdering(),
+ cast<AtomicSDNode>(N)->getSynchScope());
+
ReplaceValueWith(SDValue(N, 0), Swap.getValue(0));
- ReplaceValueWith(SDValue(N, 1), Swap.getValue(1));
+ ReplaceValueWith(SDValue(N, 1), Swap.getValue(2));
}
//===----------------------------------------------------------------------===//
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
index 3971fc3..bd7dacf 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
@@ -1054,7 +1054,7 @@ DAGTypeLegalizer::ExpandChainLibCall(RTLIB::Libcall LC,
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(SDLoc(Node)).setChain(InChain)
- .setCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee, &Args, 0)
+ .setCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee, std::move(Args), 0)
.setSExtResult(isSigned).setZExtResult(!isSigned);
std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI);
@@ -1065,11 +1065,14 @@ DAGTypeLegalizer::ExpandChainLibCall(RTLIB::Libcall LC,
/// PromoteTargetBoolean - Promote the given target boolean to a target boolean
/// of the given type. A target boolean is an integer value, not necessarily of
/// type i1, the bits of which conform to getBooleanContents.
-SDValue DAGTypeLegalizer::PromoteTargetBoolean(SDValue Bool, EVT VT) {
+///
+/// ValVT is the type of values that produced the boolean.
+SDValue DAGTypeLegalizer::PromoteTargetBoolean(SDValue Bool, EVT ValVT) {
SDLoc dl(Bool);
+ EVT BoolVT = getSetCCResultType(ValVT);
ISD::NodeType ExtendCode =
- TargetLowering::getExtendForContent(TLI.getBooleanContents(VT.isVector()));
- return DAG.getNode(ExtendCode, dl, VT, Bool);
+ TargetLowering::getExtendForContent(TLI.getBooleanContents(ValVT));
+ return DAG.getNode(ExtendCode, dl, BoolVT, Bool);
}
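
A sketch of the extension choice implied by getBooleanContents when promoting the boolean; the enum here is an illustrative stand-in for TargetLowering's, not the real type:

    #include <cstdint>
    #include <iostream>

    enum BooleanContent { ZeroOrOne, ZeroOrNegativeOne, Undefined };

    int32_t promoteBool(bool B, BooleanContent C) {
      switch (C) {
      case ZeroOrNegativeOne: return B ? -1 : 0; // sign extend the i1
      default:                return B ? 1 : 0;  // zero/any extend the i1
      }
    }

    int main() {
      std::cout << promoteBool(true, ZeroOrOne) << ' '
                << promoteBool(true, ZeroOrNegativeOne) << '\n'; // 1 -1
    }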
/// SplitInteger - Return the lower LoVT bits of Op in Lo and the upper HiVT
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index e4bbc78..d0ca6f8 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -167,7 +167,7 @@ private:
SDNode *Node, bool isSigned);
std::pair<SDValue, SDValue> ExpandAtomic(SDNode *Node);
- SDValue PromoteTargetBoolean(SDValue Bool, EVT VT);
+ SDValue PromoteTargetBoolean(SDValue Bool, EVT ValVT);
void ReplaceValueWith(SDValue From, SDValue To);
void SplitInteger(SDValue Op, SDValue &Lo, SDValue &Hi);
void SplitInteger(SDValue Op, EVT LoVT, EVT HiVT,
@@ -220,7 +220,7 @@ private:
SDValue PromoteIntRes_AssertZext(SDNode *N);
SDValue PromoteIntRes_Atomic0(AtomicSDNode *N);
SDValue PromoteIntRes_Atomic1(AtomicSDNode *N);
- SDValue PromoteIntRes_Atomic2(AtomicSDNode *N);
+ SDValue PromoteIntRes_AtomicCmpSwap(AtomicSDNode *N, unsigned ResNo);
SDValue PromoteIntRes_EXTRACT_SUBVECTOR(SDNode *N);
SDValue PromoteIntRes_VECTOR_SHUFFLE(SDNode *N);
SDValue PromoteIntRes_BUILD_VECTOR(SDNode *N);
@@ -570,6 +570,7 @@ private:
void SplitVecRes_BUILD_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_CONCAT_VECTORS(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_EXTRACT_SUBVECTOR(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_INSERT_SUBVECTOR(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_FPOWI(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_LOAD(LoadSDNode *N, SDValue &Lo, SDValue &Hi);
@@ -644,6 +645,7 @@ private:
bool WidenVectorOperand(SDNode *N, unsigned OpNo);
SDValue WidenVecOp_BITCAST(SDNode *N);
SDValue WidenVecOp_CONCAT_VECTORS(SDNode *N);
+ SDValue WidenVecOp_EXTEND(SDNode *N);
SDValue WidenVecOp_EXTRACT_VECTOR_ELT(SDNode *N);
SDValue WidenVecOp_EXTRACT_SUBVECTOR(SDNode *N);
SDValue WidenVecOp_STORE(SDNode* N);
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
index f40ed76..7e2f7b6 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
@@ -60,12 +60,15 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) {
case TargetLowering::TypeExpandFloat:
// Convert the expanded pieces of the input.
GetExpandedOp(InOp, Lo, Hi);
+ if (TLI.hasBigEndianPartOrdering(InVT) !=
+ TLI.hasBigEndianPartOrdering(OutVT))
+ std::swap(Lo, Hi);
Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo);
Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi);
return;
case TargetLowering::TypeSplitVector:
GetSplitVector(InOp, Lo, Hi);
- if (TLI.isBigEndian())
+ if (TLI.hasBigEndianPartOrdering(OutVT))
std::swap(Lo, Hi);
Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo);
Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi);
@@ -82,7 +85,7 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) {
EVT LoVT, HiVT;
std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(InVT);
std::tie(Lo, Hi) = DAG.SplitVector(InOp, dl, LoVT, HiVT);
- if (TLI.isBigEndian())
+ if (TLI.hasBigEndianPartOrdering(OutVT))
std::swap(Lo, Hi);
Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo);
Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi);
@@ -176,7 +179,7 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) {
false, false, MinAlign(Alignment, IncrementSize));
// Handle endianness of the load.
- if (TLI.isBigEndian())
+ if (TLI.hasBigEndianPartOrdering(OutVT))
std::swap(Lo, Hi);
}
@@ -245,7 +248,8 @@ void DAGTypeLegalizer::ExpandRes_NormalLoad(SDNode *N, SDValue &Lo,
SDLoc dl(N);
LoadSDNode *LD = cast<LoadSDNode>(N);
- EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), LD->getValueType(0));
+ EVT ValueVT = LD->getValueType(0);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), ValueVT);
SDValue Chain = LD->getChain();
SDValue Ptr = LD->getBasePtr();
unsigned Alignment = LD->getAlignment();
@@ -275,7 +279,7 @@ void DAGTypeLegalizer::ExpandRes_NormalLoad(SDNode *N, SDValue &Lo,
Hi.getValue(1));
// Handle endianness of the load.
- if (TLI.isBigEndian())
+ if (TLI.hasBigEndianPartOrdering(ValueVT))
std::swap(Lo, Hi);
// Modified the chain - switch anything that used the old chain to use
@@ -295,7 +299,7 @@ void DAGTypeLegalizer::ExpandRes_VAARG(SDNode *N, SDValue &Lo, SDValue &Hi) {
Hi = DAG.getVAArg(NVT, dl, Lo.getValue(1), Ptr, N->getOperand(2), 0);
// Handle endianness of the load.
- if (TLI.isBigEndian())
+ if (TLI.hasBigEndianPartOrdering(OVT))
std::swap(Lo, Hi);
// Modified the chain - switch anything that used the old chain to use
@@ -459,8 +463,8 @@ SDValue DAGTypeLegalizer::ExpandOp_NormalStore(SDNode *N, unsigned OpNo) {
SDLoc dl(N);
StoreSDNode *St = cast<StoreSDNode>(N);
- EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(),
- St->getValue().getValueType());
+ EVT ValueVT = St->getValue().getValueType();
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), ValueVT);
SDValue Chain = St->getChain();
SDValue Ptr = St->getBasePtr();
unsigned Alignment = St->getAlignment();
@@ -474,7 +478,7 @@ SDValue DAGTypeLegalizer::ExpandOp_NormalStore(SDNode *N, unsigned OpNo) {
SDValue Lo, Hi;
GetExpandedOp(St->getValue(), Lo, Hi);
- if (TLI.isBigEndian())
+ if (TLI.hasBigEndianPartOrdering(ValueVT))
std::swap(Lo, Hi);
Lo = DAG.getStore(Chain, dl, Lo, Ptr, St->getPointerInfo(),
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index 898cd29..507e7ff 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -37,12 +37,12 @@ class VectorLegalizer {
const TargetLowering &TLI;
bool Changed; // Keep track of whether anything changed
- /// LegalizedNodes - For nodes that are of legal width, and that have more
- /// than one use, this map indicates what regularized operand to use. This
- /// allows us to avoid legalizing the same thing more than once.
+ /// For nodes that are of legal width, and that have more than one use, this
+ /// map indicates what regularized operand to use. This allows us to avoid
+ /// legalizing the same thing more than once.
SmallDenseMap<SDValue, SDValue, 64> LegalizedNodes;
- // Adds a node to the translation cache
+ /// \brief Adds a node to the translation cache.
void AddLegalizedOperand(SDValue From, SDValue To) {
LegalizedNodes.insert(std::make_pair(From, To));
// If someone requests legalization of the new node, return itself.
@@ -50,41 +50,81 @@ class VectorLegalizer {
LegalizedNodes.insert(std::make_pair(To, To));
}
- // Legalizes the given node
+ /// \brief Legalizes the given node.
SDValue LegalizeOp(SDValue Op);
- // Assuming the node is legal, "legalize" the results
+
+ /// \brief Assuming the node is legal, "legalize" the results.
SDValue TranslateLegalizeResults(SDValue Op, SDValue Result);
- // Implements unrolling a VSETCC.
+
+ /// \brief Implements unrolling a VSETCC.
SDValue UnrollVSETCC(SDValue Op);
- // Implements expansion for FNEG; falls back to UnrollVectorOp if FSUB
- // isn't legal.
- // Implements expansion for UINT_TO_FLOAT; falls back to UnrollVectorOp if
- // SINT_TO_FLOAT and SHR on vectors isn't legal.
+
+ /// \brief Implement expand-based legalization of vector operations.
+ ///
+ /// This is just a high-level routine to dispatch to specific code paths for
+ /// operations to legalize them.
+ SDValue Expand(SDValue Op);
+
+ /// \brief Implements expansion for FNEG; falls back to UnrollVectorOp if
+ /// FSUB isn't legal.
+ ///
+ /// Implements expansion for UINT_TO_FLOAT; falls back to UnrollVectorOp if
+ /// SINT_TO_FLOAT and SHR on vectors aren't legal.
SDValue ExpandUINT_TO_FLOAT(SDValue Op);
- // Implement expansion for SIGN_EXTEND_INREG using SRL and SRA.
+
+ /// \brief Implement expansion for SIGN_EXTEND_INREG using SRL and SRA.
SDValue ExpandSEXTINREG(SDValue Op);
- // Expand bswap of vectors into a shuffle if legal.
+
+ /// \brief Implement expansion for ANY_EXTEND_VECTOR_INREG.
+ ///
+ /// Shuffles the low lanes of the operand into place and bitcasts to the proper
+ /// type. The contents of the bits in the extended part of each element are
+ /// undef.
+ SDValue ExpandANY_EXTEND_VECTOR_INREG(SDValue Op);
+
+ /// \brief Implement expansion for SIGN_EXTEND_VECTOR_INREG.
+ ///
+ /// Shuffles the low lanes of the operand into place, bitcasts to the proper
+ /// type, then shifts left and arithmetic shifts right to introduce a sign
+ /// extension.
+ SDValue ExpandSIGN_EXTEND_VECTOR_INREG(SDValue Op);
+
+ /// \brief Implement expansion for ZERO_EXTEND_VECTOR_INREG.
+ ///
+ /// Shuffles the low lanes of the operand into place and blends zeros into
+ /// the remaining lanes, finally bitcasting to the proper type.
+ SDValue ExpandZERO_EXTEND_VECTOR_INREG(SDValue Op);
+
+ /// \brief Expand bswap of vectors into a shuffle if legal.
SDValue ExpandBSWAP(SDValue Op);
- // Implement vselect in terms of XOR, AND, OR when blend is not supported
- // by the target.
+
+ /// \brief Implement vselect in terms of XOR, AND, OR when blend is not
+ /// supported by the target.
SDValue ExpandVSELECT(SDValue Op);
SDValue ExpandSELECT(SDValue Op);
SDValue ExpandLoad(SDValue Op);
SDValue ExpandStore(SDValue Op);
SDValue ExpandFNEG(SDValue Op);
- // Implements vector promotion; this is essentially just bitcasting the
- // operands to a different type and bitcasting the result back to the
- // original type.
- SDValue PromoteVectorOp(SDValue Op);
- // Implements [SU]INT_TO_FP vector promotion; this is a [zs]ext of the input
- // operand to the next size up.
- SDValue PromoteVectorOpINT_TO_FP(SDValue Op);
- // Implements FP_TO_[SU]INT vector promotion of the result type; it is
- // promoted to the next size up integer type. The result is then truncated
- // back to the original type.
- SDValue PromoteVectorOpFP_TO_INT(SDValue Op, bool isSigned);
-
- public:
+
+ /// \brief Implements vector promotion.
+ ///
+ /// This is essentially just bitcasting the operands to a different type and
+ /// bitcasting the result back to the original type.
+ SDValue Promote(SDValue Op);
+
+ /// \brief Implements [SU]INT_TO_FP vector promotion.
+ ///
+ /// This is a [zs]ext of the input operand to the next size up.
+ SDValue PromoteINT_TO_FP(SDValue Op);
+
+ /// \brief Implements FP_TO_[SU]INT vector promotion of the result type.
+ ///
+ /// It is promoted to the next size up integer type. The result is then
+ /// truncated back to the original type.
+ SDValue PromoteFP_TO_INT(SDValue Op, bool isSigned);
+
+public:
+ /// \brief Begin legalizing the vector operations in the DAG.
bool Run();
VectorLegalizer(SelectionDAG& dag) :
DAG(dag), TLI(dag.getTargetLoweringInfo()), Changed(false) {}
@@ -254,6 +294,9 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
case ISD::FP_EXTEND:
case ISD::FMA:
case ISD::SIGN_EXTEND_INREG:
+ case ISD::ANY_EXTEND_VECTOR_INREG:
+ case ISD::SIGN_EXTEND_VECTOR_INREG:
+ case ISD::ZERO_EXTEND_VECTOR_INREG:
QueryType = Node->getValueType(0);
break;
case ISD::FP_ROUND_INREG:
@@ -267,27 +310,11 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
switch (TLI.getOperationAction(Node->getOpcode(), QueryType)) {
case TargetLowering::Promote:
- switch (Op.getOpcode()) {
- default:
- // "Promote" the operation by bitcasting
- Result = PromoteVectorOp(Op);
- Changed = true;
- break;
- case ISD::SINT_TO_FP:
- case ISD::UINT_TO_FP:
- // "Promote" the operation by extending the operand.
- Result = PromoteVectorOpINT_TO_FP(Op);
- Changed = true;
- break;
- case ISD::FP_TO_UINT:
- case ISD::FP_TO_SINT:
- // Promote the operation by extending the operand.
- Result = PromoteVectorOpFP_TO_INT(Op, Op->getOpcode() == ISD::FP_TO_SINT);
- Changed = true;
- break;
- }
+ Result = Promote(Op);
+ Changed = true;
+ break;
+ case TargetLowering::Legal:
break;
- case TargetLowering::Legal: break;
case TargetLowering::Custom: {
SDValue Tmp1 = TLI.LowerOperation(Op, DAG);
if (Tmp1.getNode()) {
@@ -297,23 +324,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
// FALL THROUGH
}
case TargetLowering::Expand:
- if (Node->getOpcode() == ISD::SIGN_EXTEND_INREG)
- Result = ExpandSEXTINREG(Op);
- else if (Node->getOpcode() == ISD::BSWAP)
- Result = ExpandBSWAP(Op);
- else if (Node->getOpcode() == ISD::VSELECT)
- Result = ExpandVSELECT(Op);
- else if (Node->getOpcode() == ISD::SELECT)
- Result = ExpandSELECT(Op);
- else if (Node->getOpcode() == ISD::UINT_TO_FP)
- Result = ExpandUINT_TO_FLOAT(Op);
- else if (Node->getOpcode() == ISD::FNEG)
- Result = ExpandFNEG(Op);
- else if (Node->getOpcode() == ISD::SETCC)
- Result = UnrollVSETCC(Op);
- else
- Result = DAG.UnrollVectorOp(Op.getNode());
- break;
+ Result = Expand(Op);
}
// Make sure that the generated code is itself legal.
@@ -328,10 +339,23 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
return Result;
}
-SDValue VectorLegalizer::PromoteVectorOp(SDValue Op) {
- // Vector "promotion" is basically just bitcasting and doing the operation
- // in a different type. For example, x86 promotes ISD::AND on v2i32 to
- // v1i64.
+SDValue VectorLegalizer::Promote(SDValue Op) {
+ // For a few operations there is a specific concept for promotion based on
+ // the operand's type.
+ switch (Op.getOpcode()) {
+ case ISD::SINT_TO_FP:
+ case ISD::UINT_TO_FP:
+ // "Promote" the operation by extending the operand.
+ return PromoteINT_TO_FP(Op);
+ case ISD::FP_TO_UINT:
+ case ISD::FP_TO_SINT:
+ // Promote the operation by extending the operand.
+ return PromoteFP_TO_INT(Op, Op->getOpcode() == ISD::FP_TO_SINT);
+ }
+
+ // The rest of the time, vector "promotion" is basically just bitcasting and
+ // doing the operation in a different type. For example, x86 promotes
+ // ISD::AND on v2i32 to v1i64.
MVT VT = Op.getSimpleValueType();
assert(Op.getNode()->getNumValues() == 1 &&
"Can't promote a vector with multiple results!");
@@ -351,7 +375,7 @@ SDValue VectorLegalizer::PromoteVectorOp(SDValue Op) {
return DAG.getNode(ISD::BITCAST, dl, VT, Op);
}
-SDValue VectorLegalizer::PromoteVectorOpINT_TO_FP(SDValue Op) {
+SDValue VectorLegalizer::PromoteINT_TO_FP(SDValue Op) {
// INT_TO_FP operations may require the input operand be promoted even
// when the type is otherwise legal.
EVT VT = Op.getOperand(0).getValueType();
@@ -387,7 +411,7 @@ SDValue VectorLegalizer::PromoteVectorOpINT_TO_FP(SDValue Op) {
// elements and then truncate the result. This is different from the default
// PromoteVector which uses bitcast to promote thus assuming that the
// promoted vector type has the same overall size.
-SDValue VectorLegalizer::PromoteVectorOpFP_TO_INT(SDValue Op, bool isSigned) {
+SDValue VectorLegalizer::PromoteFP_TO_INT(SDValue Op, bool isSigned) {
assert(Op.getNode()->getNumValues() == 1 &&
"Can't promote a vector with multiple results!");
EVT VT = Op.getValueType();
@@ -609,6 +633,33 @@ SDValue VectorLegalizer::ExpandStore(SDValue Op) {
return TF;
}
+SDValue VectorLegalizer::Expand(SDValue Op) {
+ switch (Op->getOpcode()) {
+ case ISD::SIGN_EXTEND_INREG:
+ return ExpandSEXTINREG(Op);
+ case ISD::ANY_EXTEND_VECTOR_INREG:
+ return ExpandANY_EXTEND_VECTOR_INREG(Op);
+ case ISD::SIGN_EXTEND_VECTOR_INREG:
+ return ExpandSIGN_EXTEND_VECTOR_INREG(Op);
+ case ISD::ZERO_EXTEND_VECTOR_INREG:
+ return ExpandZERO_EXTEND_VECTOR_INREG(Op);
+ case ISD::BSWAP:
+ return ExpandBSWAP(Op);
+ case ISD::VSELECT:
+ return ExpandVSELECT(Op);
+ case ISD::SELECT:
+ return ExpandSELECT(Op);
+ case ISD::UINT_TO_FP:
+ return ExpandUINT_TO_FLOAT(Op);
+ case ISD::FNEG:
+ return ExpandFNEG(Op);
+ case ISD::SETCC:
+ return UnrollVSETCC(Op);
+ default:
+ return DAG.UnrollVectorOp(Op.getNode());
+ }
+}
+
SDValue VectorLegalizer::ExpandSELECT(SDValue Op) {
// Lower a select instruction where the condition is a scalar and the
// operands are vectors. Lower this select to VSELECT and implement it
@@ -686,6 +737,85 @@ SDValue VectorLegalizer::ExpandSEXTINREG(SDValue Op) {
return DAG.getNode(ISD::SRA, DL, VT, Op, ShiftSz);
}
+// Generically expand a vector anyext in register to a shuffle of the relevant
+// lanes into the appropriate locations, with other lanes left undef.
+SDValue VectorLegalizer::ExpandANY_EXTEND_VECTOR_INREG(SDValue Op) {
+ SDLoc DL(Op);
+ EVT VT = Op.getValueType();
+ int NumElements = VT.getVectorNumElements();
+ SDValue Src = Op.getOperand(0);
+ EVT SrcVT = Src.getValueType();
+ int NumSrcElements = SrcVT.getVectorNumElements();
+
+ // Build a base mask of undef shuffles.
+ SmallVector<int, 16> ShuffleMask;
+ ShuffleMask.resize(NumSrcElements, -1);
+
+ // Place the extended lanes into the correct locations.
+ int ExtLaneScale = NumSrcElements / NumElements;
+ int EndianOffset = TLI.isBigEndian() ? ExtLaneScale - 1 : 0;
+ for (int i = 0; i < NumElements; ++i)
+ ShuffleMask[i * ExtLaneScale + EndianOffset] = i;
+
+ return DAG.getNode(
+ ISD::BITCAST, DL, VT,
+ DAG.getVectorShuffle(SrcVT, DL, Src, DAG.getUNDEF(SrcVT), ShuffleMask));
+}
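
The shuffle mask that loop produces, extracted into a standalone helper for inspection (the function name is hypothetical): extending the low 2 lanes of a v4 source to 2x-wide elements, on little and big endian.

    #include <iostream>
    #include <vector>

    std::vector<int> anyExtMask(int NumSrcElements, int NumElements,
                                bool BigEndian) {
      std::vector<int> Mask(NumSrcElements, -1); // -1: lane stays undef
      int Scale = NumSrcElements / NumElements;
      int Offset = BigEndian ? Scale - 1 : 0;
      for (int i = 0; i < NumElements; ++i)
        Mask[i * Scale + Offset] = i;
      return Mask;
    }

    int main() {
      for (int Idx : anyExtMask(4, 2, /*BigEndian=*/false))
        std::cout << Idx << ' '; // 0 -1 1 -1
      std::cout << '\n';
      for (int Idx : anyExtMask(4, 2, /*BigEndian=*/true))
        std::cout << Idx << ' '; // -1 0 -1 1
      std::cout << '\n';
    }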
+
+SDValue VectorLegalizer::ExpandSIGN_EXTEND_VECTOR_INREG(SDValue Op) {
+ SDLoc DL(Op);
+ EVT VT = Op.getValueType();
+ SDValue Src = Op.getOperand(0);
+ EVT SrcVT = Src.getValueType();
+
+ // First build an any-extend node which can be legalized above when we
+ // recurse through it.
+ Op = DAG.getAnyExtendVectorInReg(Src, DL, VT);
+
+ // Now we need to sign extend. Do this by shifting the elements. Even if these
+ // aren't legal operations, they have a better chance of being legalized
+ // without full scalarization than the sign extension does.
+ unsigned EltWidth = VT.getVectorElementType().getSizeInBits();
+ unsigned SrcEltWidth = SrcVT.getVectorElementType().getSizeInBits();
+ SDValue ShiftAmount = DAG.getConstant(EltWidth - SrcEltWidth, VT);
+ return DAG.getNode(ISD::SRA, DL, VT,
+ DAG.getNode(ISD::SHL, DL, VT, Op, ShiftAmount),
+ ShiftAmount);
+}
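
A scalar analogue of the SHL/SRA pair above (note the assumption that right-shifting a negative signed int is arithmetic, which ISD::SRA guarantees but C++ only provides in practice):

    #include <cstdint>
    #include <iostream>

    // Sign extend the low SrcBits of a 32-bit element: shift the field up to
    // the top, then arithmetic-shift it back down.
    int32_t signExtendInElt(uint32_t V, unsigned SrcBits) {
      unsigned Shift = 32 - SrcBits;          // EltWidth - SrcEltWidth
      return (int32_t)(V << Shift) >> Shift;  // ISD::SHL then ISD::SRA
    }

    int main() {
      // 0xFF in the low 8 bits of an i32 element becomes -1 after extension.
      std::cout << signExtendInElt(0x000000FFu, 8) << '\n'; // -1
    }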
+
+// Generically expand a vector zext in register to a shuffle of the relevant
+// lanes into the appropriate locations, a blend of zero into the high bits,
+// and a bitcast to the wider element type.
+SDValue VectorLegalizer::ExpandZERO_EXTEND_VECTOR_INREG(SDValue Op) {
+ SDLoc DL(Op);
+ EVT VT = Op.getValueType();
+ int NumElements = VT.getVectorNumElements();
+ SDValue Src = Op.getOperand(0);
+ EVT SrcVT = Src.getValueType();
+ int NumSrcElements = SrcVT.getVectorNumElements();
+
+ // Build up a zero vector to blend into this one.
+ EVT SrcScalarVT = SrcVT.getScalarType();
+ SDValue ScalarZero = DAG.getTargetConstant(0, SrcScalarVT);
+ SmallVector<SDValue, 4> BuildVectorOperands(NumSrcElements, ScalarZero);
+ SDValue Zero = DAG.getNode(ISD::BUILD_VECTOR, DL, SrcVT, BuildVectorOperands);
+
+ // Shuffle the incoming lanes into the correct position, and pull all other
+ // lanes from the zero vector.
+ SmallVector<int, 16> ShuffleMask;
+ ShuffleMask.reserve(NumSrcElements);
+ for (int i = 0; i < NumSrcElements; ++i)
+ ShuffleMask.push_back(i);
+
+ int ExtLaneScale = NumSrcElements / NumElements;
+ int EndianOffset = TLI.isBigEndian() ? ExtLaneScale - 1 : 0;
+ for (int i = 0; i < NumElements; ++i)
+ ShuffleMask[i * ExtLaneScale + EndianOffset] = NumSrcElements + i;
+
+ return DAG.getNode(ISD::BITCAST, DL, VT,
+ DAG.getVectorShuffle(SrcVT, DL, Zero, Src, ShuffleMask));
+}
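
The corresponding mask for the zero-extend case, again as a standalone helper with a hypothetical name: lanes default to the zero vector (shuffle operand 0), and source lanes (operand 1, so indices offset by NumSrcElements) land in the low half of each wide element.

    #include <iostream>
    #include <vector>

    std::vector<int> zextMask(int NumSrcElements, int NumElements,
                              bool BigEndian) {
      std::vector<int> Mask;
      for (int i = 0; i < NumSrcElements; ++i)
        Mask.push_back(i); // pick zeros by default
      int Scale = NumSrcElements / NumElements;
      int Offset = BigEndian ? Scale - 1 : 0;
      for (int i = 0; i < NumElements; ++i)
        Mask[i * Scale + Offset] = NumSrcElements + i; // pull in a source lane
      return Mask;
    }

    int main() {
      for (int Idx : zextMask(4, 2, /*BigEndian=*/false))
        std::cout << Idx << ' '; // 4 1 5 3
      std::cout << '\n';
    }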
+
SDValue VectorLegalizer::ExpandBSWAP(SDValue Op) {
EVT VT = Op.getValueType();
@@ -729,9 +859,9 @@ SDValue VectorLegalizer::ExpandVSELECT(SDValue Op) {
// FIXME: Sign extend 1 to all ones if that's legal on the target.
if (TLI.getOperationAction(ISD::AND, VT) == TargetLowering::Expand ||
TLI.getOperationAction(ISD::XOR, VT) == TargetLowering::Expand ||
- TLI.getOperationAction(ISD::OR, VT) == TargetLowering::Expand ||
- TLI.getBooleanContents(true) !=
- TargetLowering::ZeroOrNegativeOneBooleanContent)
+ TLI.getOperationAction(ISD::OR, VT) == TargetLowering::Expand ||
+ TLI.getBooleanContents(Op1.getValueType()) !=
+ TargetLowering::ZeroOrNegativeOneBooleanContent)
return DAG.UnrollVectorOp(Op.getNode());
// If the mask and the type are different sizes, unroll the vector op. This
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 368eba3..f77c592 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -257,8 +257,26 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N) {
SDValue DAGTypeLegalizer::ScalarizeVecRes_VSELECT(SDNode *N) {
SDValue Cond = GetScalarizedVector(N->getOperand(0));
SDValue LHS = GetScalarizedVector(N->getOperand(1));
- TargetLowering::BooleanContent ScalarBool = TLI.getBooleanContents(false);
- TargetLowering::BooleanContent VecBool = TLI.getBooleanContents(true);
+ TargetLowering::BooleanContent ScalarBool =
+ TLI.getBooleanContents(false, false);
+ TargetLowering::BooleanContent VecBool = TLI.getBooleanContents(true, false);
+
+ // If integer and float booleans have different contents then we can't
+ // reliably optimize in all cases. There is a full explanation for this in
+ // DAGCombiner::visitSELECT() where the same issue affects folding
+ // (select C, 0, 1) to (xor C, 1).
+ if (TLI.getBooleanContents(false, false) !=
+ TLI.getBooleanContents(false, true)) {
+ // At least try the common case where the boolean is generated by a
+ // comparison.
+ if (Cond->getOpcode() == ISD::SETCC) {
+ EVT OpVT = Cond->getOperand(0)->getValueType(0);
+ ScalarBool = TLI.getBooleanContents(OpVT.getScalarType());
+ VecBool = TLI.getBooleanContents(OpVT);
+ } else
+ ScalarBool = TargetLowering::UndefinedBooleanContent;
+ }
+
if (ScalarBool != VecBool) {
EVT CondVT = Cond.getValueType();
switch (ScalarBool) {
@@ -357,7 +375,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_VSETCC(SDNode *N) {
// Vectors may have a different boolean contents to scalars. Promote the
// value appropriately.
ISD::NodeType ExtendCode =
- TargetLowering::getExtendForContent(TLI.getBooleanContents(true));
+ TargetLowering::getExtendForContent(TLI.getBooleanContents(OpVT));
return DAG.getNode(ExtendCode, DL, NVT, Res);
}
@@ -545,6 +563,7 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::BUILD_VECTOR: SplitVecRes_BUILD_VECTOR(N, Lo, Hi); break;
case ISD::CONCAT_VECTORS: SplitVecRes_CONCAT_VECTORS(N, Lo, Hi); break;
case ISD::EXTRACT_SUBVECTOR: SplitVecRes_EXTRACT_SUBVECTOR(N, Lo, Hi); break;
+ case ISD::INSERT_SUBVECTOR: SplitVecRes_INSERT_SUBVECTOR(N, Lo, Hi); break;
case ISD::FP_ROUND_INREG: SplitVecRes_InregOp(N, Lo, Hi); break;
case ISD::FPOWI: SplitVecRes_FPOWI(N, Lo, Hi); break;
case ISD::INSERT_VECTOR_ELT: SplitVecRes_INSERT_VECTOR_ELT(N, Lo, Hi); break;
@@ -765,6 +784,43 @@ void DAGTypeLegalizer::SplitVecRes_EXTRACT_SUBVECTOR(SDNode *N, SDValue &Lo,
TLI.getVectorIdxTy()));
}
+void DAGTypeLegalizer::SplitVecRes_INSERT_SUBVECTOR(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue Vec = N->getOperand(0);
+ SDValue SubVec = N->getOperand(1);
+ SDValue Idx = N->getOperand(2);
+ SDLoc dl(N);
+ GetSplitVector(Vec, Lo, Hi);
+
+ // Spill the vector to the stack.
+ EVT VecVT = Vec.getValueType();
+ EVT SubVecVT = VecVT.getVectorElementType();
+ SDValue StackPtr = DAG.CreateStackTemporary(VecVT);
+ SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr,
+ MachinePointerInfo(), false, false, 0);
+
+ // Store the new subvector into the specified index.
+ SDValue SubVecPtr = GetVectorElementPointer(StackPtr, SubVecVT, Idx);
+ Type *VecType = VecVT.getTypeForEVT(*DAG.getContext());
+ unsigned Alignment = TLI.getDataLayout()->getPrefTypeAlignment(VecType);
+ Store = DAG.getStore(Store, dl, SubVec, SubVecPtr, MachinePointerInfo(),
+ false, false, 0);
+
+ // Load the Lo part from the stack slot.
+ Lo = DAG.getLoad(Lo.getValueType(), dl, Store, StackPtr, MachinePointerInfo(),
+ false, false, false, 0);
+
+ // Increment the pointer to the other part.
+ unsigned IncrementSize = Lo.getValueType().getSizeInBits() / 8;
+ StackPtr =
+ DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr,
+ DAG.getConstant(IncrementSize, StackPtr.getValueType()));
+
+ // Load the Hi part from the stack slot.
+ Hi = DAG.getLoad(Hi.getValueType(), dl, Store, StackPtr, MachinePointerInfo(),
+ false, false, false, MinAlign(Alignment, IncrementSize));
+}
+
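The split above goes through memory: spill the wide vector, overwrite the subvector at its element index, then reload the two halves. A hedged memory model of that sequence (fixed sizes and insert index chosen only for illustration):

    #include <array>
    #include <cstdio>

    int main() {
      std::array<int, 4> Vec = {10, 20, 30, 40}; // wide vector, "spilled"
      std::array<int, 2> Sub = {7, 8};           // subvector to insert
      const int Idx = 1;                         // hypothetical insert position
      for (int i = 0; i < 2; ++i)                // store Sub at element Idx
        Vec[Idx + i] = Sub[i];
      // Reload as two halves, mirroring the Lo/Hi loads in the patch.
      std::array<int, 2> Lo = {Vec[0], Vec[1]};
      std::array<int, 2> Hi = {Vec[2], Vec[3]};
      printf("Lo = {%d, %d}, Hi = {%d, %d}\n", Lo[0], Lo[1], Hi[0], Hi[1]);
      // prints: Lo = {10, 7}, Hi = {8, 40}
    }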
void DAGTypeLegalizer::SplitVecRes_FPOWI(SDNode *N, SDValue &Lo,
SDValue &Hi) {
SDLoc dl(N);
@@ -1511,7 +1567,6 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
case ISD::ADD:
case ISD::AND:
- case ISD::BSWAP:
case ISD::MUL:
case ISD::MULHS:
case ISD::MULHU:
@@ -1558,6 +1613,7 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
Res = WidenVecRes_Convert(N);
break;
+ case ISD::BSWAP:
case ISD::CTLZ:
case ISD::CTPOP:
case ISD::CTTZ:
@@ -2343,15 +2399,18 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) {
case ISD::STORE: Res = WidenVecOp_STORE(N); break;
case ISD::SETCC: Res = WidenVecOp_SETCC(N); break;
+ case ISD::ANY_EXTEND:
+ case ISD::SIGN_EXTEND:
+ case ISD::ZERO_EXTEND:
+ Res = WidenVecOp_EXTEND(N);
+ break;
+
case ISD::FP_EXTEND:
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT:
case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP:
case ISD::TRUNCATE:
- case ISD::SIGN_EXTEND:
- case ISD::ZERO_EXTEND:
- case ISD::ANY_EXTEND:
Res = WidenVecOp_Convert(N);
break;
}
@@ -2372,6 +2431,68 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) {
return false;
}
+SDValue DAGTypeLegalizer::WidenVecOp_EXTEND(SDNode *N) {
+ SDLoc DL(N);
+ EVT VT = N->getValueType(0);
+
+ SDValue InOp = N->getOperand(0);
+ // If some legalization strategy other than widening is used on the operand,
+ // we can't safely assume that just extending the low lanes is the correct
+ // transformation.
+ if (getTypeAction(InOp.getValueType()) != TargetLowering::TypeWidenVector)
+ return WidenVecOp_Convert(N);
+ InOp = GetWidenedVector(InOp);
+ assert(VT.getVectorNumElements() <
+ InOp.getValueType().getVectorNumElements() &&
+ "Input wasn't widened!");
+
+ // We may need to further widen the operand until it has the same total
+ // vector size as the result.
+ EVT InVT = InOp.getValueType();
+ if (InVT.getSizeInBits() != VT.getSizeInBits()) {
+ EVT InEltVT = InVT.getVectorElementType();
+    for (int i = MVT::FIRST_VECTOR_VALUETYPE, e = MVT::LAST_VECTOR_VALUETYPE;
+         i < e; ++i) {
+ EVT FixedVT = (MVT::SimpleValueType)i;
+ EVT FixedEltVT = FixedVT.getVectorElementType();
+ if (TLI.isTypeLegal(FixedVT) &&
+ FixedVT.getSizeInBits() == VT.getSizeInBits() &&
+ FixedEltVT == InEltVT) {
+ assert(FixedVT.getVectorNumElements() >= VT.getVectorNumElements() &&
+ "Not enough elements in the fixed type for the operand!");
+ assert(FixedVT.getVectorNumElements() != InVT.getVectorNumElements() &&
+ "We can't have the same type as we started with!");
+ if (FixedVT.getVectorNumElements() > InVT.getVectorNumElements())
+ InOp = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, FixedVT,
+ DAG.getUNDEF(FixedVT), InOp,
+ DAG.getConstant(0, TLI.getVectorIdxTy()));
+ else
+ InOp = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, FixedVT, InOp,
+ DAG.getConstant(0, TLI.getVectorIdxTy()));
+ break;
+ }
+ }
+ InVT = InOp.getValueType();
+ if (InVT.getSizeInBits() != VT.getSizeInBits())
+ // We couldn't find a legal vector type that was a widening of the input
+ // and could be extended in-register to the result type, so we have to
+ // scalarize.
+ return WidenVecOp_Convert(N);
+ }
+
+ // Use special DAG nodes to represent the operation of extending the
+ // low lanes.
+ switch (N->getOpcode()) {
+ default:
+ llvm_unreachable("Extend legalization on on extend operation!");
+ case ISD::ANY_EXTEND:
+ return DAG.getAnyExtendVectorInReg(InOp, DL, VT);
+ case ISD::SIGN_EXTEND:
+ return DAG.getSignExtendVectorInReg(InOp, DL, VT);
+ case ISD::ZERO_EXTEND:
+ return DAG.getZeroExtendVectorInReg(InOp, DL, VT);
+ }
+}
+
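The search loop above scans the fixed vector types for one with the result's total bit width and the operand's element type. A standalone model using an invented type table (none of these names are LLVM's):

    #include <cstdio>

    struct VecTy { int NumElts, EltBits; };
    static int bits(VecTy T) { return T.NumElts * T.EltBits; }

    int main() {
      // Hypothetical "legal type" table standing in for the MVT enumeration.
      const VecTy Table[] = {{2, 64}, {4, 32}, {8, 32}, {16, 8}};
      const VecTy In = {4, 32};   // widened operand: v4i32, 128 bits
      const int WantBits = 256;   // the result needs a 256-bit input vector
      for (VecTy T : Table)
        if (bits(T) == WantBits && T.EltBits == In.EltBits) {
          printf("found v%di%d\n", T.NumElts, T.EltBits); // found v8i32
          return 0;
        }
      printf("no match; fall back to scalarizing\n");
    }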
SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) {
// Since the result is legal and the input is illegal, it is unlikely
// that we can fix the input to a legal type so unroll the convert
diff --git a/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp b/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
index f92230c..624003f 100644
--- a/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
+++ b/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
@@ -442,7 +442,7 @@ signed ResourcePriorityQueue::SUSchedulingCost(SUnit *SU) {
ResCount -= (regPressureDelta(SU) * ScaleTwo);
}
- // These are platform specific things.
+ // These are platform-specific things.
// Will need to go into the back end
   // and be accessed from here via a hook.
for (SDNode *N = SU->getNode(); N; N = N->getGluedNode()) {
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
index 78ec4df..13cfae7 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
@@ -170,7 +170,8 @@ public:
if (DisableSchedCycles || !NeedLatency)
HazardRec = new ScheduleHazardRecognizer();
else
- HazardRec = tm.getInstrInfo()->CreateTargetHazardRecognizer(&tm, this);
+ HazardRec = tm.getInstrInfo()->CreateTargetHazardRecognizer(
+ tm.getSubtargetImpl(), this);
}
~ScheduleDAGRRList() {
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp
index 51c51d6..4589b0c 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp
@@ -73,7 +73,8 @@ public:
: ScheduleDAGSDNodes(mf), AvailableQueue(availqueue), AA(aa) {
const TargetMachine &tm = mf.getTarget();
- HazardRec = tm.getInstrInfo()->CreateTargetHazardRecognizer(&tm, this);
+ HazardRec = tm.getInstrInfo()->CreateTargetHazardRecognizer(
+ tm.getSubtargetImpl(), this);
}
~ScheduleDAGVLIW() {
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index b1b8035..daff1f2 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -48,6 +48,7 @@
#include "llvm/Target/TargetSelectionDAGInfo.h"
#include <algorithm>
#include <cmath>
+
using namespace llvm;
/// makeVTList - Return an instance of the SDVTList struct initialized with the
@@ -147,33 +148,34 @@ bool ISD::isBuildVectorAllZeros(const SDNode *N) {
if (N->getOpcode() != ISD::BUILD_VECTOR) return false;
- unsigned i = 0, e = N->getNumOperands();
-
- // Skip over all of the undef values.
- while (i != e && N->getOperand(i).getOpcode() == ISD::UNDEF)
- ++i;
+ bool IsAllUndef = true;
+ for (unsigned i = 0, e = N->getNumOperands(); i < e; ++i) {
+ if (N->getOperand(i).getOpcode() == ISD::UNDEF)
+ continue;
+ IsAllUndef = false;
+ // Do not accept build_vectors that aren't all constants or which have non-0
+ // elements. We have to be a bit careful here, as the type of the constant
+ // may not be the same as the type of the vector elements due to type
+ // legalization (the elements are promoted to a legal type for the target
+ // and a vector of a type may be legal when the base element type is not).
+ // We only want to check enough bits to cover the vector elements, because
+ // we care if the resultant vector is all zeros, not whether the individual
+ // constants are.
+ SDValue Zero = N->getOperand(i);
+    unsigned EltSize =
+        N->getValueType(0).getVectorElementType().getSizeInBits();
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Zero)) {
+ if (CN->getAPIntValue().countTrailingZeros() < EltSize)
+ return false;
+ } else if (ConstantFPSDNode *CFPN = dyn_cast<ConstantFPSDNode>(Zero)) {
+ if (CFPN->getValueAPF().bitcastToAPInt().countTrailingZeros() < EltSize)
+ return false;
+ } else
+ return false;
+ }
// Do not accept an all-undef vector.
- if (i == e) return false;
-
- // Do not accept build_vectors that aren't all constants or which have non-0
- // elements.
- SDValue Zero = N->getOperand(i);
- if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Zero)) {
- if (!CN->isNullValue())
- return false;
- } else if (ConstantFPSDNode *CFPN = dyn_cast<ConstantFPSDNode>(Zero)) {
- if (!CFPN->getValueAPF().isPosZero())
- return false;
- } else
+ if (IsAllUndef)
return false;
-
- // Okay, we have at least one 0 value, check to see if the rest match or are
- // undefs.
- for (++i; i != e; ++i)
- if (N->getOperand(i) != Zero &&
- N->getOperand(i).getOpcode() != ISD::UNDEF)
- return false;
return true;
}
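The trailing-zero test matters because type legalization can promote elements: an i8 lane stored as the i32 constant 0x100 still truncates to zero. A small sketch of the check, assuming constants of at most 64 bits:

    #include <cstdint>
    #include <cstdio>

    // Model of the check: an element is "zero" if at least EltSize low bits
    // of the (possibly promoted) constant are zero.
    static bool lowBitsZero(uint64_t C, unsigned EltSize) {
      return EltSize >= 64 || (C & ((UINT64_C(1) << EltSize) - 1)) == 0;
    }

    int main() {
      // Hypothetical promoted constant: an i8 element legalized to i32 0x100.
      printf("%d\n", lowBitsZero(0x100, 8)); // 1: truncates to an i8 zero
      printf("%d\n", lowBitsZero(0x101, 8)); // 0: the low byte is 1
    }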
@@ -381,6 +383,20 @@ static void AddNodeIDOperands(FoldingSetNodeID &ID,
}
}
+static void AddBinaryNodeIDCustom(FoldingSetNodeID &ID, bool nuw, bool nsw,
+ bool exact) {
+ ID.AddBoolean(nuw);
+ ID.AddBoolean(nsw);
+ ID.AddBoolean(exact);
+}
+
+/// AddBinaryNodeIDCustom - Add the BinarySDNode's special flags to the ID.
+static void AddBinaryNodeIDCustom(FoldingSetNodeID &ID, unsigned Opcode,
+ bool nuw, bool nsw, bool exact) {
+ if (isBinOpWithFlags(Opcode))
+ AddBinaryNodeIDCustom(ID, nuw, nsw, exact);
+}
+
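Folding the flags into the FoldingSet ID keeps, say, add and add nsw from CSE'ing into a single node. A toy model of such a keyed set (std::set stands in for the FoldingSet purely for illustration):

    #include <cstdio>
    #include <set>
    #include <string>
    #include <tuple>

    int main() {
      // Key = opcode plus the nuw/nsw/exact flags, as AddBinaryNodeIDCustom
      // now contributes to the node's folding-set profile.
      using Key = std::tuple<std::string, bool, bool, bool>;
      std::set<Key> CSE;
      CSE.insert({"add", false, false, false});                       // add
      bool Distinct = CSE.insert({"add", false, true, false}).second; // add nsw
      printf("add nsw kept separate from add: %s\n", Distinct ? "yes" : "no");
    }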
static void AddNodeIDNode(FoldingSetNodeID &ID, unsigned short OpC,
SDVTList VTList, ArrayRef<SDValue> OpList) {
AddNodeIDOpcode(ID, OpC);
@@ -473,7 +489,21 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) {
ID.AddInteger(ST->getPointerInfo().getAddrSpace());
break;
}
+ case ISD::SDIV:
+ case ISD::UDIV:
+ case ISD::SRA:
+ case ISD::SRL:
+ case ISD::MUL:
+ case ISD::ADD:
+ case ISD::SUB:
+ case ISD::SHL: {
+ const BinaryWithFlagsSDNode *BinNode = cast<BinaryWithFlagsSDNode>(N);
+ AddBinaryNodeIDCustom(ID, N->getOpcode(), BinNode->hasNoUnsignedWrap(),
+ BinNode->hasNoSignedWrap(), BinNode->isExact());
+ break;
+ }
case ISD::ATOMIC_CMP_SWAP:
+ case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS:
case ISD::ATOMIC_SWAP:
case ISD::ATOMIC_LOAD_ADD:
case ISD::ATOMIC_LOAD_SUB:
@@ -527,7 +557,7 @@ static void AddNodeIDNode(FoldingSetNodeID &ID, const SDNode *N) {
// Add the return value info.
AddNodeIDValueTypes(ID, N->getVTList());
// Add the operand info.
- AddNodeIDOperands(ID, makeArrayRef(N->op_begin(), N->op_end()));
+ AddNodeIDOperands(ID, N->ops());
// Handle SDNode leafs with special info.
AddNodeIDCustom(ID, N);
@@ -926,6 +956,25 @@ void SelectionDAG::allnodes_clear() {
DeallocateNode(AllNodes.begin());
}
+BinarySDNode *SelectionDAG::GetBinarySDNode(unsigned Opcode, SDLoc DL,
+ SDVTList VTs, SDValue N1,
+ SDValue N2, bool nuw, bool nsw,
+ bool exact) {
+ if (isBinOpWithFlags(Opcode)) {
+ BinaryWithFlagsSDNode *FN = new (NodeAllocator) BinaryWithFlagsSDNode(
+ Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs, N1, N2);
+ FN->setHasNoUnsignedWrap(nuw);
+ FN->setHasNoSignedWrap(nsw);
+ FN->setIsExact(exact);
+
+ return FN;
+ }
+
+ BinarySDNode *N = new (NodeAllocator)
+ BinarySDNode(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs, N1, N2);
+ return N;
+}
+
void SelectionDAG::clear() {
allnodes_clear();
OperandAllocator.Reset();
@@ -963,11 +1012,12 @@ SDValue SelectionDAG::getZExtOrTrunc(SDValue Op, SDLoc DL, EVT VT) {
getNode(ISD::TRUNCATE, DL, VT, Op);
}
-SDValue SelectionDAG::getBoolExtOrTrunc(SDValue Op, SDLoc SL, EVT VT) {
+SDValue SelectionDAG::getBoolExtOrTrunc(SDValue Op, SDLoc SL, EVT VT,
+ EVT OpVT) {
if (VT.bitsLE(Op.getValueType()))
return getNode(ISD::TRUNCATE, SL, VT, Op);
- TargetLowering::BooleanContent BType = TLI->getBooleanContents(VT.isVector());
+ TargetLowering::BooleanContent BType = TLI->getBooleanContents(OpVT);
return getNode(TLI->getExtendForContent(BType), SL, VT, Op);
}
@@ -983,6 +1033,36 @@ SDValue SelectionDAG::getZeroExtendInReg(SDValue Op, SDLoc DL, EVT VT) {
getConstant(Imm, Op.getValueType()));
}
+SDValue SelectionDAG::getAnyExtendVectorInReg(SDValue Op, SDLoc DL, EVT VT) {
+ assert(VT.isVector() && "This DAG node is restricted to vector types.");
+ assert(VT.getSizeInBits() == Op.getValueType().getSizeInBits() &&
+ "The sizes of the input and result must match in order to perform the "
+ "extend in-register.");
+ assert(VT.getVectorNumElements() < Op.getValueType().getVectorNumElements() &&
+ "The destination vector type must have fewer lanes than the input.");
+ return getNode(ISD::ANY_EXTEND_VECTOR_INREG, DL, VT, Op);
+}
+
+SDValue SelectionDAG::getSignExtendVectorInReg(SDValue Op, SDLoc DL, EVT VT) {
+ assert(VT.isVector() && "This DAG node is restricted to vector types.");
+ assert(VT.getSizeInBits() == Op.getValueType().getSizeInBits() &&
+ "The sizes of the input and result must match in order to perform the "
+ "extend in-register.");
+ assert(VT.getVectorNumElements() < Op.getValueType().getVectorNumElements() &&
+ "The destination vector type must have fewer lanes than the input.");
+ return getNode(ISD::SIGN_EXTEND_VECTOR_INREG, DL, VT, Op);
+}
+
+SDValue SelectionDAG::getZeroExtendVectorInReg(SDValue Op, SDLoc DL, EVT VT) {
+ assert(VT.isVector() && "This DAG node is restricted to vector types.");
+ assert(VT.getSizeInBits() == Op.getValueType().getSizeInBits() &&
+ "The sizes of the input and result must match in order to perform the "
+ "extend in-register.");
+ assert(VT.getVectorNumElements() < Op.getValueType().getVectorNumElements() &&
+ "The destination vector type must have fewer lanes than the input.");
+ return getNode(ISD::ZERO_EXTEND_VECTOR_INREG, DL, VT, Op);
+}
+
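Semantically, the *_EXTEND_VECTOR_INREG nodes extend only the low lanes of a same-total-width input. A scalar model for a hypothetical v8i8 to v4i16 sign extension:

    #include <cstdint>
    #include <cstdio>

    int main() {
      // Same 64 total bits on both sides; only the low 4 input lanes survive,
      // each sign-extended. The high lanes are simply dropped.
      int8_t In[8] = {-1, 2, -3, 4, 99, 99, 99, 99};
      int16_t Out[4];
      for (int i = 0; i < 4; ++i)
        Out[i] = In[i];    // implicit sign extension i8 -> i16
      for (int16_t V : Out)
        printf("%d ", V);  // prints: -1 2 -3 4
      printf("\n");
    }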
/// getNOT - Create a bitwise NOT operation as (XOR Val, -1).
///
SDValue SelectionDAG::getNOT(SDLoc DL, SDValue Val, EVT VT) {
@@ -995,7 +1075,7 @@ SDValue SelectionDAG::getNOT(SDLoc DL, SDValue Val, EVT VT) {
SDValue SelectionDAG::getLogicalNOT(SDLoc DL, SDValue Val, EVT VT) {
EVT EltVT = VT.getScalarType();
SDValue TrueValue;
- switch (TLI->getBooleanContents(VT.isVector())) {
+ switch (TLI->getBooleanContents(VT)) {
case TargetLowering::ZeroOrOneBooleanContent:
case TargetLowering::UndefinedBooleanContent:
TrueValue = getConstant(1, VT);
@@ -1190,15 +1270,8 @@ SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, SDLoc DL,
if (BitWidth < 64)
Offset = SignExtend64(Offset, BitWidth);
- const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV);
- if (!GVar) {
- // If GV is an alias then use the aliasee for determining thread-localness.
- if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
- GVar = dyn_cast_or_null<GlobalVariable>(GA->getAliasee());
- }
-
unsigned Opc;
- if (GVar && GVar->isThreadLocal())
+ if (GV->isThreadLocal())
Opc = isTargetGA ? ISD::TargetGlobalTLSAddress : ISD::GlobalTLSAddress;
else
Opc = isTargetGA ? ISD::TargetGlobalAddress : ISD::GlobalAddress;
@@ -1454,6 +1527,11 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, SDLoc dl, SDValue N1,
N1 = getUNDEF(VT);
commuteShuffle(N1, N2, MaskVec);
}
+ // Reset our undef status after accounting for the mask.
+ N2Undef = N2.getOpcode() == ISD::UNDEF;
+ // Re-check whether both sides ended up undef.
+ if (N1.getOpcode() == ISD::UNDEF && N2Undef)
+ return getUNDEF(VT);
// If Identity shuffle return that node.
bool Identity = true;
@@ -1464,9 +1542,36 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, SDLoc dl, SDValue N1,
return N1;
// Shuffling a constant splat doesn't change the result.
- if (N2Undef && N1.getOpcode() == ISD::BUILD_VECTOR)
- if (cast<BuildVectorSDNode>(N1)->getConstantSplatValue())
- return N1;
+ if (N2Undef) {
+ SDValue V = N1;
+
+ // Look through any bitcasts. We check that these don't change the number
+ // (and size) of elements and just changes their types.
+ while (V.getOpcode() == ISD::BITCAST)
+ V = V->getOperand(0);
+
+ // A splat should always show up as a build vector node.
+ if (auto *BV = dyn_cast<BuildVectorSDNode>(V)) {
+ BitVector UndefElements;
+ SDValue Splat = BV->getSplatValue(&UndefElements);
+ // If this is a splat of an undef, shuffling it is also undef.
+ if (Splat && Splat.getOpcode() == ISD::UNDEF)
+ return getUNDEF(VT);
+
+ // We only have a splat which can skip shuffles if there is a splatted
+ // value and no undef lanes rearranged by the shuffle.
+ if (Splat && UndefElements.none()) {
+ // Splat of <x, x, ..., x>, return <x, x, ..., x>, provided that the
+        // number of elements matches or the splatted value is a zero constant.
+ if (V.getValueType().getVectorNumElements() ==
+ VT.getVectorNumElements())
+ return N1;
+ if (auto *C = dyn_cast<ConstantSDNode>(Splat))
+ if (C->isNullValue())
+ return N1;
+ }
+ }
+ }
FoldingSetNodeID ID;
SDValue Ops[2] = { N1, N2 };
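Why the splat fold above is safe: permuting identical lanes reproduces the input, so the shuffle can return N1 whenever the splat has no undef lanes. A four-lane sketch with an arbitrary mask:

    #include <cstdio>
    #include <vector>

    int main() {
      std::vector<int> Splat = {9, 9, 9, 9};
      std::vector<int> Mask = {3, 0, 2, 1}; // arbitrary permutation
      std::vector<int> Out(4);
      for (int i = 0; i < 4; ++i)
        Out[i] = Splat[Mask[i]];
      for (int V : Out)
        printf("%d ", V);                   // prints: 9 9 9 9
      printf("\n");
    }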
@@ -1692,7 +1797,8 @@ SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1,
case ISD::SETTRUE:
case ISD::SETTRUE2: {
const TargetLowering *TLI = TM.getTargetLowering();
- TargetLowering::BooleanContent Cnt = TLI->getBooleanContents(VT.isVector());
+ TargetLowering::BooleanContent Cnt =
+ TLI->getBooleanContents(N1->getValueType(0));
return getConstant(
Cnt == TargetLowering::ZeroOrNegativeOneBooleanContent ? -1ULL : 1, VT);
}
@@ -1923,11 +2029,20 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
case ISD::UMULO:
if (Op.getResNo() != 1)
break;
- // The boolean result conforms to getBooleanContents. Fall through.
+ // The boolean result conforms to getBooleanContents.
+ // If we know the result of a setcc has the top bits zero, use this info.
+ // We know that we have an integer-based boolean since these operations
+    // are only available for integers.
+ if (TLI->getBooleanContents(Op.getValueType().isVector(), false) ==
+ TargetLowering::ZeroOrOneBooleanContent &&
+ BitWidth > 1)
+ KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - 1);
+ break;
case ISD::SETCC:
// If we know the result of a setcc has the top bits zero, use this info.
- if (TLI->getBooleanContents(Op.getValueType().isVector()) ==
- TargetLowering::ZeroOrOneBooleanContent && BitWidth > 1)
+ if (TLI->getBooleanContents(Op.getOperand(0).getValueType()) ==
+ TargetLowering::ZeroOrOneBooleanContent &&
+ BitWidth > 1)
KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - 1);
break;
case ISD::SHL:
@@ -2043,7 +2158,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
unsigned MemBits = VT.getScalarType().getSizeInBits();
KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits);
} else if (const MDNode *Ranges = LD->getRanges()) {
- computeKnownBitsLoad(*Ranges, KnownZero);
+ computeKnownBitsFromRangeMetadata(*Ranges, KnownZero);
}
break;
}
@@ -2192,8 +2307,11 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
const APInt &RA = Rem->getAPIntValue();
if (RA.isPowerOf2()) {
APInt LowBits = (RA - 1);
- KnownZero |= ~LowBits;
- computeKnownBits(Op.getOperand(0), KnownZero, KnownOne,Depth+1);
+ computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth + 1);
+
+ // The upper bits are all zero, the lower ones are unchanged.
+ KnownZero = KnownZero2 | ~LowBits;
+ KnownOne = KnownOne2 & LowBits;
break;
}
}
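Worked example for the urem change: for x % 8, LowBits is 0b111, so every bit above bit 2 is known zero, and whatever was already known about x's low three bits carries over. A sketch with hypothetical known-bits masks:

    #include <cstdint>
    #include <cstdio>

    int main() {
      // x urem 8 keeps only the low 3 bits of x.
      const uint32_t LowBits = 8 - 1;                    // 0b111
      uint32_t KnownZero2 = 0xFFFFFF00, KnownOne2 = 0x5; // facts about x
      uint32_t KnownZero = KnownZero2 | ~LowBits;        // upper bits all zero
      uint32_t KnownOne  = KnownOne2 & LowBits;          // low bits carried over
      printf("KnownZero=0x%08X KnownOne=0x%08X\n", KnownZero, KnownOne);
      // prints: KnownZero=0xFFFFFFF8 KnownOne=0x00000005
    }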
@@ -2323,9 +2441,16 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{
if (Op.getResNo() != 1)
break;
// The boolean result conforms to getBooleanContents. Fall through.
+ // If setcc returns 0/-1, all bits are sign bits.
+ // We know that we have an integer-based boolean since these operations
+    // are only available for integers.
+ if (TLI->getBooleanContents(Op.getValueType().isVector(), false) ==
+ TargetLowering::ZeroOrNegativeOneBooleanContent)
+ return VTBits;
+ break;
case ISD::SETCC:
// If setcc returns 0/-1, all bits are sign bits.
- if (TLI->getBooleanContents(Op.getValueType().isVector()) ==
+ if (TLI->getBooleanContents(Op.getOperand(0).getValueType()) ==
TargetLowering::ZeroOrNegativeOneBooleanContent)
return VTBits;
break;
@@ -2940,7 +3065,7 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, EVT VT,
}
SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1,
- SDValue N2) {
+ SDValue N2, bool nuw, bool nsw, bool exact) {
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
switch (Opcode) {
@@ -3380,22 +3505,25 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1,
}
// Memoize this node if possible.
- SDNode *N;
+ BinarySDNode *N;
SDVTList VTs = getVTList(VT);
+ const bool BinOpHasFlags = isBinOpWithFlags(Opcode);
if (VT != MVT::Glue) {
- SDValue Ops[] = { N1, N2 };
+ SDValue Ops[] = {N1, N2};
FoldingSetNodeID ID;
AddNodeIDNode(ID, Opcode, VTs, Ops);
+ if (BinOpHasFlags)
+ AddBinaryNodeIDCustom(ID, Opcode, nuw, nsw, exact);
void *IP = nullptr;
if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
return SDValue(E, 0);
- N = new (NodeAllocator) BinarySDNode(Opcode, DL.getIROrder(),
- DL.getDebugLoc(), VTs, N1, N2);
+ N = GetBinarySDNode(Opcode, DL, VTs, N1, N2, nuw, nsw, exact);
+
CSEMap.InsertNode(N, IP);
} else {
- N = new (NodeAllocator) BinarySDNode(Opcode, DL.getIROrder(),
- DL.getDebugLoc(), VTs, N1, N2);
+
+ N = GetBinarySDNode(Opcode, DL, VTs, N1, N2, nuw, nsw, exact);
}
AllNodes.push_back(N);
@@ -3583,7 +3711,7 @@ static SDValue getMemsetStringVal(EVT VT, SDLoc dl, SelectionDAG &DAG,
if (Str.empty()) {
if (VT.isInteger())
return DAG.getConstant(0, VT);
- else if (VT == MVT::f32 || VT == MVT::f64)
+ else if (VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f128)
return DAG.getConstantFP(0.0, VT);
else if (VT.isVector()) {
unsigned NumElts = VT.getVectorNumElements();
@@ -4110,7 +4238,7 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, SDLoc dl, SDValue Dst,
.setCallee(TLI->getLibcallCallingConv(RTLIB::MEMCPY),
Type::getVoidTy(*getContext()),
getExternalSymbol(TLI->getLibcallName(RTLIB::MEMCPY),
- TLI->getPointerTy()), &Args, 0)
+ TLI->getPointerTy()), std::move(Args), 0)
.setDiscardResult();
std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI);
@@ -4166,7 +4294,7 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, SDLoc dl, SDValue Dst,
.setCallee(TLI->getLibcallCallingConv(RTLIB::MEMMOVE),
Type::getVoidTy(*getContext()),
getExternalSymbol(TLI->getLibcallName(RTLIB::MEMMOVE),
- TLI->getPointerTy()), &Args, 0)
+ TLI->getPointerTy()), std::move(Args), 0)
.setDiscardResult();
std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI);
@@ -4230,7 +4358,7 @@ SDValue SelectionDAG::getMemset(SDValue Chain, SDLoc dl, SDValue Dst,
.setCallee(TLI->getLibcallCallingConv(RTLIB::MEMSET),
Type::getVoidTy(*getContext()),
getExternalSymbol(TLI->getLibcallName(RTLIB::MEMSET),
- TLI->getPointerTy()), &Args, 0)
+ TLI->getPointerTy()), std::move(Args), 0)
.setDiscardResult();
std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI);
@@ -4281,51 +4409,47 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT,
Ordering, SynchScope);
}
-SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT,
- SDValue Chain, SDValue Ptr, SDValue Cmp,
- SDValue Swp, MachinePointerInfo PtrInfo,
- unsigned Alignment,
- AtomicOrdering SuccessOrdering,
- AtomicOrdering FailureOrdering,
- SynchronizationScope SynchScope) {
+SDValue SelectionDAG::getAtomicCmpSwap(
+ unsigned Opcode, SDLoc dl, EVT MemVT, SDVTList VTs, SDValue Chain,
+ SDValue Ptr, SDValue Cmp, SDValue Swp, MachinePointerInfo PtrInfo,
+ unsigned Alignment, AtomicOrdering SuccessOrdering,
+ AtomicOrdering FailureOrdering, SynchronizationScope SynchScope) {
+ assert(Opcode == ISD::ATOMIC_CMP_SWAP ||
+ Opcode == ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS);
+ assert(Cmp.getValueType() == Swp.getValueType() && "Invalid Atomic Op Types");
+
if (Alignment == 0) // Ensure that codegen never sees alignment 0
Alignment = getEVTAlignment(MemVT);
MachineFunction &MF = getMachineFunction();
- // All atomics are load and store, except for ATMOIC_LOAD and ATOMIC_STORE.
- // For now, atomics are considered to be volatile always.
// FIXME: Volatile isn't really correct; we should keep track of atomic
// orderings in the memoperand.
unsigned Flags = MachineMemOperand::MOVolatile;
- if (Opcode != ISD::ATOMIC_STORE)
- Flags |= MachineMemOperand::MOLoad;
- if (Opcode != ISD::ATOMIC_LOAD)
- Flags |= MachineMemOperand::MOStore;
+ Flags |= MachineMemOperand::MOLoad;
+ Flags |= MachineMemOperand::MOStore;
MachineMemOperand *MMO =
MF.getMachineMemOperand(PtrInfo, Flags, MemVT.getStoreSize(), Alignment);
- return getAtomic(Opcode, dl, MemVT, Chain, Ptr, Cmp, Swp, MMO,
- SuccessOrdering, FailureOrdering, SynchScope);
+ return getAtomicCmpSwap(Opcode, dl, MemVT, VTs, Chain, Ptr, Cmp, Swp, MMO,
+ SuccessOrdering, FailureOrdering, SynchScope);
}
-SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT,
- SDValue Chain,
- SDValue Ptr, SDValue Cmp,
- SDValue Swp, MachineMemOperand *MMO,
- AtomicOrdering SuccessOrdering,
- AtomicOrdering FailureOrdering,
- SynchronizationScope SynchScope) {
- assert(Opcode == ISD::ATOMIC_CMP_SWAP && "Invalid Atomic Op");
+SDValue SelectionDAG::getAtomicCmpSwap(unsigned Opcode, SDLoc dl, EVT MemVT,
+ SDVTList VTs, SDValue Chain, SDValue Ptr,
+ SDValue Cmp, SDValue Swp,
+ MachineMemOperand *MMO,
+ AtomicOrdering SuccessOrdering,
+ AtomicOrdering FailureOrdering,
+ SynchronizationScope SynchScope) {
+ assert(Opcode == ISD::ATOMIC_CMP_SWAP ||
+ Opcode == ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS);
assert(Cmp.getValueType() == Swp.getValueType() && "Invalid Atomic Op Types");
- EVT VT = Cmp.getValueType();
-
- SDVTList VTs = getVTList(VT, MVT::Other);
SDValue Ops[] = {Chain, Ptr, Cmp, Swp};
- return getAtomic(Opcode, dl, MemVT, VTs, Ops, MMO, SuccessOrdering,
- FailureOrdering, SynchScope);
+ return getAtomic(Opcode, dl, MemVT, VTs, Ops, MMO,
+ SuccessOrdering, FailureOrdering, SynchScope);
}
SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT,
@@ -5610,10 +5734,13 @@ SelectionDAG::getTargetInsertSubreg(int SRIdx, SDLoc DL, EVT VT,
/// getNodeIfExists - Get the specified node if it's already available, or
/// else return NULL.
SDNode *SelectionDAG::getNodeIfExists(unsigned Opcode, SDVTList VTList,
- ArrayRef<SDValue> Ops) {
- if (VTList.VTs[VTList.NumVTs-1] != MVT::Glue) {
+ ArrayRef<SDValue> Ops, bool nuw, bool nsw,
+ bool exact) {
+ if (VTList.VTs[VTList.NumVTs - 1] != MVT::Glue) {
FoldingSetNodeID ID;
AddNodeIDNode(ID, Opcode, VTList, Ops);
+ if (isBinOpWithFlags(Opcode))
+ AddBinaryNodeIDCustom(ID, nuw, nsw, exact);
void *IP = nullptr;
if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
return E;
@@ -5960,7 +6087,7 @@ unsigned SelectionDAG::AssignTopologicalOrder() {
// count of outstanding operands.
for (allnodes_iterator I = allnodes_begin(),E = allnodes_end(); I != E; ) {
SDNode *N = I++;
- checkForCycles(N);
+ checkForCycles(N, this);
unsigned Degree = N->getNumOperands();
if (Degree == 0) {
// A node with no uses, add it to the result array immediately.
@@ -5980,7 +6107,7 @@ unsigned SelectionDAG::AssignTopologicalOrder() {
// such that by the time the end is reached all nodes will be sorted.
for (allnodes_iterator I = allnodes_begin(),E = allnodes_end(); I != E; ++I) {
SDNode *N = I;
- checkForCycles(N);
+ checkForCycles(N, this);
// N is in sorted position, so all its uses have one less operand
// that needs to be sorted.
for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
@@ -6005,7 +6132,9 @@ unsigned SelectionDAG::AssignTopologicalOrder() {
#ifndef NDEBUG
SDNode *S = ++I;
dbgs() << "Overran sorted position:\n";
- S->dumprFull();
+ S->dumprFull(this); dbgs() << "\n";
+ dbgs() << "Checking if this is due to cycles\n";
+ checkForCycles(this, true);
#endif
llvm_unreachable(nullptr);
}
@@ -6554,16 +6683,43 @@ bool BuildVectorSDNode::isConstantSplat(APInt &SplatValue,
return true;
}
-ConstantSDNode *BuildVectorSDNode::getConstantSplatValue() const {
- SDValue Op0 = getOperand(0);
- if (Op0.getOpcode() != ISD::Constant)
- return nullptr;
+SDValue BuildVectorSDNode::getSplatValue(BitVector *UndefElements) const {
+ if (UndefElements) {
+ UndefElements->clear();
+ UndefElements->resize(getNumOperands());
+ }
+ SDValue Splatted;
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+ SDValue Op = getOperand(i);
+ if (Op.getOpcode() == ISD::UNDEF) {
+ if (UndefElements)
+ (*UndefElements)[i] = true;
+ } else if (!Splatted) {
+ Splatted = Op;
+ } else if (Splatted != Op) {
+ return SDValue();
+ }
+ }
+
+ if (!Splatted) {
+ assert(getOperand(0).getOpcode() == ISD::UNDEF &&
+ "Can only have a splat without a constant for all undefs.");
+ return getOperand(0);
+ }
- for (unsigned i = 1, e = getNumOperands(); i != e; ++i)
- if (getOperand(i) != Op0)
- return nullptr;
+ return Splatted;
+}
- return cast<ConstantSDNode>(Op0);
+ConstantSDNode *
+BuildVectorSDNode::getConstantSplatNode(BitVector *UndefElements) const {
+ return dyn_cast_or_null<ConstantSDNode>(
+ getSplatValue(UndefElements).getNode());
+}
+
+ConstantFPSDNode *
+BuildVectorSDNode::getConstantFPSplatNode(BitVector *UndefElements) const {
+ return dyn_cast_or_null<ConstantFPSDNode>(
+ getSplatValue(UndefElements).getNode());
}
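A standalone model of the new getSplatValue walk (a sentinel integer stands in for ISD::UNDEF; everything here is illustrative, not the SDNode API):

    #include <cstdio>
    #include <vector>

    int main() {
      const int Undef = -999;              // hypothetical undef marker
      std::vector<int> Ops = {7, Undef, 7, 7};
      std::vector<bool> UndefElements(Ops.size());
      int Splatted = 0;
      bool HaveSplat = false, IsSplat = true;
      for (size_t i = 0; i < Ops.size(); ++i) {
        if (Ops[i] == Undef) { UndefElements[i] = true; continue; }
        if (!HaveSplat) { Splatted = Ops[i]; HaveSplat = true; }
        else if (Ops[i] != Splatted) { IsSplat = false; break; }
      }
      // Undef lanes don't break the splat, but they are recorded.
      printf("splat=%d value=%d undef[1]=%d\n", IsSplat && HaveSplat,
             Splatted, (int)UndefElements[1]); // splat=1 value=7 undef[1]=1
    }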
bool BuildVectorSDNode::isConstant() const {
@@ -6591,10 +6747,11 @@ bool ShuffleVectorSDNode::isSplatMask(const int *Mask, EVT VT) {
return true;
}
-#ifdef XDEBUG
+#ifndef NDEBUG
static void checkForCyclesHelper(const SDNode *N,
SmallPtrSet<const SDNode*, 32> &Visited,
- SmallPtrSet<const SDNode*, 32> &Checked) {
+ SmallPtrSet<const SDNode*, 32> &Checked,
+ const llvm::SelectionDAG *DAG) {
// If this node has already been checked, don't check it again.
if (Checked.count(N))
return;
@@ -6602,29 +6759,37 @@ static void checkForCyclesHelper(const SDNode *N,
// If a node has already been visited on this depth-first walk, reject it as
// a cycle.
if (!Visited.insert(N)) {
- dbgs() << "Offending node:\n";
- N->dumprFull();
errs() << "Detected cycle in SelectionDAG\n";
+ dbgs() << "Offending node:\n";
+ N->dumprFull(DAG); dbgs() << "\n";
abort();
}
for(unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
- checkForCyclesHelper(N->getOperand(i).getNode(), Visited, Checked);
+ checkForCyclesHelper(N->getOperand(i).getNode(), Visited, Checked, DAG);
Checked.insert(N);
Visited.erase(N);
}
#endif
-void llvm::checkForCycles(const llvm::SDNode *N) {
+void llvm::checkForCycles(const llvm::SDNode *N,
+ const llvm::SelectionDAG *DAG,
+ bool force) {
+#ifndef NDEBUG
+ bool check = force;
#ifdef XDEBUG
- assert(N && "Checking nonexistent SDNode");
- SmallPtrSet<const SDNode*, 32> visited;
- SmallPtrSet<const SDNode*, 32> checked;
- checkForCyclesHelper(N, visited, checked);
-#endif
+ check = true;
+#endif // XDEBUG
+ if (check) {
+ assert(N && "Checking nonexistent SDNode");
+ SmallPtrSet<const SDNode*, 32> visited;
+ SmallPtrSet<const SDNode*, 32> checked;
+ checkForCyclesHelper(N, visited, checked, DAG);
+ }
+#endif // !NDEBUG
}
-void llvm::checkForCycles(const llvm::SelectionDAG *DAG) {
- checkForCycles(DAG->getRoot().getNode());
+void llvm::checkForCycles(const llvm::SelectionDAG *DAG, bool force) {
+ checkForCycles(DAG->getRoot().getNode(), DAG, force);
}
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 070e929..28d8e98 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -169,7 +169,7 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, SDLoc DL,
SDValue Lo, Hi;
Lo = DAG.getNode(ISD::BITCAST, DL, EVT(MVT::f64), Parts[0]);
Hi = DAG.getNode(ISD::BITCAST, DL, EVT(MVT::f64), Parts[1]);
- if (TLI.isBigEndian())
+ if (TLI.hasBigEndianPartOrdering(ValueVT))
std::swap(Lo, Hi);
Val = DAG.getNode(ISD::BUILD_PAIR, DL, ValueVT, Lo, Hi);
} else {
@@ -2784,8 +2784,22 @@ void SelectionDAGBuilder::visitFSub(const User &I) {
void SelectionDAGBuilder::visitBinary(const User &I, unsigned OpCode) {
SDValue Op1 = getValue(I.getOperand(0));
SDValue Op2 = getValue(I.getOperand(1));
- setValue(&I, DAG.getNode(OpCode, getCurSDLoc(),
- Op1.getValueType(), Op1, Op2));
+
+ bool nuw = false;
+ bool nsw = false;
+ bool exact = false;
+ if (const OverflowingBinaryOperator *OFBinOp =
+ dyn_cast<const OverflowingBinaryOperator>(&I)) {
+ nuw = OFBinOp->hasNoUnsignedWrap();
+ nsw = OFBinOp->hasNoSignedWrap();
+ }
+ if (const PossiblyExactOperator *ExactOp =
+ dyn_cast<const PossiblyExactOperator>(&I))
+ exact = ExactOp->isExact();
+
+ SDValue BinNodeValue = DAG.getNode(OpCode, getCurSDLoc(), Op1.getValueType(),
+ Op1, Op2, nuw, nsw, exact);
+ setValue(&I, BinNodeValue);
}
void SelectionDAGBuilder::visitShift(const User &I, unsigned Opcode) {
@@ -2816,8 +2830,25 @@ void SelectionDAGBuilder::visitShift(const User &I, unsigned Opcode) {
Op2 = DAG.getZExtOrTrunc(Op2, DL, MVT::i32);
}
- setValue(&I, DAG.getNode(Opcode, getCurSDLoc(),
- Op1.getValueType(), Op1, Op2));
+ bool nuw = false;
+ bool nsw = false;
+ bool exact = false;
+
+ if (Opcode == ISD::SRL || Opcode == ISD::SRA || Opcode == ISD::SHL) {
+
+ if (const OverflowingBinaryOperator *OFBinOp =
+ dyn_cast<const OverflowingBinaryOperator>(&I)) {
+ nuw = OFBinOp->hasNoUnsignedWrap();
+ nsw = OFBinOp->hasNoSignedWrap();
+ }
+ if (const PossiblyExactOperator *ExactOp =
+ dyn_cast<const PossiblyExactOperator>(&I))
+ exact = ExactOp->isExact();
+ }
+
+ SDValue Res = DAG.getNode(Opcode, getCurSDLoc(), Op1.getValueType(), Op1, Op2,
+ nuw, nsw, exact);
+ setValue(&I, Res);
}
void SelectionDAGBuilder::visitSDiv(const User &I) {
@@ -3570,12 +3601,12 @@ static SDValue InsertFenceForAtomic(SDValue Chain, AtomicOrdering Order,
if (Before) {
if (Order == AcquireRelease || Order == SequentiallyConsistent)
Order = Release;
- else if (Order == Acquire || Order == Monotonic)
+ else if (Order == Acquire || Order == Monotonic || Order == Unordered)
return Chain;
} else {
if (Order == AcquireRelease)
Order = Acquire;
- else if (Order == Release || Order == Monotonic)
+ else if (Order == Release || Order == Monotonic || Order == Unordered)
return Chain;
}
SDValue Ops[3];
@@ -3598,19 +3629,17 @@ void SelectionDAGBuilder::visitAtomicCmpXchg(const AtomicCmpXchgInst &I) {
InChain = InsertFenceForAtomic(InChain, SuccessOrder, Scope, true, dl,
DAG, *TLI);
- SDValue L =
- DAG.getAtomic(ISD::ATOMIC_CMP_SWAP, dl,
- getValue(I.getCompareOperand()).getSimpleValueType(),
- InChain,
- getValue(I.getPointerOperand()),
- getValue(I.getCompareOperand()),
- getValue(I.getNewValOperand()),
- MachinePointerInfo(I.getPointerOperand()), 0 /* Alignment */,
- TLI->getInsertFencesForAtomic() ? Monotonic : SuccessOrder,
- TLI->getInsertFencesForAtomic() ? Monotonic : FailureOrder,
- Scope);
+ MVT MemVT = getValue(I.getCompareOperand()).getSimpleValueType();
+ SDVTList VTs = DAG.getVTList(MemVT, MVT::i1, MVT::Other);
+ SDValue L = DAG.getAtomicCmpSwap(
+ ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, dl, MemVT, VTs, InChain,
+ getValue(I.getPointerOperand()), getValue(I.getCompareOperand()),
+ getValue(I.getNewValOperand()), MachinePointerInfo(I.getPointerOperand()),
+ 0 /* Alignment */,
+ TLI->getInsertFencesForAtomic() ? Monotonic : SuccessOrder,
+ TLI->getInsertFencesForAtomic() ? Monotonic : FailureOrder, Scope);
- SDValue OutChain = L.getValue(1);
+ SDValue OutChain = L.getValue(2);
if (TLI->getInsertFencesForAtomic())
OutChain = InsertFenceForAtomic(OutChain, SuccessOrder, Scope, false, dl,
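With ATOMIC_CMP_SWAP_WITH_SUCCESS the node now produces three results: the loaded value (0), an i1 success flag (1), and the chain (2), which is why OutChain moves to getValue(2). A scalar model of the first two results:

    #include <cstdio>
    #include <utility>

    // Model of cmpxchg's value results: the old memory contents plus a
    // boolean saying whether the exchange happened.
    static std::pair<int, bool> cmpxchg(int &Mem, int Cmp, int New) {
      int Old = Mem;
      bool Success = (Old == Cmp);
      if (Success)
        Mem = New;
      return {Old, Success};
    }

    int main() {
      int M = 5;
      auto R = cmpxchg(M, 5, 9);
      printf("old=%d success=%d mem=%d\n", R.first, R.second, M);
      // prints: old=5 success=1 mem=9
    }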
@@ -5293,7 +5322,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
CLI.setDebugLoc(sdl).setChain(getRoot())
.setCallee(CallingConv::C, I.getType(),
DAG.getExternalSymbol(TrapFuncName.data(), TLI->getPointerTy()),
- &Args, 0);
+ std::move(Args), 0);
std::pair<SDValue, SDValue> Result = TLI->LowerCallTo(CLI);
DAG.setRoot(Result.second);
@@ -5410,6 +5439,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
bool isTailCall,
MachineBasicBlock *LandingPad) {
+ const TargetLowering *TLI = TM.getTargetLowering();
PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType());
FunctionType *FTy = cast<FunctionType>(PT->getElementType());
Type *RetTy = FTy->getReturnType();
@@ -5420,45 +5450,6 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
TargetLowering::ArgListEntry Entry;
Args.reserve(CS.arg_size());
- // Check whether the function can return without sret-demotion.
- SmallVector<ISD::OutputArg, 4> Outs;
- const TargetLowering *TLI = TM.getTargetLowering();
- GetReturnInfo(RetTy, CS.getAttributes(), Outs, *TLI);
-
- bool CanLowerReturn = TLI->CanLowerReturn(CS.getCallingConv(),
- DAG.getMachineFunction(),
- FTy->isVarArg(), Outs,
- FTy->getContext());
-
- SDValue DemoteStackSlot;
- int DemoteStackIdx = -100;
-
- if (!CanLowerReturn) {
- assert(!CS.hasInAllocaArgument() &&
- "sret demotion is incompatible with inalloca");
- uint64_t TySize = TLI->getDataLayout()->getTypeAllocSize(
- FTy->getReturnType());
- unsigned Align = TLI->getDataLayout()->getPrefTypeAlignment(
- FTy->getReturnType());
- MachineFunction &MF = DAG.getMachineFunction();
- DemoteStackIdx = MF.getFrameInfo()->CreateStackObject(TySize, Align, false);
- Type *StackSlotPtrType = PointerType::getUnqual(FTy->getReturnType());
-
- DemoteStackSlot = DAG.getFrameIndex(DemoteStackIdx, TLI->getPointerTy());
- Entry.Node = DemoteStackSlot;
- Entry.Ty = StackSlotPtrType;
- Entry.isSExt = false;
- Entry.isZExt = false;
- Entry.isInReg = false;
- Entry.isSRet = true;
- Entry.isNest = false;
- Entry.isByVal = false;
- Entry.isReturned = false;
- Entry.Alignment = Align;
- Args.push_back(Entry);
- RetTy = Type::getVoidTy(FTy->getContext());
- }
-
for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end();
i != e; ++i) {
const Value *V = *i;
@@ -5499,58 +5490,20 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
// Check if target-independent constraints permit a tail call here.
// Target-dependent constraints are checked within TLI->LowerCallTo.
- if (isTailCall && !isInTailCallPosition(CS, *TLI))
+ if (isTailCall && !isInTailCallPosition(CS, DAG))
isTailCall = false;
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(getCurSDLoc()).setChain(getRoot())
- .setCallee(RetTy, FTy, Callee, &Args, CS).setTailCall(isTailCall);
+ .setCallee(RetTy, FTy, Callee, std::move(Args), CS).setTailCall(isTailCall);
std::pair<SDValue,SDValue> Result = TLI->LowerCallTo(CLI);
assert((isTailCall || Result.second.getNode()) &&
"Non-null chain expected with non-tail call!");
assert((Result.second.getNode() || !Result.first.getNode()) &&
"Null value expected with tail call!");
- if (Result.first.getNode()) {
+ if (Result.first.getNode())
setValue(CS.getInstruction(), Result.first);
- } else if (!CanLowerReturn && Result.second.getNode()) {
- // The instruction result is the result of loading from the
- // hidden sret parameter.
- SmallVector<EVT, 1> PVTs;
- Type *PtrRetTy = PointerType::getUnqual(FTy->getReturnType());
-
- ComputeValueVTs(*TLI, PtrRetTy, PVTs);
- assert(PVTs.size() == 1 && "Pointers should fit in one register");
- EVT PtrVT = PVTs[0];
-
- SmallVector<EVT, 4> RetTys;
- SmallVector<uint64_t, 4> Offsets;
- RetTy = FTy->getReturnType();
- ComputeValueVTs(*TLI, RetTy, RetTys, &Offsets);
-
- unsigned NumValues = RetTys.size();
- SmallVector<SDValue, 4> Values(NumValues);
- SmallVector<SDValue, 4> Chains(NumValues);
-
- for (unsigned i = 0; i < NumValues; ++i) {
- SDValue Add = DAG.getNode(ISD::ADD, getCurSDLoc(), PtrVT,
- DemoteStackSlot,
- DAG.getConstant(Offsets[i], PtrVT));
- SDValue L = DAG.getLoad(RetTys[i], getCurSDLoc(), Result.second, Add,
- MachinePointerInfo::getFixedStack(DemoteStackIdx, Offsets[i]),
- false, false, false, 1);
- Values[i] = L;
- Chains[i] = L.getValue(1);
- }
-
- SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(),
- MVT::Other, Chains);
- PendingLoads.push_back(Chain);
-
- setValue(CS.getInstruction(),
- DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(),
- DAG.getVTList(RetTys), Values));
- }
if (!Result.second.getNode()) {
// As a special case, a null chain means that a tail call has been emitted
@@ -6845,7 +6798,7 @@ SelectionDAGBuilder::LowerCallOperands(const CallInst &CI, unsigned ArgIdx,
Type *retTy = useVoidTy ? Type::getVoidTy(*DAG.getContext()) : CI.getType();
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(getCurSDLoc()).setChain(getRoot())
- .setCallee(CI.getCallingConv(), retTy, Callee, &Args, NumArgs)
+ .setCallee(CI.getCallingConv(), retTy, Callee, std::move(Args), NumArgs)
.setDiscardResult(!CI.use_empty());
const TargetLowering *TLI = TM.getTargetLowering();
@@ -7092,6 +7045,21 @@ void SelectionDAGBuilder::visitPatchpoint(const CallInst &CI) {
FuncInfo.MF->getFrameInfo()->setHasPatchPoint();
}
+/// Returns an AttributeSet representing the attributes applied to the return
+/// value of the given call.
+static AttributeSet getReturnAttrs(TargetLowering::CallLoweringInfo &CLI) {
+ SmallVector<Attribute::AttrKind, 2> Attrs;
+ if (CLI.RetSExt)
+ Attrs.push_back(Attribute::SExt);
+ if (CLI.RetZExt)
+ Attrs.push_back(Attribute::ZExt);
+ if (CLI.IsInReg)
+ Attrs.push_back(Attribute::InReg);
+
+ return AttributeSet::get(CLI.RetTy->getContext(), AttributeSet::ReturnIndex,
+ Attrs);
+}
+
/// TargetLowering::LowerCallTo - This is the default LowerCallTo
/// implementation, which just calls LowerCall.
/// FIXME: When all targets are
@@ -7100,24 +7068,62 @@ std::pair<SDValue, SDValue>
TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
// Handle the incoming return values from the call.
CLI.Ins.clear();
+ Type *OrigRetTy = CLI.RetTy;
SmallVector<EVT, 4> RetTys;
- ComputeValueVTs(*this, CLI.RetTy, RetTys);
- for (unsigned I = 0, E = RetTys.size(); I != E; ++I) {
- EVT VT = RetTys[I];
- MVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), VT);
- unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), VT);
- for (unsigned i = 0; i != NumRegs; ++i) {
- ISD::InputArg MyFlags;
- MyFlags.VT = RegisterVT;
- MyFlags.ArgVT = VT;
- MyFlags.Used = CLI.IsReturnValueUsed;
- if (CLI.RetSExt)
- MyFlags.Flags.setSExt();
- if (CLI.RetZExt)
- MyFlags.Flags.setZExt();
- if (CLI.IsInReg)
- MyFlags.Flags.setInReg();
- CLI.Ins.push_back(MyFlags);
+ SmallVector<uint64_t, 4> Offsets;
+ ComputeValueVTs(*this, CLI.RetTy, RetTys, &Offsets);
+
+ SmallVector<ISD::OutputArg, 4> Outs;
+ GetReturnInfo(CLI.RetTy, getReturnAttrs(CLI), Outs, *this);
+
+ bool CanLowerReturn =
+ this->CanLowerReturn(CLI.CallConv, CLI.DAG.getMachineFunction(),
+ CLI.IsVarArg, Outs, CLI.RetTy->getContext());
+
+ SDValue DemoteStackSlot;
+ int DemoteStackIdx = -100;
+ if (!CanLowerReturn) {
+ // FIXME: equivalent assert?
+ // assert(!CS.hasInAllocaArgument() &&
+ // "sret demotion is incompatible with inalloca");
+ uint64_t TySize = getDataLayout()->getTypeAllocSize(CLI.RetTy);
+ unsigned Align = getDataLayout()->getPrefTypeAlignment(CLI.RetTy);
+ MachineFunction &MF = CLI.DAG.getMachineFunction();
+ DemoteStackIdx = MF.getFrameInfo()->CreateStackObject(TySize, Align, false);
+ Type *StackSlotPtrType = PointerType::getUnqual(CLI.RetTy);
+
+ DemoteStackSlot = CLI.DAG.getFrameIndex(DemoteStackIdx, getPointerTy());
+ ArgListEntry Entry;
+ Entry.Node = DemoteStackSlot;
+ Entry.Ty = StackSlotPtrType;
+ Entry.isSExt = false;
+ Entry.isZExt = false;
+ Entry.isInReg = false;
+ Entry.isSRet = true;
+ Entry.isNest = false;
+ Entry.isByVal = false;
+ Entry.isReturned = false;
+ Entry.Alignment = Align;
+ CLI.getArgs().insert(CLI.getArgs().begin(), Entry);
+ CLI.RetTy = Type::getVoidTy(CLI.RetTy->getContext());
+ } else {
+ for (unsigned I = 0, E = RetTys.size(); I != E; ++I) {
+ EVT VT = RetTys[I];
+ MVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), VT);
+ unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), VT);
+ for (unsigned i = 0; i != NumRegs; ++i) {
+ ISD::InputArg MyFlags;
+ MyFlags.VT = RegisterVT;
+ MyFlags.ArgVT = VT;
+ MyFlags.Used = CLI.IsReturnValueUsed;
+ if (CLI.RetSExt)
+ MyFlags.Flags.setSExt();
+ if (CLI.RetZExt)
+ MyFlags.Flags.setZExt();
+ if (CLI.IsInReg)
+ MyFlags.Flags.setInReg();
+ CLI.Ins.push_back(MyFlags);
+ }
}
}
@@ -7260,31 +7266,59 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
"LowerCall emitted a value with the wrong type!");
});
- // Collect the legal value parts into potentially illegal values
- // that correspond to the original function's return values.
- ISD::NodeType AssertOp = ISD::DELETED_NODE;
- if (CLI.RetSExt)
- AssertOp = ISD::AssertSext;
- else if (CLI.RetZExt)
- AssertOp = ISD::AssertZext;
SmallVector<SDValue, 4> ReturnValues;
- unsigned CurReg = 0;
- for (unsigned I = 0, E = RetTys.size(); I != E; ++I) {
- EVT VT = RetTys[I];
- MVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), VT);
- unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), VT);
-
- ReturnValues.push_back(getCopyFromParts(CLI.DAG, CLI.DL, &InVals[CurReg],
- NumRegs, RegisterVT, VT, nullptr,
- AssertOp));
- CurReg += NumRegs;
- }
-
- // For a function returning void, there is no return value. We can't create
- // such a node, so we just return a null return value in that case. In
- // that case, nothing will actually look at the value.
- if (ReturnValues.empty())
- return std::make_pair(SDValue(), CLI.Chain);
+ if (!CanLowerReturn) {
+ // The instruction result is the result of loading from the
+ // hidden sret parameter.
+ SmallVector<EVT, 1> PVTs;
+ Type *PtrRetTy = PointerType::getUnqual(OrigRetTy);
+
+ ComputeValueVTs(*this, PtrRetTy, PVTs);
+ assert(PVTs.size() == 1 && "Pointers should fit in one register");
+ EVT PtrVT = PVTs[0];
+
+ unsigned NumValues = RetTys.size();
+ ReturnValues.resize(NumValues);
+ SmallVector<SDValue, 4> Chains(NumValues);
+
+ for (unsigned i = 0; i < NumValues; ++i) {
+ SDValue Add = CLI.DAG.getNode(ISD::ADD, CLI.DL, PtrVT, DemoteStackSlot,
+ CLI.DAG.getConstant(Offsets[i], PtrVT));
+ SDValue L = CLI.DAG.getLoad(
+ RetTys[i], CLI.DL, CLI.Chain, Add,
+ MachinePointerInfo::getFixedStack(DemoteStackIdx, Offsets[i]), false,
+ false, false, 1);
+ ReturnValues[i] = L;
+ Chains[i] = L.getValue(1);
+ }
+
+ CLI.Chain = CLI.DAG.getNode(ISD::TokenFactor, CLI.DL, MVT::Other, Chains);
+ } else {
+ // Collect the legal value parts into potentially illegal values
+ // that correspond to the original function's return values.
+ ISD::NodeType AssertOp = ISD::DELETED_NODE;
+ if (CLI.RetSExt)
+ AssertOp = ISD::AssertSext;
+ else if (CLI.RetZExt)
+ AssertOp = ISD::AssertZext;
+ unsigned CurReg = 0;
+ for (unsigned I = 0, E = RetTys.size(); I != E; ++I) {
+ EVT VT = RetTys[I];
+ MVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), VT);
+ unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), VT);
+
+ ReturnValues.push_back(getCopyFromParts(CLI.DAG, CLI.DL, &InVals[CurReg],
+ NumRegs, RegisterVT, VT, nullptr,
+ AssertOp));
+ CurReg += NumRegs;
+ }
+
+ // For a function returning void, there is no return value. We can't create
+ // such a node, so we just return a null return value in that case. In
+ // that case, nothing will actually look at the value.
+ if (ReturnValues.empty())
+ return std::make_pair(SDValue(), CLI.Chain);
+ }
SDValue Res = CLI.DAG.getNode(ISD::MERGE_VALUES, CLI.DL,
CLI.DAG.getVTList(RetTys), ReturnValues);
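The sret-demotion path that moved into LowerCallTo boils down to: allocate a stack slot, pass its address as a hidden first argument, and load the return value back out afterwards. A conceptual C++ model (names invented for illustration):

    #include <cstdio>
    #include <cstring>

    struct Big { int A, B, C, D; }; // too big to come back in registers

    static void callee(Big *Sret) { // hidden first argument
      Big R = {1, 2, 3, 4};
      std::memcpy(Sret, &R, sizeof(R));
    }

    int main() {
      Big Slot;        // plays the role of DemoteStackSlot
      callee(&Slot);   // call with the hidden pointer
      printf("%d %d %d %d\n", Slot.A, Slot.B, Slot.C, Slot.D);
    }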
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index fb29691..84679f9 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -320,7 +320,7 @@ private:
/// 1. Preserve the architecture independence of stack protector generation.
///
/// 2. Preserve the normal IR level stack protector check for platforms like
- /// OpenBSD for which we support platform specific stack protector
+ /// OpenBSD for which we support platform-specific stack protector
/// generation.
///
  /// The main problem that guided the present solution is that one cannot
@@ -338,7 +338,7 @@ private:
/// basic block (where the return inst is placed) and then move it back
/// later at SelectionDAG/MI time before the stack protector check if the
/// tail call optimization failed. The MI level option was nixed
- /// immediately since it would require platform specific pattern
+ /// immediately since it would require platform-specific pattern
/// matching. The SelectionDAG level option was nixed because
/// SelectionDAG only processes one IR level basic block at a time
/// implying one could not create a DAG Combine to move the callinst.
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
index d6b5255..b3a452f 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -55,6 +55,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::PREFETCH: return "Prefetch";
case ISD::ATOMIC_FENCE: return "AtomicFence";
case ISD::ATOMIC_CMP_SWAP: return "AtomicCmpSwap";
+ case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: return "AtomicCmpSwapWithSuccess";
case ISD::ATOMIC_SWAP: return "AtomicSwap";
case ISD::ATOMIC_LOAD_ADD: return "AtomicLoadAdd";
case ISD::ATOMIC_LOAD_SUB: return "AtomicLoadSub";
@@ -220,6 +221,9 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::ZERO_EXTEND: return "zero_extend";
case ISD::ANY_EXTEND: return "any_extend";
case ISD::SIGN_EXTEND_INREG: return "sign_extend_inreg";
+ case ISD::ANY_EXTEND_VECTOR_INREG: return "any_extend_vector_inreg";
+ case ISD::SIGN_EXTEND_VECTOR_INREG: return "sign_extend_vector_inreg";
+ case ISD::ZERO_EXTEND_VECTOR_INREG: return "zero_extend_vector_inreg";
case ISD::TRUNCATE: return "truncate";
case ISD::FP_ROUND: return "fp_round";
case ISD::FLT_ROUNDS_: return "flt_rounds";
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index 472fc9c..57e22e2 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -141,6 +141,25 @@ STATISTIC(NumFastIselFailShuffleVector,"Fast isel fails on ShuffleVector");
STATISTIC(NumFastIselFailExtractValue,"Fast isel fails on ExtractValue");
STATISTIC(NumFastIselFailInsertValue,"Fast isel fails on InsertValue");
STATISTIC(NumFastIselFailLandingPad,"Fast isel fails on LandingPad");
+
+// Intrinsic instructions...
+STATISTIC(NumFastIselFailIntrinsicCall, "Fast isel fails on Intrinsic call");
+STATISTIC(NumFastIselFailSAddWithOverflow,
+ "Fast isel fails on sadd.with.overflow");
+STATISTIC(NumFastIselFailUAddWithOverflow,
+ "Fast isel fails on uadd.with.overflow");
+STATISTIC(NumFastIselFailSSubWithOverflow,
+ "Fast isel fails on ssub.with.overflow");
+STATISTIC(NumFastIselFailUSubWithOverflow,
+ "Fast isel fails on usub.with.overflow");
+STATISTIC(NumFastIselFailSMulWithOverflow,
+ "Fast isel fails on smul.with.overflow");
+STATISTIC(NumFastIselFailUMulWithOverflow,
+ "Fast isel fails on umul.with.overflow");
+STATISTIC(NumFastIselFailFrameaddress, "Fast isel fails on Frameaddress");
+STATISTIC(NumFastIselFailSqrt, "Fast isel fails on sqrt call");
+STATISTIC(NumFastIselFailStackMap, "Fast isel fails on StackMap call");
+STATISTIC(NumFastIselFailPatchPoint, "Fast isel fails on PatchPoint call");
#endif
static cl::opt<bool>
@@ -974,7 +993,37 @@ static void collectFailStats(const Instruction *I) {
case Instruction::FCmp: NumFastIselFailFCmp++; return;
case Instruction::PHI: NumFastIselFailPHI++; return;
case Instruction::Select: NumFastIselFailSelect++; return;
- case Instruction::Call: NumFastIselFailCall++; return;
+ case Instruction::Call: {
+ if (auto const *Intrinsic = dyn_cast<IntrinsicInst>(I)) {
+ switch (Intrinsic->getIntrinsicID()) {
+ default:
+ NumFastIselFailIntrinsicCall++; return;
+ case Intrinsic::sadd_with_overflow:
+ NumFastIselFailSAddWithOverflow++; return;
+ case Intrinsic::uadd_with_overflow:
+ NumFastIselFailUAddWithOverflow++; return;
+ case Intrinsic::ssub_with_overflow:
+ NumFastIselFailSSubWithOverflow++; return;
+ case Intrinsic::usub_with_overflow:
+ NumFastIselFailUSubWithOverflow++; return;
+ case Intrinsic::smul_with_overflow:
+ NumFastIselFailSMulWithOverflow++; return;
+ case Intrinsic::umul_with_overflow:
+ NumFastIselFailUMulWithOverflow++; return;
+ case Intrinsic::frameaddress:
+ NumFastIselFailFrameaddress++; return;
+ case Intrinsic::sqrt:
+ NumFastIselFailSqrt++; return;
+ case Intrinsic::experimental_stackmap:
+ NumFastIselFailStackMap++; return;
+ case Intrinsic::experimental_patchpoint_void: // fall-through
+ case Intrinsic::experimental_patchpoint_i64:
+ NumFastIselFailPatchPoint++; return;
+ }
+ }
+ NumFastIselFailCall++;
+ return;
+ }
case Instruction::Shl: NumFastIselFailShl++; return;
case Instruction::LShr: NumFastIselFailLShr++; return;
case Instruction::AShr: NumFastIselFailAShr++; return;
diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index b75d805..42372a2 100644
--- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -105,7 +105,7 @@ TargetLowering::makeLibCall(SelectionDAG &DAG,
Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(dl).setChain(DAG.getEntryNode())
- .setCallee(getLibcallCallingConv(LC), RetTy, Callee, &Args, 0)
+ .setCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args), 0)
.setNoReturn(doesNotReturn).setDiscardResult(!isReturnValueUsed)
.setSExtResult(isSigned).setZExtResult(!isSigned);
return LowerCallTo(CLI);
@@ -327,6 +327,10 @@ TargetLowering::TargetLoweringOpt::ShrinkDemandedOp(SDValue Op,
assert(Op.getNode()->getNumValues() == 1 &&
"ShrinkDemandedOp only supports nodes with one result!");
+ // Early return, as this function cannot handle vector types.
+ if (Op.getValueType().isVector())
+ return false;
+
// Don't do this if the node has another user, which may require the
// full value.
if (!Op.getNode()->hasOneUse())
@@ -1146,18 +1150,21 @@ bool TargetLowering::isConstTrueVal(const SDNode *N) const {
if (!N)
return false;
- bool IsVec = false;
const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N);
if (!CN) {
const BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N);
if (!BV)
return false;
- IsVec = true;
- CN = BV->getConstantSplatValue();
+ BitVector UndefElements;
+ CN = BV->getConstantSplatNode(&UndefElements);
+ // Only interested in constant splats, and we don't try to handle undef
+ // elements in identifying boolean constants.
+ if (!CN || UndefElements.none())
+ return false;
}
- switch (getBooleanContents(IsVec)) {
+ switch (getBooleanContents(N->getValueType(0))) {
case UndefinedBooleanContent:
return CN->getAPIntValue()[0];
case ZeroOrOneBooleanContent:
@@ -1173,18 +1180,21 @@ bool TargetLowering::isConstFalseVal(const SDNode *N) const {
if (!N)
return false;
- bool IsVec = false;
const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N);
if (!CN) {
const BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N);
if (!BV)
return false;
- IsVec = true;
- CN = BV->getConstantSplatValue();
+ BitVector UndefElements;
+ CN = BV->getConstantSplatNode(&UndefElements);
+ // Only interested in constant splats, and we don't try to handle undef
+ // elements in identifying boolean constants.
+ if (!CN || UndefElements.none())
+ return false;
}
- if (getBooleanContents(IsVec) == UndefinedBooleanContent)
+ if (getBooleanContents(N->getValueType(0)) == UndefinedBooleanContent)
return !CN->getAPIntValue()[0];
return CN->isNullValue();
@@ -1205,7 +1215,8 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
case ISD::SETFALSE2: return DAG.getConstant(0, VT);
case ISD::SETTRUE:
case ISD::SETTRUE2: {
- TargetLowering::BooleanContent Cnt = getBooleanContents(VT.isVector());
+ TargetLowering::BooleanContent Cnt =
+ getBooleanContents(N0->getValueType(0));
return DAG.getConstant(
Cnt == TargetLowering::ZeroOrNegativeOneBooleanContent ? -1ULL : 1, VT);
}
@@ -1412,7 +1423,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
SDValue NewSetCC = DAG.getSetCC(dl, NewSetCCVT, N0.getOperand(0),
NewConst, Cond);
- return DAG.getBoolExtOrTrunc(NewSetCC, dl, VT);
+ return DAG.getBoolExtOrTrunc(NewSetCC, dl, VT, N0.getValueType());
}
break;
}
@@ -1496,7 +1507,8 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
}
} else if (N1C->getAPIntValue() == 1 &&
(VT == MVT::i1 ||
- getBooleanContents(false) == ZeroOrOneBooleanContent)) {
+ getBooleanContents(N0->getValueType(0)) ==
+ ZeroOrOneBooleanContent)) {
SDValue Op0 = N0;
if (Op0.getOpcode() == ISD::TRUNCATE)
Op0 = Op0.getOperand(0);
@@ -1767,7 +1779,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
// The sext(setcc()) => setcc() optimization relies on the appropriate
// constant being emitted.
uint64_t EqVal = 0;
- switch (getBooleanContents(N0.getValueType().isVector())) {
+ switch (getBooleanContents(N0.getValueType())) {
case UndefinedBooleanContent:
case ZeroOrOneBooleanContent:
EqVal = ISD::isTrueWhenEqual(Cond);
@@ -2613,7 +2625,8 @@ SDValue TargetLowering::BuildExactSDIV(SDValue Op1, SDValue Op2, SDLoc dl,
if (ShAmt) {
// TODO: For UDIV use SRL instead of SRA.
SDValue Amt = DAG.getConstant(ShAmt, getShiftAmountTy(Op1.getValueType()));
- Op1 = DAG.getNode(ISD::SRA, dl, Op1.getValueType(), Op1, Amt);
+ Op1 = DAG.getNode(ISD::SRA, dl, Op1.getValueType(), Op1, Amt, false, false,
+ true);
d = d.ashr(ShAmt);
}
diff --git a/lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp b/lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp
index 1120be8..0e89bad 100644
--- a/lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp
@@ -15,8 +15,8 @@
#include "llvm/Target/TargetMachine.h"
using namespace llvm;
-TargetSelectionDAGInfo::TargetSelectionDAGInfo(const TargetMachine &TM)
- : DL(TM.getDataLayout()) {
+TargetSelectionDAGInfo::TargetSelectionDAGInfo(const DataLayout *DL)
+ : DL(DL) {
}
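
With this constructor change, a target's SelectionDAGInfo subclass would plausibly be updated along these lines (FooSelectionDAGInfo is a hypothetical name used for illustration):

    // Hypothetical target subclass: the DataLayout pointer is now handed in
    // directly instead of being fetched from a TargetMachine.
    class FooSelectionDAGInfo : public TargetSelectionDAGInfo {
    public:
      explicit FooSelectionDAGInfo(const DataLayout *DL)
          : TargetSelectionDAGInfo(DL) {}
    };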
TargetSelectionDAGInfo::~TargetSelectionDAGInfo() {
diff --git a/lib/CodeGen/StackMapLivenessAnalysis.cpp b/lib/CodeGen/StackMapLivenessAnalysis.cpp
index 4dd87dd..3ba502f 100644
--- a/lib/CodeGen/StackMapLivenessAnalysis.cpp
+++ b/lib/CodeGen/StackMapLivenessAnalysis.cpp
@@ -28,10 +28,9 @@ using namespace llvm;
#define DEBUG_TYPE "stackmaps"
namespace llvm {
-cl::opt<bool> EnableStackMapLiveness("enable-stackmap-liveness",
- cl::Hidden, cl::desc("Enable StackMap Liveness Analysis Pass"));
cl::opt<bool> EnablePatchPointLiveness("enable-patchpoint-liveness",
- cl::Hidden, cl::desc("Enable PatchPoint Liveness Analysis Pass"));
+ cl::Hidden, cl::init(true),
+ cl::desc("Enable PatchPoint Liveness Analysis Pass"));
}
STATISTIC(NumStackMapFuncVisited, "Number of functions visited");
@@ -62,15 +61,17 @@ void StackMapLiveness::getAnalysisUsage(AnalysisUsage &AU) const {
/// Calculate the liveness information for the given machine function.
bool StackMapLiveness::runOnMachineFunction(MachineFunction &_MF) {
+ if (!EnablePatchPointLiveness)
+ return false;
+
DEBUG(dbgs() << "********** COMPUTING STACKMAP LIVENESS: "
<< _MF.getName() << " **********\n");
MF = &_MF;
TRI = MF->getTarget().getRegisterInfo();
++NumStackMapFuncVisited;
- // Skip this function if there are no stackmaps or patchpoints to process.
- if (!((MF->getFrameInfo()->hasStackMap() && EnableStackMapLiveness) ||
- (MF->getFrameInfo()->hasPatchPoint() && EnablePatchPointLiveness))) {
+ // Skip this function if there are no patchpoints to process.
+ if (!MF->getFrameInfo()->hasPatchPoint()) {
++NumStackMapFuncSkipped;
return false;
}
@@ -88,13 +89,10 @@ bool StackMapLiveness::calculateLiveness() {
LiveRegs.addLiveOuts(MBBI);
bool HasStackMap = false;
// Reverse iterate over all instructions and add the current live register
- // set to an instruction if we encounter a stackmap or patchpoint
- // instruction.
+ // set to an instruction if we encounter a patchpoint instruction.
for (MachineBasicBlock::reverse_iterator I = MBBI->rbegin(),
E = MBBI->rend(); I != E; ++I) {
- int Opc = I->getOpcode();
- if ((EnableStackMapLiveness && (Opc == TargetOpcode::STACKMAP)) ||
- (EnablePatchPointLiveness && (Opc == TargetOpcode::PATCHPOINT))) {
+ if (I->getOpcode() == TargetOpcode::PATCHPOINT) {
addLiveOutSetToMI(*I);
HasChanged = true;
HasStackMap = true;
diff --git a/lib/CodeGen/TargetInstrInfo.cpp b/lib/CodeGen/TargetInstrInfo.cpp
index c3f84c6..83966bd0 100644
--- a/lib/CodeGen/TargetInstrInfo.cpp
+++ b/lib/CodeGen/TargetInstrInfo.cpp
@@ -671,7 +671,7 @@ bool TargetInstrInfo::usePreRAHazardRecognizer() const {
// Default implementation of CreateTargetRAHazardRecognizer.
ScheduleHazardRecognizer *TargetInstrInfo::
-CreateTargetHazardRecognizer(const TargetMachine *TM,
+CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI,
const ScheduleDAG *DAG) const {
// Dummy hazard recognizer allows all instructions to issue.
return new ScheduleHazardRecognizer();
diff --git a/lib/CodeGen/TargetLoweringBase.cpp b/lib/CodeGen/TargetLoweringBase.cpp
index 2634d71..c574fd4 100644
--- a/lib/CodeGen/TargetLoweringBase.cpp
+++ b/lib/CodeGen/TargetLoweringBase.cpp
@@ -39,7 +39,7 @@ using namespace llvm;
/// InitLibcallNames - Set default libcall names.
///
-static void InitLibcallNames(const char **Names, const TargetMachine &TM) {
+static void InitLibcallNames(const char **Names, const Triple &TT) {
Names[RTLIB::SHL_I16] = "__ashlhi3";
Names[RTLIB::SHL_I32] = "__ashlsi3";
Names[RTLIB::SHL_I64] = "__ashldi3";
@@ -384,7 +384,7 @@ static void InitLibcallNames(const char **Names, const TargetMachine &TM) {
Names[RTLIB::SYNC_FETCH_AND_UMIN_8] = "__sync_fetch_and_umin_8";
Names[RTLIB::SYNC_FETCH_AND_UMIN_16] = "__sync_fetch_and_umin_16";
- if (Triple(TM.getTargetTriple()).getEnvironment() == Triple::GNU) {
+ if (TT.getEnvironment() == Triple::GNU) {
Names[RTLIB::SINCOS_F32] = "sincosf";
Names[RTLIB::SINCOS_F64] = "sincos";
Names[RTLIB::SINCOS_F80] = "sincosl";
@@ -399,7 +399,7 @@ static void InitLibcallNames(const char **Names, const TargetMachine &TM) {
Names[RTLIB::SINCOS_PPCF128] = nullptr;
}
- if (Triple(TM.getTargetTriple()).getOS() != Triple::OpenBSD) {
+ if (TT.getOS() != Triple::OpenBSD) {
Names[RTLIB::STACKPROTECTOR_CHECK_FAIL] = "__stack_chk_fail";
} else {
// These are generally not available.
@@ -690,6 +690,7 @@ TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm,
ExceptionPointerRegister = 0;
ExceptionSelectorRegister = 0;
BooleanContents = UndefinedBooleanContent;
+ BooleanFloatContents = UndefinedBooleanContent;
BooleanVectorContents = UndefinedBooleanContent;
SchedPreferenceInfo = Sched::ILP;
JumpBufSize = 0;
@@ -702,7 +703,7 @@ TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm,
SupportJumpTables = true;
MinimumJumpTableEntries = 4;
- InitLibcallNames(LibcallRoutineNames, TM);
+ InitLibcallNames(LibcallRoutineNames, Triple(TM.getTargetTriple()));
InitCmpLibcallCCs(CmpLibcallCCs);
InitLibcallCallingConvs(LibcallCallingConvs);
}
@@ -730,6 +731,10 @@ void TargetLoweringBase::initActions() {
setIndexedStoreAction(IM, (MVT::SimpleValueType)VT, Expand);
}
+ // Most backends expect to see the node which just returns the value loaded.
+ setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS,
+ (MVT::SimpleValueType)VT, Expand);
+
// These operations default to expand.
setOperationAction(ISD::FGETSIGN, (MVT::SimpleValueType)VT, Expand);
setOperationAction(ISD::CONCAT_VECTORS, (MVT::SimpleValueType)VT, Expand);
@@ -739,8 +744,15 @@ void TargetLoweringBase::initActions() {
// These operations default to expand for vector types.
if (VT >= MVT::FIRST_VECTOR_VALUETYPE &&
- VT <= MVT::LAST_VECTOR_VALUETYPE)
+ VT <= MVT::LAST_VECTOR_VALUETYPE) {
setOperationAction(ISD::FCOPYSIGN, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::ANY_EXTEND_VECTOR_INREG,
+ (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG,
+ (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG,
+ (MVT::SimpleValueType)VT, Expand);
+ }
}
// Most targets ignore the @llvm.prefetch intrinsic.
@@ -1080,24 +1092,25 @@ void TargetLoweringBase::computeRegisterProperties() {
// Loop over all of the vector value types to see which need transformations.
for (unsigned i = MVT::FIRST_VECTOR_VALUETYPE;
i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
- MVT VT = (MVT::SimpleValueType)i;
- if (isTypeLegal(VT)) continue;
+ MVT VT = (MVT::SimpleValueType) i;
+ if (isTypeLegal(VT))
+ continue;
- // Determine if there is a legal wider type. If so, we should promote to
- // that wider vector type.
MVT EltVT = VT.getVectorElementType();
unsigned NElts = VT.getVectorNumElements();
- if (NElts != 1 && !shouldSplitVectorType(VT)) {
- bool IsLegalWiderType = false;
- // First try to promote the elements of integer vectors. If no legal
- // promotion was found, fallback to the widen-vector method.
- for (unsigned nVT = i+1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) {
- MVT SVT = (MVT::SimpleValueType)nVT;
+ bool IsLegalWiderType = false;
+ LegalizeTypeAction PreferredAction = getPreferredVectorAction(VT);
+ switch (PreferredAction) {
+ case TypePromoteInteger: {
+ // Try to promote the elements of integer vectors. If no legal
+ // promotion was found, fall through to the widen-vector method.
+ for (unsigned nVT = i + 1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) {
+ MVT SVT = (MVT::SimpleValueType) nVT;
// Promote vectors of integers to vectors with the same number
// of elements, with a wider element type.
if (SVT.getVectorElementType().getSizeInBits() > EltVT.getSizeInBits()
- && SVT.getVectorNumElements() == NElts &&
- isTypeLegal(SVT) && SVT.getScalarType().isInteger()) {
+ && SVT.getVectorNumElements() == NElts && isTypeLegal(SVT)
+ && SVT.getScalarType().isInteger()) {
TransformToType[i] = SVT;
RegisterTypeForVT[i] = SVT;
NumRegistersForVT[i] = 1;
@@ -1106,15 +1119,15 @@ void TargetLoweringBase::computeRegisterProperties() {
break;
}
}
-
- if (IsLegalWiderType) continue;
-
+ if (IsLegalWiderType)
+ break;
+ }
+ case TypeWidenVector: {
// Try to widen the vector.
- for (unsigned nVT = i+1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) {
- MVT SVT = (MVT::SimpleValueType)nVT;
- if (SVT.getVectorElementType() == EltVT &&
- SVT.getVectorNumElements() > NElts &&
- isTypeLegal(SVT)) {
+ for (unsigned nVT = i + 1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) {
+ MVT SVT = (MVT::SimpleValueType) nVT;
+ if (SVT.getVectorElementType() == EltVT
+ && SVT.getVectorNumElements() > NElts && isTypeLegal(SVT)) {
TransformToType[i] = SVT;
RegisterTypeForVT[i] = SVT;
NumRegistersForVT[i] = 1;
@@ -1123,27 +1136,34 @@ void TargetLoweringBase::computeRegisterProperties() {
break;
}
}
- if (IsLegalWiderType) continue;
+ if (IsLegalWiderType)
+ break;
}
-
- MVT IntermediateVT;
- MVT RegisterVT;
- unsigned NumIntermediates;
- NumRegistersForVT[i] =
- getVectorTypeBreakdownMVT(VT, IntermediateVT, NumIntermediates,
- RegisterVT, this);
- RegisterTypeForVT[i] = RegisterVT;
-
- MVT NVT = VT.getPow2VectorType();
- if (NVT == VT) {
- // Type is already a power of 2. The default action is to split.
- TransformToType[i] = MVT::Other;
- unsigned NumElts = VT.getVectorNumElements();
- ValueTypeActions.setTypeAction(VT,
- NumElts > 1 ? TypeSplitVector : TypeScalarizeVector);
- } else {
- TransformToType[i] = NVT;
- ValueTypeActions.setTypeAction(VT, TypeWidenVector);
+ case TypeSplitVector:
+ case TypeScalarizeVector: {
+ MVT IntermediateVT;
+ MVT RegisterVT;
+ unsigned NumIntermediates;
+ NumRegistersForVT[i] = getVectorTypeBreakdownMVT(VT, IntermediateVT,
+ NumIntermediates, RegisterVT, this);
+ RegisterTypeForVT[i] = RegisterVT;
+
+ MVT NVT = VT.getPow2VectorType();
+ if (NVT == VT) {
+ // Type is already a power of 2. The default action is to split.
+ TransformToType[i] = MVT::Other;
+ if (PreferredAction == TypeScalarizeVector)
+ ValueTypeActions.setTypeAction(VT, TypeScalarizeVector);
+ else
+ ValueTypeActions.setTypeAction(VT, TypeSplitVector);
+ } else {
+ TransformToType[i] = NVT;
+ ValueTypeActions.setTypeAction(VT, TypeWidenVector);
+ }
+ break;
+ }
+ default:
+ llvm_unreachable("Unknown vector legalization action!");
}
}
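
The rewritten loop dispatches on getPreferredVectorAction, so a target can now steer an illegal vector type toward any of the four legalization actions. A hypothetical override, assuming the EVT-taking signature of this release (here it simply mirrors the single-element-scalarize policy):

    // Hypothetical override: scalarize single-element vectors, defer to the
    // base-class policy for everything else. The returned action selects the
    // corresponding case in the switch above.
    TargetLoweringBase::LegalizeTypeAction
    FooTargetLowering::getPreferredVectorAction(EVT VT) const {
      if (VT.getVectorNumElements() == 1)
        return TypeScalarizeVector;
      return TargetLoweringBase::getPreferredVectorAction(VT);
    }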
diff --git a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
index dda2259..03f4a51 100644
--- a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -48,16 +48,12 @@ MCSymbol *TargetLoweringObjectFileELF::getCFIPersonalitySymbol(
const GlobalValue *GV, Mangler &Mang, const TargetMachine &TM,
MachineModuleInfo *MMI) const {
unsigned Encoding = getPersonalityEncoding();
- switch (Encoding & 0x70) {
- default:
- report_fatal_error("We do not support this DWARF encoding yet!");
- case dwarf::DW_EH_PE_absptr:
- return TM.getSymbol(GV, Mang);
- case dwarf::DW_EH_PE_pcrel: {
+ if ((Encoding & 0x80) == dwarf::DW_EH_PE_indirect)
return getContext().GetOrCreateSymbol(StringRef("DW.ref.") +
TM.getSymbol(GV, Mang)->getName());
- }
- }
+ if ((Encoding & 0x70) == dwarf::DW_EH_PE_absptr)
+ return TM.getSymbol(GV, Mang);
+ report_fatal_error("We do not support this DWARF encoding yet!");
}
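
The rewritten test checks the indirect bit (0x80) before the application bits (0x70). A small worked example using the standard DWARF encoding values:

    // DW_EH_PE_indirect is 0x80, DW_EH_PE_pcrel is 0x10, DW_EH_PE_absptr is 0.
    unsigned Encoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel; // 0x90
    bool Indirect = (Encoding & 0x80) == dwarf::DW_EH_PE_indirect;  // true
    bool AbsPtr   = (Encoding & 0x70) == dwarf::DW_EH_PE_absptr;    // false
    // So an indirect pc-relative personality now gets the DW.ref. symbol
    // instead of falling into the fatal-error path.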
void TargetLoweringObjectFileELF::emitPersonalityValue(MCStreamer &Streamer,
@@ -196,6 +192,18 @@ getELFSectionFlags(SectionKind K) {
return Flags;
}
+static const Comdat *getELFComdat(const GlobalValue *GV) {
+ const Comdat *C = GV->getComdat();
+ if (!C)
+ return nullptr;
+
+ if (C->getSelectionKind() != Comdat::Any)
+ report_fatal_error("ELF COMDATs only support SelectionKind::Any, '" +
+ C->getName() + "' cannot be lowered.");
+
+ return C;
+}
+
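
For reference, a sketch of how a global ends up on this path via the IR-level comdat API (the Module and GlobalObject calls are from the same release this patch targets; the names are illustrative):

    // Sketch: attach a comdat to a global. getELFComdat above accepts this
    // (SelectionKind::Any) and adds SHF_GROUP to the chosen section.
    Comdat *C = M.getOrInsertComdat("foo");
    C->setSelectionKind(Comdat::Any);  // any other kind is fatal on ELF here
    GV->setComdat(C);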
const MCSection *TargetLoweringObjectFileELF::getExplicitSectionGlobal(
const GlobalValue *GV, SectionKind Kind, Mangler &Mang,
const TargetMachine &TM) const {
@@ -204,14 +212,20 @@ const MCSection *TargetLoweringObjectFileELF::getExplicitSectionGlobal(
// Infer section flags from the section name if we can.
Kind = getELFKindForNamedSection(SectionName, Kind);
+ StringRef Group = "";
+ unsigned Flags = getELFSectionFlags(Kind);
+ if (const Comdat *C = getELFComdat(GV)) {
+ Group = C->getName();
+ Flags |= ELF::SHF_GROUP;
+ }
return getContext().getELFSection(SectionName,
- getELFSectionType(SectionName, Kind),
- getELFSectionFlags(Kind), Kind);
+ getELFSectionType(SectionName, Kind), Flags,
+ Kind, /*EntrySize=*/0, Group);
}
/// getSectionPrefixForGlobal - Return the section prefix name used by options
/// FunctionsSections and DataSections.
-static const char *getSectionPrefixForGlobal(SectionKind Kind) {
+static StringRef getSectionPrefixForGlobal(SectionKind Kind) {
if (Kind.isText()) return ".text.";
if (Kind.isReadOnly()) return ".rodata.";
if (Kind.isBSS()) return ".bss.";
@@ -228,7 +242,6 @@ static const char *getSectionPrefixForGlobal(SectionKind Kind) {
return ".data.rel.ro.";
}
-
const MCSection *TargetLoweringObjectFileELF::
SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
Mangler &Mang, const TargetMachine &TM) const {
@@ -242,18 +255,20 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
// If this global is linkonce/weak and the target handles this by emitting it
// into a 'uniqued' section name, create and return the section now.
- if ((GV->isWeakForLinker() || EmitUniquedSection) &&
+ if ((GV->isWeakForLinker() || EmitUniquedSection || GV->hasComdat()) &&
!Kind.isCommon()) {
- const char *Prefix;
- Prefix = getSectionPrefixForGlobal(Kind);
+ StringRef Prefix = getSectionPrefixForGlobal(Kind);
- SmallString<128> Name(Prefix, Prefix+strlen(Prefix));
+ SmallString<128> Name(Prefix);
TM.getNameWithPrefix(Name, GV, Mang, true);
StringRef Group = "";
unsigned Flags = getELFSectionFlags(Kind);
- if (GV->isWeakForLinker()) {
- Group = Name.substr(strlen(Prefix));
+ if (GV->isWeakForLinker() || GV->hasComdat()) {
+ if (const Comdat *C = getELFComdat(GV))
+ Group = C->getName();
+ else
+ Group = Name.substr(Prefix.size());
Flags |= ELF::SHF_GROUP;
}
@@ -340,7 +355,7 @@ getSectionForConstant(SectionKind Kind) const {
}
const MCSection *TargetLoweringObjectFileELF::getStaticCtorSection(
- unsigned Priority, const MCSymbol *KeySym, const MCSection *KeySec) const {
+ unsigned Priority, const MCSymbol *KeySym) const {
// The default scheme is .ctor / .dtor, so we have to invert the priority
// numbering.
if (Priority == 65535)
@@ -360,7 +375,7 @@ const MCSection *TargetLoweringObjectFileELF::getStaticCtorSection(
}
const MCSection *TargetLoweringObjectFileELF::getStaticDtorSection(
- unsigned Priority, const MCSymbol *KeySym, const MCSection *KeySec) const {
+ unsigned Priority, const MCSymbol *KeySym) const {
// The default scheme is .ctor / .dtor, so we have to invert the priority
// numbering.
if (Priority == 65535)
@@ -487,6 +502,15 @@ emitModuleFlags(MCStreamer &Streamer,
Streamer.AddBlankLine();
}
+static void checkMachOComdat(const GlobalValue *GV) {
+ const Comdat *C = GV->getComdat();
+ if (!C)
+ return;
+
+ report_fatal_error("MachO doesn't support COMDATs, '" + C->getName() +
+ "' cannot be lowered.");
+}
+
const MCSection *TargetLoweringObjectFileMachO::getExplicitSectionGlobal(
const GlobalValue *GV, SectionKind Kind, Mangler &Mang,
const TargetMachine &TM) const {
@@ -494,6 +518,9 @@ const MCSection *TargetLoweringObjectFileMachO::getExplicitSectionGlobal(
StringRef Segment, Section;
unsigned TAA = 0, StubSize = 0;
bool TAAParsed;
+
+ checkMachOComdat(GV);
+
std::string ErrorCode =
MCSectionMachO::ParseSectionSpecifier(GV->getSection(), Segment, Section,
TAA, TAAParsed, StubSize);
@@ -564,6 +591,7 @@ bool TargetLoweringObjectFileMachO::isSectionAtomizableBySymbols(
const MCSection *TargetLoweringObjectFileMachO::
SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
Mangler &Mang, const TargetMachine &TM) const {
+ checkMachOComdat(GV);
// Handle thread local data.
if (Kind.isThreadBSS()) return TLSBSSSection;
@@ -732,6 +760,50 @@ getCOFFSectionFlags(SectionKind K) {
return Flags;
}
+static const GlobalValue *getComdatGVForCOFF(const GlobalValue *GV) {
+ const Comdat *C = GV->getComdat();
+ assert(C && "expected GV to have a Comdat!");
+
+ StringRef ComdatGVName = C->getName();
+ const GlobalValue *ComdatGV = GV->getParent()->getNamedValue(ComdatGVName);
+ if (!ComdatGV)
+ report_fatal_error("Associative COMDAT symbol '" + ComdatGVName +
+ "' does not exist.");
+
+ if (ComdatGV->getComdat() != C)
+ report_fatal_error("Associative COMDAT symbol '" + ComdatGVName +
+ "' is not a key for it's COMDAT.");
+
+ return ComdatGV;
+}
+
+static int getSelectionForCOFF(const GlobalValue *GV) {
+ if (const Comdat *C = GV->getComdat()) {
+ const GlobalValue *ComdatKey = getComdatGVForCOFF(GV);
+ if (const auto *GA = dyn_cast<GlobalAlias>(ComdatKey))
+ ComdatKey = GA->getBaseObject();
+ if (ComdatKey == GV) {
+ switch (C->getSelectionKind()) {
+ case Comdat::Any:
+ return COFF::IMAGE_COMDAT_SELECT_ANY;
+ case Comdat::ExactMatch:
+ return COFF::IMAGE_COMDAT_SELECT_EXACT_MATCH;
+ case Comdat::Largest:
+ return COFF::IMAGE_COMDAT_SELECT_LARGEST;
+ case Comdat::NoDuplicates:
+ return COFF::IMAGE_COMDAT_SELECT_NODUPLICATES;
+ case Comdat::SameSize:
+ return COFF::IMAGE_COMDAT_SELECT_SAME_SIZE;
+ }
+ } else {
+ return COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE;
+ }
+ } else if (GV->isWeakForLinker()) {
+ return COFF::IMAGE_COMDAT_SELECT_ANY;
+ }
+ return 0;
+}
+
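
A hedged sketch of what getSelectionForCOFF computes for a two-member comdat (Key and Other are hypothetical GlobalObject pointers):

    // The selection kind applies to the comdat's key symbol only; every
    // other member of the group becomes IMAGE_COMDAT_SELECT_ASSOCIATIVE.
    Comdat *C = M.getOrInsertComdat("Key");  // name must match the key global
    C->setSelectionKind(Comdat::ExactMatch);
    Key->setComdat(C);    // -> IMAGE_COMDAT_SELECT_EXACT_MATCH
    Other->setComdat(C);  // -> IMAGE_COMDAT_SELECT_ASSOCIATIVE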
const MCSection *TargetLoweringObjectFileCOFF::getExplicitSectionGlobal(
const GlobalValue *GV, SectionKind Kind, Mangler &Mang,
const TargetMachine &TM) const {
@@ -739,11 +811,21 @@ const MCSection *TargetLoweringObjectFileCOFF::getExplicitSectionGlobal(
unsigned Characteristics = getCOFFSectionFlags(Kind);
StringRef Name = GV->getSection();
StringRef COMDATSymName = "";
- if (GV->isWeakForLinker()) {
- Selection = COFF::IMAGE_COMDAT_SELECT_ANY;
- Characteristics |= COFF::IMAGE_SCN_LNK_COMDAT;
- MCSymbol *Sym = TM.getSymbol(GV, Mang);
- COMDATSymName = Sym->getName();
+ if ((GV->isWeakForLinker() || GV->hasComdat()) && !Kind.isCommon()) {
+ Selection = getSelectionForCOFF(GV);
+ const GlobalValue *ComdatGV;
+ if (Selection == COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE)
+ ComdatGV = getComdatGVForCOFF(GV);
+ else
+ ComdatGV = GV;
+
+ if (!ComdatGV->hasPrivateLinkage()) {
+ MCSymbol *Sym = TM.getSymbol(ComdatGV, Mang);
+ COMDATSymName = Sym->getName();
+ Characteristics |= COFF::IMAGE_SCN_LNK_COMDAT;
+ } else {
+ Selection = 0;
+ }
}
return getContext().getCOFFSection(Name,
Characteristics,
@@ -780,17 +862,27 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
// into a 'uniqued' section name, create and return the section now.
// Section names depend on the name of the symbol which is not feasible if the
// symbol has private linkage.
- if ((GV->isWeakForLinker() || EmitUniquedSection) &&
- !GV->hasPrivateLinkage() && !Kind.isCommon()) {
+ if ((GV->isWeakForLinker() || EmitUniquedSection || GV->hasComdat()) &&
+ !Kind.isCommon()) {
const char *Name = getCOFFSectionNameForUniqueGlobal(Kind);
unsigned Characteristics = getCOFFSectionFlags(Kind);
Characteristics |= COFF::IMAGE_SCN_LNK_COMDAT;
- MCSymbol *Sym = TM.getSymbol(GV, Mang);
- return getContext().getCOFFSection(
- Name, Characteristics, Kind, Sym->getName(),
- GV->isWeakForLinker() ? COFF::IMAGE_COMDAT_SELECT_ANY
- : COFF::IMAGE_COMDAT_SELECT_NODUPLICATES);
+ int Selection = getSelectionForCOFF(GV);
+ if (!Selection)
+ Selection = COFF::IMAGE_COMDAT_SELECT_NODUPLICATES;
+ const GlobalValue *ComdatGV;
+ if (GV->hasComdat())
+ ComdatGV = getComdatGVForCOFF(GV);
+ else
+ ComdatGV = GV;
+
+ if (!ComdatGV->hasPrivateLinkage()) {
+ MCSymbol *Sym = TM.getSymbol(ComdatGV, Mang);
+ StringRef COMDATSymName = Sym->getName();
+ return getContext().getCOFFSection(Name, Characteristics, Kind,
+ COMDATSymName, Selection);
+ }
}
if (Kind.isText())
@@ -868,8 +960,7 @@ emitModuleFlags(MCStreamer &Streamer,
static const MCSection *getAssociativeCOFFSection(MCContext &Ctx,
const MCSection *Sec,
- const MCSymbol *KeySym,
- const MCSection *KeySec) {
+ const MCSymbol *KeySym) {
// Return the normal section if we don't have to be associative.
if (!KeySym)
return Sec;
@@ -877,20 +968,19 @@ static const MCSection *getAssociativeCOFFSection(MCContext &Ctx,
// Make an associative section with the same name and kind as the normal
// section.
const MCSectionCOFF *SecCOFF = cast<MCSectionCOFF>(Sec);
- const MCSectionCOFF *KeySecCOFF = cast<MCSectionCOFF>(KeySec);
unsigned Characteristics =
SecCOFF->getCharacteristics() | COFF::IMAGE_SCN_LNK_COMDAT;
return Ctx.getCOFFSection(SecCOFF->getSectionName(), Characteristics,
SecCOFF->getKind(), KeySym->getName(),
- COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE, KeySecCOFF);
+ COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE);
}
const MCSection *TargetLoweringObjectFileCOFF::getStaticCtorSection(
- unsigned Priority, const MCSymbol *KeySym, const MCSection *KeySec) const {
- return getAssociativeCOFFSection(getContext(), StaticCtorSection, KeySym, KeySec);
+ unsigned Priority, const MCSymbol *KeySym) const {
+ return getAssociativeCOFFSection(getContext(), StaticCtorSection, KeySym);
}
const MCSection *TargetLoweringObjectFileCOFF::getStaticDtorSection(
- unsigned Priority, const MCSymbol *KeySym, const MCSection *KeySec) const {
- return getAssociativeCOFFSection(getContext(), StaticDtorSection, KeySym, KeySec);
+ unsigned Priority, const MCSymbol *KeySym) const {
+ return getAssociativeCOFFSection(getContext(), StaticDtorSection, KeySym);
}
diff --git a/lib/DebugInfo/DWARFContext.cpp b/lib/DebugInfo/DWARFContext.cpp
index e52e8af..3961905 100644
--- a/lib/DebugInfo/DWARFContext.cpp
+++ b/lib/DebugInfo/DWARFContext.cpp
@@ -734,7 +734,7 @@ DWARFContextInMemory::DWARFContextInMemory(object::ObjectFile *Obj)
object::RelocToApply R(V.visit(Type, Reloc, 0, SymAddr));
if (V.error()) {
SmallString<32> Name;
- error_code ec(Reloc.getTypeName(Name));
+ std::error_code ec(Reloc.getTypeName(Name));
if (ec) {
errs() << "Aaaaaa! Nameless relocation! Aaaaaa!\n";
}
diff --git a/lib/DebugInfo/DWARFDebugAranges.cpp b/lib/DebugInfo/DWARFDebugAranges.cpp
index 2524adc..fe7e46d 100644
--- a/lib/DebugInfo/DWARFDebugAranges.cpp
+++ b/lib/DebugInfo/DWARFDebugAranges.cpp
@@ -15,6 +15,7 @@
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
+#include <set>
using namespace llvm;
void DWARFDebugAranges::extract(DataExtractor DebugArangesData) {
@@ -30,6 +31,7 @@ void DWARFDebugAranges::extract(DataExtractor DebugArangesData) {
uint64_t HighPC = Desc.getEndAddress();
appendRange(CUOffset, LowPC, HighPC);
}
+ ParsedCUOffsets.insert(CUOffset);
}
}
@@ -56,69 +58,55 @@ void DWARFDebugAranges::generate(DWARFContext *CTX) {
}
}
- sortAndMinimize();
+ construct();
}
void DWARFDebugAranges::clear() {
+ Endpoints.clear();
Aranges.clear();
ParsedCUOffsets.clear();
}
void DWARFDebugAranges::appendRange(uint32_t CUOffset, uint64_t LowPC,
uint64_t HighPC) {
- if (!Aranges.empty()) {
- if (Aranges.back().CUOffset == CUOffset &&
- Aranges.back().HighPC() == LowPC) {
- Aranges.back().setHighPC(HighPC);
- return;
- }
- }
- Aranges.push_back(Range(LowPC, HighPC, CUOffset));
-}
-
-void DWARFDebugAranges::sortAndMinimize() {
- const size_t orig_arange_size = Aranges.size();
- // Size of one? If so, no sorting is needed
- if (orig_arange_size <= 1)
+ if (LowPC >= HighPC)
return;
- // Sort our address range entries
- std::stable_sort(Aranges.begin(), Aranges.end());
-
- // Most address ranges are contiguous from function to function
- // so our new ranges will likely be smaller. We calculate the size
- // of the new ranges since although std::vector objects can be resized,
- // they will never reduce their allocated block size and free any excess
- // memory, so we might as well start a brand new collection so it is as
- // small as possible.
-
- // First calculate the size of the new minimal arange vector
- // so we don't have to do a bunch of re-allocations as we
- // copy the new minimal stuff over to the new collection.
- size_t minimal_size = 1;
- for (size_t i = 1; i < orig_arange_size; ++i) {
- if (!Range::SortedOverlapCheck(Aranges[i-1], Aranges[i]))
- ++minimal_size;
- }
+ Endpoints.emplace_back(LowPC, CUOffset, true);
+ Endpoints.emplace_back(HighPC, CUOffset, false);
+}
- // Else, make a new RangeColl that _only_ contains what we need.
- RangeColl minimal_aranges;
- minimal_aranges.resize(minimal_size);
- uint32_t j = 0;
- minimal_aranges[j] = Aranges[0];
- for (size_t i = 1; i < orig_arange_size; ++i) {
- if (Range::SortedOverlapCheck(minimal_aranges[j], Aranges[i])) {
- minimal_aranges[j].setHighPC(Aranges[i].HighPC());
+void DWARFDebugAranges::construct() {
+ std::multiset<uint32_t> ValidCUs; // Maintain the set of CUs describing
+ // a current address range.
+ std::sort(Endpoints.begin(), Endpoints.end());
+ uint64_t PrevAddress = -1ULL;
+ for (const auto &E : Endpoints) {
+ if (PrevAddress < E.Address && ValidCUs.size() > 0) {
+ // If the address range between two endpoints is described by some
+ // CU, first try to extend the last range in Aranges. If we can't
+ // do it, start a new range.
+ if (!Aranges.empty() && Aranges.back().HighPC() == PrevAddress &&
+ ValidCUs.find(Aranges.back().CUOffset) != ValidCUs.end()) {
+ Aranges.back().setHighPC(E.Address);
+ } else {
+ Aranges.emplace_back(PrevAddress, E.Address, *ValidCUs.begin());
+ }
+ }
+ // Update the set of valid CUs.
+ if (E.IsRangeStart) {
+ ValidCUs.insert(E.CUOffset);
} else {
- // Only increment j if we aren't merging
- minimal_aranges[++j] = Aranges[i];
+ auto CUPos = ValidCUs.find(E.CUOffset);
+ assert(CUPos != ValidCUs.end());
+ ValidCUs.erase(CUPos);
}
+ PrevAddress = E.Address;
}
- assert(j+1 == minimal_size);
+ assert(ValidCUs.empty());
- // Now swap our new minimal aranges into place. The local
- // minimal_aranges will then contain the old big collection
- // which will get freed.
- minimal_aranges.swap(Aranges);
+ // Endpoints are not needed now.
+ std::vector<RangeEndpoint> EmptyEndpoints;
+ EmptyEndpoints.swap(Endpoints);
}
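
The new construct() is a classic sweep-line pass over sorted endpoints. A self-contained sketch of the same algorithm, stripped of the DWARF types (not the library code itself):

    #include <algorithm>
    #include <cstdint>
    #include <set>
    #include <vector>

    struct Endpoint {
      uint64_t Addr; uint32_t CU; bool IsStart;
      bool operator<(const Endpoint &O) const { return Addr < O.Addr; }
    };
    struct Range { uint64_t Lo, Hi; uint32_t CU; };

    std::vector<Range> sweep(std::vector<Endpoint> E) {
      std::sort(E.begin(), E.end());
      std::multiset<uint32_t> Open;  // CUs covering the current address
      std::vector<Range> Out;
      uint64_t Prev = ~0ULL;         // sentinel: nothing precedes the first point
      for (const Endpoint &P : E) {
        if (Prev < P.Addr && !Open.empty()) {
          // Some CU covers [Prev, P.Addr): extend the last range if it ends
          // at Prev and its CU is still open, otherwise start a new range.
          if (!Out.empty() && Out.back().Hi == Prev && Open.count(Out.back().CU))
            Out.back().Hi = P.Addr;
          else
            Out.push_back({Prev, P.Addr, *Open.begin()});
        }
        if (P.IsStart)
          Open.insert(P.CU);
        else
          Open.erase(Open.find(P.CU));  // erase one occurrence only
        Prev = P.Addr;
      }
      return Out;
    }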
uint32_t DWARFDebugAranges::findAddress(uint64_t Address) const {
diff --git a/lib/DebugInfo/DWARFDebugAranges.h b/lib/DebugInfo/DWARFDebugAranges.h
index de96d7f..a9f37fe 100644
--- a/lib/DebugInfo/DWARFDebugAranges.h
+++ b/lib/DebugInfo/DWARFDebugAranges.h
@@ -27,9 +27,9 @@ private:
void clear();
void extract(DataExtractor DebugArangesData);
- // Use appendRange multiple times and then call sortAndMinimize.
+ // Call appendRange multiple times and then call construct.
void appendRange(uint32_t CUOffset, uint64_t LowPC, uint64_t HighPC);
- void sortAndMinimize();
+ void construct();
struct Range {
explicit Range(uint64_t LowPC = -1ULL, uint64_t HighPC = -1ULL,
@@ -47,31 +47,39 @@ private:
return LowPC + Length;
return -1ULL;
}
+
bool containsAddress(uint64_t Address) const {
return LowPC <= Address && Address < HighPC();
}
-
- bool operator <(const Range &other) const {
+ bool operator<(const Range &other) const {
return LowPC < other.LowPC;
}
- static bool SortedOverlapCheck(const Range &Left, const Range &Right) {
- if (Left.CUOffset != Right.CUOffset)
- return false;
- return Left.HighPC() >= Right.LowPC;
- }
-
uint64_t LowPC; // Start of address range.
uint32_t Length; // End of address range (not including this address).
uint32_t CUOffset; // Offset of the compile unit or die.
};
+ struct RangeEndpoint {
+ uint64_t Address;
+ uint32_t CUOffset;
+ bool IsRangeStart;
+
+ RangeEndpoint(uint64_t Address, uint32_t CUOffset, bool IsRangeStart)
+ : Address(Address), CUOffset(CUOffset), IsRangeStart(IsRangeStart) {}
+
+ bool operator<(const RangeEndpoint &Other) const {
+ return Address < Other.Address;
+ }
+ };
+
+
typedef std::vector<Range> RangeColl;
typedef RangeColl::const_iterator RangeCollIterator;
- typedef DenseSet<uint32_t> ParsedCUOffsetColl;
+ std::vector<RangeEndpoint> Endpoints;
RangeColl Aranges;
- ParsedCUOffsetColl ParsedCUOffsets;
+ DenseSet<uint32_t> ParsedCUOffsets;
};
}
diff --git a/lib/DebugInfo/DWARFDebugInfoEntry.cpp b/lib/DebugInfo/DWARFDebugInfoEntry.cpp
index b811ed7..2e7a54a 100644
--- a/lib/DebugInfo/DWARFDebugInfoEntry.cpp
+++ b/lib/DebugInfo/DWARFDebugInfoEntry.cpp
@@ -210,6 +210,16 @@ uint64_t DWARFDebugInfoEntryMinimal::getAttributeValueAsSectionOffset(
return Result.hasValue() ? Result.getValue() : FailValue;
}
+uint64_t
+DWARFDebugInfoEntryMinimal::getRangesBaseAttribute(const DWARFUnit *U,
+ uint64_t FailValue) const {
+ uint64_t Result =
+ getAttributeValueAsSectionOffset(U, DW_AT_ranges_base, -1ULL);
+ if (Result != -1ULL)
+ return Result;
+ return getAttributeValueAsSectionOffset(U, DW_AT_GNU_ranges_base, FailValue);
+}
+
bool DWARFDebugInfoEntryMinimal::getLowAndHighPC(const DWARFUnit *U,
uint64_t &LowPC,
uint64_t &HighPC) const {
diff --git a/lib/DebugInfo/DWARFDebugInfoEntry.h b/lib/DebugInfo/DWARFDebugInfoEntry.h
index 916e1ed..cc58eb6 100644
--- a/lib/DebugInfo/DWARFDebugInfoEntry.h
+++ b/lib/DebugInfo/DWARFDebugInfoEntry.h
@@ -106,6 +106,8 @@ public:
const uint16_t Attr,
uint64_t FailValue) const;
+ uint64_t getRangesBaseAttribute(const DWARFUnit *U, uint64_t FailValue) const;
+
/// Retrieves DW_AT_low_pc and DW_AT_high_pc from CU.
/// Returns true if both attributes are present.
bool getLowAndHighPC(const DWARFUnit *U, uint64_t &LowPC,
diff --git a/lib/DebugInfo/DWARFUnit.cpp b/lib/DebugInfo/DWARFUnit.cpp
index f5f5072..39d0a0f 100644
--- a/lib/DebugInfo/DWARFUnit.cpp
+++ b/lib/DebugInfo/DWARFUnit.cpp
@@ -226,7 +226,9 @@ size_t DWARFUnit::extractDIEsIfNeeded(bool CUDieOnly) {
AddrOffsetSectionBase = DieArray[0].getAttributeValueAsSectionOffset(
this, DW_AT_GNU_addr_base, 0);
RangeSectionBase = DieArray[0].getAttributeValueAsSectionOffset(
- this, DW_AT_GNU_ranges_base, 0);
+ this, DW_AT_ranges_base, 0);
+ // Don't fall back to DW_AT_GNU_ranges_base: it should be ignored for
+ // skeleton CU DIE, so that DWARF users not aware of it are not broken.
}
setDIERelations();
@@ -272,7 +274,8 @@ bool DWARFUnit::parseDWO() {
}
// Share .debug_addr and .debug_ranges section with compile unit in .dwo
DWOCU->setAddrOffsetSection(AddrOffsetSection, AddrOffsetSectionBase);
- DWOCU->setRangesSection(RangeSection, RangeSectionBase);
+ uint32_t DWORangesBase = DieArray[0].getRangesBaseAttribute(this, 0);
+ DWOCU->setRangesSection(RangeSection, DWORangesBase);
return true;
}
diff --git a/lib/ExecutionEngine/ExecutionEngine.cpp b/lib/ExecutionEngine/ExecutionEngine.cpp
index 6766ef1..b0e985d 100644
--- a/lib/ExecutionEngine/ExecutionEngine.cpp
+++ b/lib/ExecutionEngine/ExecutionEngine.cpp
@@ -148,8 +148,7 @@ Function *ExecutionEngine::FindFunctionNamed(const char *FnName) {
}
-void *ExecutionEngineState::RemoveMapping(const MutexGuard &,
- const GlobalValue *ToUnmap) {
+void *ExecutionEngineState::RemoveMapping(const GlobalValue *ToUnmap) {
GlobalAddressMapTy::iterator I = GlobalAddressMap.find(ToUnmap);
void *OldVal;
@@ -171,14 +170,14 @@ void ExecutionEngine::addGlobalMapping(const GlobalValue *GV, void *Addr) {
DEBUG(dbgs() << "JIT: Map \'" << GV->getName()
<< "\' to [" << Addr << "]\n";);
- void *&CurVal = EEState.getGlobalAddressMap(locked)[GV];
+ void *&CurVal = EEState.getGlobalAddressMap()[GV];
assert((!CurVal || !Addr) && "GlobalMapping already established!");
CurVal = Addr;
// If we are using the reverse mapping, add it too.
- if (!EEState.getGlobalAddressReverseMap(locked).empty()) {
+ if (!EEState.getGlobalAddressReverseMap().empty()) {
AssertingVH<const GlobalValue> &V =
- EEState.getGlobalAddressReverseMap(locked)[Addr];
+ EEState.getGlobalAddressReverseMap()[Addr];
assert((!V || !GV) && "GlobalMapping already established!");
V = GV;
}
@@ -187,41 +186,41 @@ void ExecutionEngine::addGlobalMapping(const GlobalValue *GV, void *Addr) {
void ExecutionEngine::clearAllGlobalMappings() {
MutexGuard locked(lock);
- EEState.getGlobalAddressMap(locked).clear();
- EEState.getGlobalAddressReverseMap(locked).clear();
+ EEState.getGlobalAddressMap().clear();
+ EEState.getGlobalAddressReverseMap().clear();
}
void ExecutionEngine::clearGlobalMappingsFromModule(Module *M) {
MutexGuard locked(lock);
for (Module::iterator FI = M->begin(), FE = M->end(); FI != FE; ++FI)
- EEState.RemoveMapping(locked, FI);
+ EEState.RemoveMapping(FI);
for (Module::global_iterator GI = M->global_begin(), GE = M->global_end();
GI != GE; ++GI)
- EEState.RemoveMapping(locked, GI);
+ EEState.RemoveMapping(GI);
}
void *ExecutionEngine::updateGlobalMapping(const GlobalValue *GV, void *Addr) {
MutexGuard locked(lock);
ExecutionEngineState::GlobalAddressMapTy &Map =
- EEState.getGlobalAddressMap(locked);
+ EEState.getGlobalAddressMap();
// Deleting from the mapping?
if (!Addr)
- return EEState.RemoveMapping(locked, GV);
+ return EEState.RemoveMapping(GV);
void *&CurVal = Map[GV];
void *OldVal = CurVal;
- if (CurVal && !EEState.getGlobalAddressReverseMap(locked).empty())
- EEState.getGlobalAddressReverseMap(locked).erase(CurVal);
+ if (CurVal && !EEState.getGlobalAddressReverseMap().empty())
+ EEState.getGlobalAddressReverseMap().erase(CurVal);
CurVal = Addr;
// If we are using the reverse mapping, add it too.
- if (!EEState.getGlobalAddressReverseMap(locked).empty()) {
+ if (!EEState.getGlobalAddressReverseMap().empty()) {
AssertingVH<const GlobalValue> &V =
- EEState.getGlobalAddressReverseMap(locked)[Addr];
+ EEState.getGlobalAddressReverseMap()[Addr];
assert((!V || !GV) && "GlobalMapping already established!");
V = GV;
}
@@ -232,25 +231,25 @@ void *ExecutionEngine::getPointerToGlobalIfAvailable(const GlobalValue *GV) {
MutexGuard locked(lock);
ExecutionEngineState::GlobalAddressMapTy::iterator I =
- EEState.getGlobalAddressMap(locked).find(GV);
- return I != EEState.getGlobalAddressMap(locked).end() ? I->second : nullptr;
+ EEState.getGlobalAddressMap().find(GV);
+ return I != EEState.getGlobalAddressMap().end() ? I->second : nullptr;
}
const GlobalValue *ExecutionEngine::getGlobalValueAtAddress(void *Addr) {
MutexGuard locked(lock);
// If we haven't computed the reverse mapping yet, do so first.
- if (EEState.getGlobalAddressReverseMap(locked).empty()) {
+ if (EEState.getGlobalAddressReverseMap().empty()) {
for (ExecutionEngineState::GlobalAddressMapTy::iterator
- I = EEState.getGlobalAddressMap(locked).begin(),
- E = EEState.getGlobalAddressMap(locked).end(); I != E; ++I)
- EEState.getGlobalAddressReverseMap(locked).insert(std::make_pair(
+ I = EEState.getGlobalAddressMap().begin(),
+ E = EEState.getGlobalAddressMap().end(); I != E; ++I)
+ EEState.getGlobalAddressReverseMap().insert(std::make_pair(
I->second, I->first));
}
std::map<void *, AssertingVH<const GlobalValue> >::iterator I =
- EEState.getGlobalAddressReverseMap(locked).find(Addr);
- return I != EEState.getGlobalAddressReverseMap(locked).end() ? I->second : nullptr;
+ EEState.getGlobalAddressReverseMap().find(Addr);
+ return I != EEState.getGlobalAddressReverseMap().end() ? I->second : nullptr;
}
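
The guard parameter is gone throughout this file, so holding the lock is now a caller obligation rather than something the signatures enforce. The resulting pattern, in a hedged sketch:

    // The accessor no longer takes the MutexGuard, but the lock must still
    // be held around the access; only the compile-time proof was dropped.
    MutexGuard locked(lock);
    void *&Slot = EEState.getGlobalAddressMap()[GV];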
namespace {
@@ -412,13 +411,14 @@ ExecutionEngine *ExecutionEngine::create(Module *M,
std::string *ErrorStr,
CodeGenOpt::Level OptLevel,
bool GVsWithCode) {
- EngineBuilder EB = EngineBuilder(M)
- .setEngineKind(ForceInterpreter
- ? EngineKind::Interpreter
- : EngineKind::JIT)
- .setErrorStr(ErrorStr)
- .setOptLevel(OptLevel)
- .setAllocateGVsWithCode(GVsWithCode);
+
+ EngineBuilder EB =
+ EngineBuilder(M)
+ .setEngineKind(ForceInterpreter ? EngineKind::Interpreter
+ : EngineKind::Either)
+ .setErrorStr(ErrorStr)
+ .setOptLevel(OptLevel)
+ .setAllocateGVsWithCode(GVsWithCode);
return EB.create();
}
@@ -457,6 +457,27 @@ ExecutionEngine *ExecutionEngine::createJIT(Module *M,
return ExecutionEngine::JITCtor(M, ErrorStr, JMM, GVsWithCode, TM);
}
+void EngineBuilder::InitEngine() {
+ WhichEngine = EngineKind::Either;
+ ErrorStr = nullptr;
+ OptLevel = CodeGenOpt::Default;
+ MCJMM = nullptr;
+ JMM = nullptr;
+ Options = TargetOptions();
+ AllocateGVsWithCode = false;
+ RelocModel = Reloc::Default;
+ CMModel = CodeModel::JITDefault;
+ UseMCJIT = false;
+
+// IR module verification is enabled by default in debug builds, and disabled
+// by default in release builds.
+#ifndef NDEBUG
+ VerifyModules = true;
+#else
+ VerifyModules = false;
+#endif
+}
+
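
All of these defaults remain overridable through the builder before create() runs. A usage sketch with this release's API (M, the module, is supplied by the caller):

    // Sketch: configure and create an engine; on failure Err explains why.
    std::string Err;
    ExecutionEngine *EE = EngineBuilder(M)
                              .setEngineKind(EngineKind::JIT)
                              .setErrorStr(&Err)
                              .setOptLevel(CodeGenOpt::Aggressive)
                              .create();
    if (!EE)
      errs() << "failed to create engine: " << Err << "\n";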
ExecutionEngine *EngineBuilder::create(TargetMachine *TM) {
std::unique_ptr<TargetMachine> TheTM(TM); // Take ownership.
@@ -536,7 +557,7 @@ void *ExecutionEngine::getPointerToGlobal(const GlobalValue *GV) {
return getPointerToFunction(F);
MutexGuard locked(lock);
- if (void *P = EEState.getGlobalAddressMap(locked)[GV])
+ if (void *P = EEState.getGlobalAddressMap()[GV])
return P;
// Global variable might have been added since interpreter started.
@@ -546,7 +567,7 @@ void *ExecutionEngine::getPointerToGlobal(const GlobalValue *GV) {
else
llvm_unreachable("Global hasn't had an address allocated yet!");
- return EEState.getGlobalAddressMap(locked)[GV];
+ return EEState.getGlobalAddressMap()[GV];
}
/// \brief Converts a Constant* into a GenericValue, including handling of
diff --git a/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp b/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp
index 9a65fa0..4e22a8b 100644
--- a/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp
+++ b/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp
@@ -86,7 +86,7 @@ static LineNumberInfo DILineInfoToIntelJITFormat(uintptr_t StartAddress,
LineNumberInfo Result;
Result.Offset = Address - StartAddress;
- Result.LineNumber = Line.getLine();
+ Result.LineNumber = Line.Line;
return Result;
}
@@ -233,7 +233,7 @@ void IntelJITEventListener::NotifyObjectEmitted(const ObjectImage &Obj) {
FunctionMessage.line_number_size = 0;
FunctionMessage.line_number_table = 0;
} else {
- SourceFileName = Lines.front().second.getFileName();
+ SourceFileName = Lines.front().second.FileName;
FunctionMessage.source_file_name = const_cast<char *>(SourceFileName.c_str());
FunctionMessage.line_number_size = LineInfo.size();
FunctionMessage.line_number_table = &*LineInfo.begin();
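
DILineInfo evidently changed here from accessor methods to plain public members. A hedged before/after sketch at a call site (assuming the post-change shape of the struct):

    // Assumed post-change shape of DILineInfo (public members):
    DILineInfo Info = Context->getLineInfoForAddress(Address);
    unsigned LineNo  = Info.Line;      // previously Info.getLine()
    std::string File = Info.FileName;  // previously Info.getFileName()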
diff --git a/lib/ExecutionEngine/Interpreter/Interpreter.cpp b/lib/ExecutionEngine/Interpreter/Interpreter.cpp
index c589457..814efcc 100644
--- a/lib/ExecutionEngine/Interpreter/Interpreter.cpp
+++ b/lib/ExecutionEngine/Interpreter/Interpreter.cpp
@@ -34,7 +34,7 @@ extern "C" void LLVMLinkInInterpreter() { }
///
ExecutionEngine *Interpreter::create(Module *M, std::string* ErrStr) {
// Tell this Module to materialize everything and release the GVMaterializer.
- if (error_code EC = M->materializeAllPermanently()) {
+ if (std::error_code EC = M->materializeAllPermanently()) {
if (ErrStr)
*ErrStr = EC.message();
// We got an error, just return 0
diff --git a/lib/ExecutionEngine/JIT/JIT.cpp b/lib/ExecutionEngine/JIT/JIT.cpp
index f8b2827..83ec978 100644
--- a/lib/ExecutionEngine/JIT/JIT.cpp
+++ b/lib/ExecutionEngine/JIT/JIT.cpp
@@ -151,7 +151,7 @@ JIT::JIT(Module *M, TargetMachine &tm, TargetJITInfo &tji,
// Add target data
MutexGuard locked(lock);
- FunctionPassManager &PM = jitstate->getPM(locked);
+ FunctionPassManager &PM = jitstate->getPM();
M->setDataLayout(TM.getDataLayout());
PM.add(new DataLayoutPass(M));
@@ -184,7 +184,7 @@ void JIT::addModule(Module *M) {
jitstate = new JITState(M);
- FunctionPassManager &PM = jitstate->getPM(locked);
+ FunctionPassManager &PM = jitstate->getPM();
M->setDataLayout(TM.getDataLayout());
PM.add(new DataLayoutPass(M));
@@ -216,7 +216,7 @@ bool JIT::removeModule(Module *M) {
if (!jitstate && !Modules.empty()) {
jitstate = new JITState(Modules[0]);
- FunctionPassManager &PM = jitstate->getPM(locked);
+ FunctionPassManager &PM = jitstate->getPM();
M->setDataLayout(TM.getDataLayout());
PM.add(new DataLayoutPass(M));
@@ -460,41 +460,41 @@ void JIT::runJITOnFunction(Function *F, MachineCodeInfo *MCI) {
if (MCI)
RegisterJITEventListener(&MCIL);
- runJITOnFunctionUnlocked(F, locked);
+ runJITOnFunctionUnlocked(F);
if (MCI)
UnregisterJITEventListener(&MCIL);
}
-void JIT::runJITOnFunctionUnlocked(Function *F, const MutexGuard &locked) {
+void JIT::runJITOnFunctionUnlocked(Function *F) {
assert(!isAlreadyCodeGenerating && "Error: Recursive compilation detected!");
- jitTheFunction(F, locked);
+ jitTheFunctionUnlocked(F);
// If the function referred to another function that had not yet been
// read from bitcode, and we are jitting non-lazily, emit it now.
- while (!jitstate->getPendingFunctions(locked).empty()) {
- Function *PF = jitstate->getPendingFunctions(locked).back();
- jitstate->getPendingFunctions(locked).pop_back();
+ while (!jitstate->getPendingFunctions().empty()) {
+ Function *PF = jitstate->getPendingFunctions().back();
+ jitstate->getPendingFunctions().pop_back();
assert(!PF->hasAvailableExternallyLinkage() &&
"Externally-defined function should not be in pending list.");
- jitTheFunction(PF, locked);
+ jitTheFunctionUnlocked(PF);
// Now that the function has been jitted, ask the JITEmitter to rewrite
// the stub with real address of the function.
- updateFunctionStub(PF);
+ updateFunctionStubUnlocked(PF);
}
}
-void JIT::jitTheFunction(Function *F, const MutexGuard &locked) {
+void JIT::jitTheFunctionUnlocked(Function *F) {
isAlreadyCodeGenerating = true;
- jitstate->getPM(locked).run(*F);
+ jitstate->getPM().run(*F);
isAlreadyCodeGenerating = false;
// clear basic block addresses after this function is done
- getBasicBlockAddressMap(locked).clear();
+ getBasicBlockAddressMap().clear();
}
/// getPointerToFunction - This method is used to get the address of the
@@ -526,7 +526,7 @@ void *JIT::getPointerToFunction(Function *F) {
return Addr;
}
- runJITOnFunctionUnlocked(F, locked);
+ runJITOnFunctionUnlocked(F);
void *Addr = getPointerToGlobalIfAvailable(F);
assert(Addr && "Code generation didn't add function to GlobalAddress table!");
@@ -537,9 +537,9 @@ void JIT::addPointerToBasicBlock(const BasicBlock *BB, void *Addr) {
MutexGuard locked(lock);
BasicBlockAddressMapTy::iterator I =
- getBasicBlockAddressMap(locked).find(BB);
- if (I == getBasicBlockAddressMap(locked).end()) {
- getBasicBlockAddressMap(locked)[BB] = Addr;
+ getBasicBlockAddressMap().find(BB);
+ if (I == getBasicBlockAddressMap().end()) {
+ getBasicBlockAddressMap()[BB] = Addr;
} else {
// ignore repeats: some BBs can be split into few MBBs?
}
@@ -547,7 +547,7 @@ void JIT::addPointerToBasicBlock(const BasicBlock *BB, void *Addr) {
void JIT::clearPointerToBasicBlock(const BasicBlock *BB) {
MutexGuard locked(lock);
- getBasicBlockAddressMap(locked).erase(BB);
+ getBasicBlockAddressMap().erase(BB);
}
void *JIT::getPointerToBasicBlock(BasicBlock *BB) {
@@ -558,8 +558,8 @@ void *JIT::getPointerToBasicBlock(BasicBlock *BB) {
MutexGuard locked(lock);
BasicBlockAddressMapTy::iterator I =
- getBasicBlockAddressMap(locked).find(BB);
- if (I != getBasicBlockAddressMap(locked).end()) {
+ getBasicBlockAddressMap().find(BB);
+ if (I != getBasicBlockAddressMap().end()) {
return I->second;
} else {
llvm_unreachable("JIT does not have BB address for address-of-label, was"
@@ -688,7 +688,7 @@ char* JIT::getMemoryForGV(const GlobalVariable* GV) {
void JIT::addPendingFunction(Function *F) {
MutexGuard locked(lock);
- jitstate->getPendingFunctions(locked).push_back(F);
+ jitstate->getPendingFunctions().push_back(F);
}
diff --git a/lib/ExecutionEngine/JIT/JIT.h b/lib/ExecutionEngine/JIT/JIT.h
index d2bd508..69a7c36 100644
--- a/lib/ExecutionEngine/JIT/JIT.h
+++ b/lib/ExecutionEngine/JIT/JIT.h
@@ -39,12 +39,12 @@ private:
public:
explicit JITState(Module *M) : PM(M), M(M) {}
- FunctionPassManager &getPM(const MutexGuard &L) {
+ FunctionPassManager &getPM() {
return PM;
}
Module *getModule() const { return M; }
- std::vector<AssertingVH<Function> > &getPendingFunctions(const MutexGuard &L){
+ std::vector<AssertingVH<Function> > &getPendingFunctions() {
return PendingFunctions;
}
};
@@ -205,7 +205,7 @@ public:
void NotifyFreeingMachineCode(void *OldPtr);
BasicBlockAddressMapTy &
- getBasicBlockAddressMap(const MutexGuard &) {
+ getBasicBlockAddressMap() {
return BasicBlockAddressMap;
}
@@ -213,9 +213,9 @@ public:
private:
static JITCodeEmitter *createEmitter(JIT &J, JITMemoryManager *JMM,
TargetMachine &tm);
- void runJITOnFunctionUnlocked(Function *F, const MutexGuard &locked);
- void updateFunctionStub(Function *F);
- void jitTheFunction(Function *F, const MutexGuard &locked);
+ void runJITOnFunctionUnlocked(Function *F);
+ void updateFunctionStubUnlocked(Function *F);
+ void jitTheFunctionUnlocked(Function *F);
protected:
diff --git a/lib/ExecutionEngine/JIT/JITEmitter.cpp b/lib/ExecutionEngine/JIT/JITEmitter.cpp
index cd7a500..50b8c10 100644
--- a/lib/ExecutionEngine/JIT/JITEmitter.cpp
+++ b/lib/ExecutionEngine/JIT/JITEmitter.cpp
@@ -32,6 +32,7 @@
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/Operator.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/IR/ValueMap.h"
#include "llvm/Support/Debug.h"
@@ -120,21 +121,16 @@ namespace {
#endif
}
- FunctionToLazyStubMapTy& getFunctionToLazyStubMap(
- const MutexGuard& locked) {
- assert(locked.holds(TheJIT->lock));
+ FunctionToLazyStubMapTy& getFunctionToLazyStubMap() {
return FunctionToLazyStubMap;
}
- GlobalToIndirectSymMapTy& getGlobalToIndirectSymMap(const MutexGuard& lck) {
- assert(lck.holds(TheJIT->lock));
+ GlobalToIndirectSymMapTy& getGlobalToIndirectSymMap() {
return GlobalToIndirectSymMap;
}
std::pair<void *, Function *> LookupFunctionFromCallSite(
- const MutexGuard &locked, void *CallSite) const {
- assert(locked.holds(TheJIT->lock));
-
+ void *CallSite) const {
// The address given to us for the stub may not be exactly right, it
// might be a little bit after the stub. As such, use upper_bound to
// find it.
@@ -146,9 +142,7 @@ namespace {
return *I;
}
- void AddCallSite(const MutexGuard &locked, void *CallSite, Function *F) {
- assert(locked.holds(TheJIT->lock));
-
+ void AddCallSite(void *CallSite, Function *F) {
bool Inserted = CallSiteToFunctionMap.insert(
std::make_pair(CallSite, F)).second;
(void)Inserted;
@@ -503,7 +497,7 @@ void *JITResolver::getLazyFunctionStubIfAvailable(Function *F) {
MutexGuard locked(TheJIT->lock);
// If we already have a stub for this function, recycle it.
- return state.getFunctionToLazyStubMap(locked).lookup(F);
+ return state.getFunctionToLazyStubMap().lookup(F);
}
/// getFunctionStub - This returns a pointer to a function stub, creating
@@ -512,7 +506,7 @@ void *JITResolver::getLazyFunctionStub(Function *F) {
MutexGuard locked(TheJIT->lock);
// If we already have a lazy stub for this function, recycle it.
- void *&Stub = state.getFunctionToLazyStubMap(locked)[F];
+ void *&Stub = state.getFunctionToLazyStubMap()[F];
if (Stub) return Stub;
// Call the lazy resolver function if we are JIT'ing lazily. Otherwise we
@@ -554,7 +548,7 @@ void *JITResolver::getLazyFunctionStub(Function *F) {
// Finally, keep track of the stub-to-Function mapping so that the
// JITCompilerFn knows which function to compile!
- state.AddCallSite(locked, Stub, F);
+ state.AddCallSite(Stub, F);
} else if (!Actual) {
// If we are JIT'ing non-lazily but need to call a function that does not
// exist yet, add it to the JIT's work list so that we can fill in the
@@ -573,7 +567,7 @@ void *JITResolver::getGlobalValueIndirectSym(GlobalValue *GV, void *GVAddress) {
MutexGuard locked(TheJIT->lock);
// If we already have a stub for this global variable, recycle it.
- void *&IndirectSym = state.getGlobalToIndirectSymMap(locked)[GV];
+ void *&IndirectSym = state.getGlobalToIndirectSymMap()[GV];
if (IndirectSym) return IndirectSym;
// Otherwise, codegen a new indirect symbol.
@@ -633,7 +627,7 @@ void *JITResolver::JITCompilerFn(void *Stub) {
// The address given to us for the stub may not be exactly right, it might
// be a little bit after the stub. As such, use upper_bound to find it.
std::pair<void*, Function*> I =
- JR->state.LookupFunctionFromCallSite(locked, Stub);
+ JR->state.LookupFunctionFromCallSite(Stub);
F = I.second;
ActualPtr = I.first;
}
@@ -684,13 +678,23 @@ void *JITResolver::JITCompilerFn(void *Stub) {
//===----------------------------------------------------------------------===//
// JITEmitter code.
//
+
+static GlobalObject *getSimpleAliasee(Constant *C) {
+ C = C->stripPointerCasts();
+ return dyn_cast<GlobalObject>(C);
+}
+
void *JITEmitter::getPointerToGlobal(GlobalValue *V, void *Reference,
bool MayNeedFarStub) {
if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
return TheJIT->getOrEmitGlobalVariable(GV);
- if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V))
- return TheJIT->getPointerToGlobal(GA->getAliasee());
+ if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
+ // We can only handle simple cases.
+ if (GlobalValue *GV = getSimpleAliasee(GA->getAliasee()))
+ return TheJIT->getPointerToGlobal(GV);
+ return nullptr;
+ }
// If we have already compiled the function, return a pointer to its body.
Function *F = cast<Function>(V);
@@ -1225,7 +1229,7 @@ void *JIT::getPointerToFunctionOrStub(Function *F) {
return JE->getJITResolver().getLazyFunctionStub(F);
}
-void JIT::updateFunctionStub(Function *F) {
+void JIT::updateFunctionStubUnlocked(Function *F) {
// Get the empty stub we generated earlier.
JITEmitter *JE = static_cast<JITEmitter*>(getCodeEmitter());
void *Stub = JE->getJITResolver().getLazyFunctionStub(F);
diff --git a/lib/ExecutionEngine/MCJIT/MCJIT.cpp b/lib/ExecutionEngine/MCJIT/MCJIT.cpp
index 42cb4ea..e9ba96a 100644
--- a/lib/ExecutionEngine/MCJIT/MCJIT.cpp
+++ b/lib/ExecutionEngine/MCJIT/MCJIT.cpp
@@ -305,9 +305,13 @@ uint64_t MCJIT::getSymbolAddress(const std::string &Name,
// Look for our symbols in each Archive
object::Archive::child_iterator ChildIt = A->findSym(Name);
if (ChildIt != A->child_end()) {
- std::unique_ptr<object::Binary> ChildBin;
// FIXME: Support nested archives?
- if (!ChildIt->getAsBinary(ChildBin) && ChildBin->isObject()) {
+ ErrorOr<std::unique_ptr<object::Binary>> ChildBinOrErr =
+ ChildIt->getAsBinary();
+ if (ChildBinOrErr.getError())
+ continue;
+ std::unique_ptr<object::Binary> ChildBin = std::move(ChildBinOrErr.get());
+ if (ChildBin->isObject()) {
std::unique_ptr<object::ObjectFile> OF(
static_cast<object::ObjectFile *>(ChildBin.release()));
// This causes the object file to be loaded.
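
getAsBinary now returns an ErrorOr instead of filling an out-parameter. The general pattern, sketched:

    // Sketch of the ErrorOr idiom: test for failure first, then move the
    // wrapped value out exactly once.
    ErrorOr<std::unique_ptr<object::Binary>> BinOrErr = ChildIt->getAsBinary();
    if (std::error_code EC = BinOrErr.getError())
      return EC;                       // or `continue`, as the loop above does
    std::unique_ptr<object::Binary> Bin = std::move(BinOrErr.get());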
diff --git a/lib/ExecutionEngine/MCJIT/SectionMemoryManager.cpp b/lib/ExecutionEngine/MCJIT/SectionMemoryManager.cpp
index 9ceaa90..5986084 100644
--- a/lib/ExecutionEngine/MCJIT/SectionMemoryManager.cpp
+++ b/lib/ExecutionEngine/MCJIT/SectionMemoryManager.cpp
@@ -71,7 +71,7 @@ uint8_t *SectionMemoryManager::allocateSection(MemoryGroup &MemGroup,
//
// FIXME: Initialize the Near member for each memory group to avoid
// interleaving.
- error_code ec;
+ std::error_code ec;
sys::MemoryBlock MB = sys::Memory::allocateMappedMemory(RequiredSize,
&MemGroup.Near,
sys::Memory::MF_READ |
@@ -105,7 +105,7 @@ uint8_t *SectionMemoryManager::allocateSection(MemoryGroup &MemGroup,
bool SectionMemoryManager::finalizeMemory(std::string *ErrMsg)
{
// FIXME: Should in-progress permissions be reverted if an error occurs?
- error_code ec;
+ std::error_code ec;
// Don't allow free memory blocks to be used after setting protection flags.
CodeMem.FreeMem.clear();
@@ -143,19 +143,20 @@ bool SectionMemoryManager::finalizeMemory(std::string *ErrMsg)
return false;
}
-error_code SectionMemoryManager::applyMemoryGroupPermissions(MemoryGroup &MemGroup,
- unsigned Permissions) {
+std::error_code
+SectionMemoryManager::applyMemoryGroupPermissions(MemoryGroup &MemGroup,
+ unsigned Permissions) {
for (int i = 0, e = MemGroup.AllocatedMem.size(); i != e; ++i) {
- error_code ec;
- ec = sys::Memory::protectMappedMemory(MemGroup.AllocatedMem[i],
- Permissions);
- if (ec) {
- return ec;
- }
+ std::error_code ec;
+ ec =
+ sys::Memory::protectMappedMemory(MemGroup.AllocatedMem[i], Permissions);
+ if (ec) {
+ return ec;
+ }
}
- return error_code::success();
+ return std::error_code();
}
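
error_code::success() has no std:: equivalent; a default-constructed std::error_code is the success value, as the new return above relies on. A one-line illustration:

    // A default-constructed std::error_code converts to false, so existing
    // `if (ec)` failure checks keep working unchanged.
    std::error_code ec;
    assert(!ec && "default-constructed std::error_code means success");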
void SectionMemoryManager::invalidateInstructionCache() {
diff --git a/lib/ExecutionEngine/RuntimeDyld/Android.mk b/lib/ExecutionEngine/RuntimeDyld/Android.mk
index e98e80a..eb2e438 100644
--- a/lib/ExecutionEngine/RuntimeDyld/Android.mk
+++ b/lib/ExecutionEngine/RuntimeDyld/Android.mk
@@ -7,6 +7,7 @@ include $(CLEAR_VARS)
LOCAL_SRC_FILES := \
GDBRegistrar.cpp \
RuntimeDyld.cpp \
+ RuntimeDyldChecker.cpp \
RuntimeDyldELF.cpp \
RuntimeDyldMachO.cpp
diff --git a/lib/ExecutionEngine/RuntimeDyld/CMakeLists.txt b/lib/ExecutionEngine/RuntimeDyld/CMakeLists.txt
index cbf7cf1..eb1a60b 100644
--- a/lib/ExecutionEngine/RuntimeDyld/CMakeLists.txt
+++ b/lib/ExecutionEngine/RuntimeDyld/CMakeLists.txt
@@ -1,6 +1,7 @@
add_llvm_library(LLVMRuntimeDyld
GDBRegistrar.cpp
RuntimeDyld.cpp
+ RuntimeDyldChecker.cpp
RuntimeDyldELF.cpp
RuntimeDyldMachO.cpp
)
diff --git a/lib/ExecutionEngine/RuntimeDyld/LLVMBuild.txt b/lib/ExecutionEngine/RuntimeDyld/LLVMBuild.txt
index 97dc861..8bd5621 100644
--- a/lib/ExecutionEngine/RuntimeDyld/LLVMBuild.txt
+++ b/lib/ExecutionEngine/RuntimeDyld/LLVMBuild.txt
@@ -19,4 +19,4 @@
type = Library
name = RuntimeDyld
parent = ExecutionEngine
-required_libraries = Object Support
+required_libraries = MC Object Support
diff --git a/lib/ExecutionEngine/RuntimeDyld/ObjectImageCommon.h b/lib/ExecutionEngine/RuntimeDyld/ObjectImageCommon.h
index 4917b93..c3a2182 100644
--- a/lib/ExecutionEngine/RuntimeDyld/ObjectImageCommon.h
+++ b/lib/ExecutionEngine/RuntimeDyld/ObjectImageCommon.h
@@ -48,7 +48,8 @@ public:
{
// FIXME: error checking? createObjectFile returns an ErrorOr<ObjectFile*>
// and should probably be checked for failure.
- ObjFile.reset(object::ObjectFile::createObjectFile(Buffer->getMemBuffer()).get());
+ std::unique_ptr<MemoryBuffer> Buf(Buffer->getMemBuffer());
+ ObjFile.reset(object::ObjectFile::createObjectFile(Buf).get());
}
ObjectImageCommon(std::unique_ptr<object::ObjectFile> Input)
: ObjectImage(nullptr), ObjFile(std::move(Input)) {}
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
index c1eb0fd..9dfd167 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
@@ -73,9 +73,9 @@ void RuntimeDyldImpl::mapSectionAddress(const void *LocalAddress,
llvm_unreachable("Attempting to remap address of unknown section!");
}
-static error_code getOffset(const SymbolRef &Sym, uint64_t &Result) {
+static std::error_code getOffset(const SymbolRef &Sym, uint64_t &Result) {
uint64_t Address;
- if (error_code EC = Sym.getAddress(Address))
+ if (std::error_code EC = Sym.getAddress(Address))
return EC;
if (Address == UnknownAddressOrSize) {
@@ -85,7 +85,7 @@ static error_code getOffset(const SymbolRef &Sym, uint64_t &Result) {
const ObjectFile *Obj = Sym.getObject();
section_iterator SecI(Obj->section_begin());
- if (error_code EC = Sym.getSection(SecI))
+ if (std::error_code EC = Sym.getSection(SecI))
return EC;
if (SecI == Obj->section_end()) {
@@ -94,7 +94,7 @@ static error_code getOffset(const SymbolRef &Sym, uint64_t &Result) {
}
uint64_t SectionAddress;
- if (error_code EC = SecI->getAddress(SectionAddress))
+ if (std::error_code EC = SecI->getAddress(SectionAddress))
return EC;
Result = Address - SectionAddress;
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp
new file mode 100644
index 0000000..190bbbf
--- /dev/null
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp
@@ -0,0 +1,641 @@
+//===--- RuntimeDyldChecker.cpp - RuntimeDyld tester framework --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ExecutionEngine/RuntimeDyldChecker.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCDisassembler.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/Support/StringRefMemoryObject.h"
+#include "RuntimeDyldImpl.h"
+#include <cctype>
+#include <memory>
+
+#define DEBUG_TYPE "rtdyld"
+
+using namespace llvm;
+
+namespace llvm {
+
+ // Helper class that implements the language evaluated by RuntimeDyldChecker.
+ class RuntimeDyldCheckerExprEval {
+ public:
+
+ RuntimeDyldCheckerExprEval(const RuntimeDyldChecker &Checker,
+ llvm::raw_ostream &ErrStream)
+ : Checker(Checker), ErrStream(ErrStream) {}
+
+ bool evaluate(StringRef Expr) const {
+ // Expect equality expression of the form 'LHS = RHS'.
+ Expr = Expr.trim();
+      size_t EQIdx = Expr.find('=');
+      if (EQIdx == StringRef::npos)
+        return handleError(Expr, EvalResult("Missing '=' in check expression."));
+
+ // Evaluate LHS.
+ StringRef LHSExpr = Expr.substr(0, EQIdx).rtrim();
+ StringRef RemainingExpr;
+ EvalResult LHSResult;
+ std::tie(LHSResult, RemainingExpr) =
+ evalComplexExpr(evalSimpleExpr(LHSExpr));
+ if (LHSResult.hasError())
+ return handleError(Expr, LHSResult);
+ if (RemainingExpr != "")
+ return handleError(Expr, unexpectedToken(RemainingExpr, LHSExpr, ""));
+
+ // Evaluate RHS.
+ StringRef RHSExpr = Expr.substr(EQIdx + 1).ltrim();
+ EvalResult RHSResult;
+ std::tie(RHSResult, RemainingExpr) =
+ evalComplexExpr(evalSimpleExpr(RHSExpr));
+ if (RHSResult.hasError())
+ return handleError(Expr, RHSResult);
+ if (RemainingExpr != "")
+ return handleError(Expr, unexpectedToken(RemainingExpr, RHSExpr, ""));
+
+ if (LHSResult.getValue() != RHSResult.getValue()) {
+ ErrStream << "Expression '" << Expr << "' is false: "
+ << format("0x%lx", LHSResult.getValue()) << " != "
+ << format("0x%lx", RHSResult.getValue()) << "\n";
+ return false;
+ }
+ return true;
+ }
+
+ private:
+ const RuntimeDyldChecker &Checker;
+ llvm::raw_ostream &ErrStream;
+
+ enum class BinOpToken : unsigned { Invalid, Add, Sub, BitwiseAnd,
+ BitwiseOr, ShiftLeft, ShiftRight };
+
+ class EvalResult {
+ public:
+ EvalResult()
+ : Value(0), ErrorMsg("") {}
+ EvalResult(uint64_t Value)
+ : Value(Value), ErrorMsg("") {}
+ EvalResult(std::string ErrorMsg)
+ : Value(0), ErrorMsg(ErrorMsg) {}
+ uint64_t getValue() const { return Value; }
+ bool hasError() const { return ErrorMsg != ""; }
+ const std::string& getErrorMsg() const { return ErrorMsg; }
+ private:
+ uint64_t Value;
+ std::string ErrorMsg;
+ };
+
+ StringRef getTokenForError(StringRef Expr) const {
+ if (Expr.empty())
+ return "";
+
+ StringRef Token, Remaining;
+ if (isalpha(Expr[0]))
+ std::tie(Token, Remaining) = parseSymbol(Expr);
+ else if (isdigit(Expr[0]))
+ std::tie(Token, Remaining) = parseNumberString(Expr);
+ else {
+ unsigned TokLen = 1;
+ if (Expr.startswith("<<") || Expr.startswith(">>"))
+ TokLen = 2;
+ Token = Expr.substr(0, TokLen);
+ }
+ return Token;
+ }
+
+ EvalResult unexpectedToken(StringRef TokenStart,
+ StringRef SubExpr,
+ StringRef ErrText) const {
+ std::string ErrorMsg("Encountered unexpected token '");
+ ErrorMsg += getTokenForError(TokenStart);
+ if (SubExpr != "") {
+ ErrorMsg += "' while parsing subexpression '";
+ ErrorMsg += SubExpr;
+ }
+ ErrorMsg += "'";
+ if (ErrText != "") {
+ ErrorMsg += " ";
+ ErrorMsg += ErrText;
+ }
+ return EvalResult(std::move(ErrorMsg));
+ }
+
+ bool handleError(StringRef Expr, const EvalResult &R) const {
+ assert(R.hasError() && "Not an error result.");
+ ErrStream << "Error evaluating expression '" << Expr << "': "
+ << R.getErrorMsg() << "\n";
+ return false;
+ }
+
+ std::pair<BinOpToken, StringRef> parseBinOpToken(StringRef Expr) const {
+ if (Expr.empty())
+ return std::make_pair(BinOpToken::Invalid, "");
+
+ // Handle the two 2-character tokens.
+ if (Expr.startswith("<<"))
+ return std::make_pair(BinOpToken::ShiftLeft,
+ Expr.substr(2).ltrim());
+ if (Expr.startswith(">>"))
+ return std::make_pair(BinOpToken::ShiftRight,
+ Expr.substr(2).ltrim());
+
+ // Handle one-character tokens.
+ BinOpToken Op;
+ switch (Expr[0]) {
+ default: return std::make_pair(BinOpToken::Invalid, Expr);
+ case '+': Op = BinOpToken::Add; break;
+ case '-': Op = BinOpToken::Sub; break;
+ case '&': Op = BinOpToken::BitwiseAnd; break;
+ case '|': Op = BinOpToken::BitwiseOr; break;
+ }
+
+ return std::make_pair(Op, Expr.substr(1).ltrim());
+ }
+
+ EvalResult computeBinOpResult(BinOpToken Op, const EvalResult &LHSResult,
+ const EvalResult &RHSResult) const {
+ switch (Op) {
+ default: llvm_unreachable("Tried to evaluate unrecognized operation.");
+ case BinOpToken::Add:
+ return EvalResult(LHSResult.getValue() + RHSResult.getValue());
+ case BinOpToken::Sub:
+ return EvalResult(LHSResult.getValue() - RHSResult.getValue());
+ case BinOpToken::BitwiseAnd:
+ return EvalResult(LHSResult.getValue() & RHSResult.getValue());
+ case BinOpToken::BitwiseOr:
+ return EvalResult(LHSResult.getValue() | RHSResult.getValue());
+ case BinOpToken::ShiftLeft:
+ return EvalResult(LHSResult.getValue() << RHSResult.getValue());
+ case BinOpToken::ShiftRight:
+ return EvalResult(LHSResult.getValue() >> RHSResult.getValue());
+ }
+ }
+
+ // Parse a symbol and return a (string, string) pair representing the symbol
+ // name and expression remaining to be parsed.
+ std::pair<StringRef, StringRef> parseSymbol(StringRef Expr) const {
+ size_t FirstNonSymbol =
+ Expr.find_first_not_of("0123456789"
+ "abcdefghijklmnopqrstuvwxyz"
+ "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+ ":_");
+ return std::make_pair(Expr.substr(0, FirstNonSymbol),
+ Expr.substr(FirstNonSymbol).ltrim());
+ }
+
+    // Evaluate a call to decode_operand. Decode the instruction at the
+    // given symbol and get the value of the requested operand.
+    // Returns an error if the instruction cannot be decoded, or the requested
+    // operand is not an immediate.
+    // On success, returns a pair containing the value of the operand, plus
+ // the expression remaining to be evaluated.
+ std::pair<EvalResult, StringRef> evalDecodeOperand(StringRef Expr) const {
+ if (!Expr.startswith("("))
+ return std::make_pair(unexpectedToken(Expr, Expr, "expected '('"), "");
+ StringRef RemainingExpr = Expr.substr(1).ltrim();
+ StringRef Symbol;
+ std::tie(Symbol, RemainingExpr) = parseSymbol(RemainingExpr);
+
+ if (!Checker.checkSymbolIsValidForLoad(Symbol))
+ return std::make_pair(EvalResult(("Cannot decode unknown symbol '" +
+ Symbol + "'").str()),
+ "");
+
+ if (!RemainingExpr.startswith(","))
+ return std::make_pair(unexpectedToken(RemainingExpr, RemainingExpr,
+ "expected ','"),
+ "");
+ RemainingExpr = RemainingExpr.substr(1).ltrim();
+
+ EvalResult OpIdxExpr;
+ std::tie(OpIdxExpr, RemainingExpr) = evalNumberExpr(RemainingExpr);
+ if (OpIdxExpr.hasError())
+ return std::make_pair(OpIdxExpr, "");
+
+ if (!RemainingExpr.startswith(")"))
+ return std::make_pair(unexpectedToken(RemainingExpr, RemainingExpr,
+ "expected ')'"),
+ "");
+ RemainingExpr = RemainingExpr.substr(1).ltrim();
+
+ MCInst Inst;
+ uint64_t Size;
+ if (!decodeInst(Symbol, Inst, Size))
+ return std::make_pair(EvalResult(("Couldn't decode instruction at '" +
+ Symbol + "'").str()),
+ "");
+
+ unsigned OpIdx = OpIdxExpr.getValue();
+ if (OpIdx >= Inst.getNumOperands()) {
+ std::string ErrMsg;
+ raw_string_ostream ErrMsgStream(ErrMsg);
+ ErrMsgStream << "Invalid operand index '" << format("%i", OpIdx)
+ << " for instruction '" << Symbol
+ << ". Instruction has only "
+ << format("%i", Inst.getNumOperands()) << " operands.";
+ return std::make_pair(EvalResult(ErrMsgStream.str()), "");
+ }
+
+ const MCOperand &Op = Inst.getOperand(OpIdx);
+ if (!Op.isImm()) {
+ std::string ErrMsg;
+ raw_string_ostream ErrMsgStream(ErrMsg);
+ ErrMsgStream << "Operand '" << format("%i", OpIdx)
+ << "' of instruction '" << Symbol
+ << "' is not an immediate.\nInstruction is:\n ";
+ Inst.dump_pretty(ErrMsgStream,
+ Checker.Disassembler->getContext().getAsmInfo(),
+ Checker.InstPrinter);
+
+ return std::make_pair(EvalResult(ErrMsgStream.str()), "");
+ }
+
+ return std::make_pair(EvalResult(Op.getImm()), RemainingExpr);
+ }
+
+    // Evaluate a call to next_pc. Decode the instruction at the given
+    // symbol and return the following program counter.
+    // Returns an error if the instruction cannot be decoded.
+    // On success, returns a pair containing the next PC, plus the
+    // expression remaining to be evaluated.
+ std::pair<EvalResult, StringRef> evalNextPC(StringRef Expr) const {
+ if (!Expr.startswith("("))
+ return std::make_pair(unexpectedToken(Expr, Expr, "expected '('"), "");
+ StringRef RemainingExpr = Expr.substr(1).ltrim();
+ StringRef Symbol;
+ std::tie(Symbol, RemainingExpr) = parseSymbol(RemainingExpr);
+
+ if (!Checker.checkSymbolIsValidForLoad(Symbol))
+ return std::make_pair(EvalResult(("Cannot decode unknown symbol '"
+ + Symbol + "'").str()),
+ "");
+
+ if (!RemainingExpr.startswith(")"))
+ return std::make_pair(unexpectedToken(RemainingExpr, RemainingExpr,
+ "expected ')'"),
+ "");
+ RemainingExpr = RemainingExpr.substr(1).ltrim();
+
+ MCInst Inst;
+ uint64_t Size;
+ if (!decodeInst(Symbol, Inst, Size))
+ return std::make_pair(EvalResult(("Couldn't decode instruction at '" +
+ Symbol + "'").str()),
+ "");
+ uint64_t NextPC = Checker.getSymbolAddress(Symbol) + Size;
+
+ return std::make_pair(EvalResult(NextPC), RemainingExpr);
+ }
+
+    // Evaluate an identifier expr, which may be a symbol, or a call to
+    // one of the builtin functions: decode_operand or next_pc.
+ // Return the result, plus the expression remaining to be parsed.
+ std::pair<EvalResult, StringRef> evalIdentifierExpr(StringRef Expr) const {
+ StringRef Symbol;
+ StringRef RemainingExpr;
+ std::tie(Symbol, RemainingExpr) = parseSymbol(Expr);
+
+ // Check for builtin function calls.
+ if (Symbol == "decode_operand")
+ return evalDecodeOperand(RemainingExpr);
+ else if (Symbol == "next_pc")
+ return evalNextPC(RemainingExpr);
+
+ // Looks like a plain symbol reference.
+ return std::make_pair(EvalResult(Checker.getSymbolAddress(Symbol)),
+ RemainingExpr);
+ }
+
+ // Parse a number (hexadecimal or decimal) and return a (string, string)
+ // pair representing the number and the expression remaining to be parsed.
+ std::pair<StringRef, StringRef> parseNumberString(StringRef Expr) const {
+ size_t FirstNonDigit = StringRef::npos;
+ if (Expr.startswith("0x")) {
+ FirstNonDigit = Expr.find_first_not_of("0123456789abcdefABCDEF", 2);
+ if (FirstNonDigit == StringRef::npos)
+ FirstNonDigit = Expr.size();
+ } else {
+ FirstNonDigit = Expr.find_first_not_of("0123456789");
+ if (FirstNonDigit == StringRef::npos)
+ FirstNonDigit = Expr.size();
+ }
+ return std::make_pair(Expr.substr(0, FirstNonDigit),
+ Expr.substr(FirstNonDigit));
+ }
+
+    // Evaluate a constant numeric expression (hexadecimal or decimal) and
+ // return a pair containing the result, and the expression remaining to be
+ // evaluated.
+ std::pair<EvalResult, StringRef> evalNumberExpr(StringRef Expr) const {
+ StringRef ValueStr;
+ StringRef RemainingExpr;
+ std::tie(ValueStr, RemainingExpr) = parseNumberString(Expr);
+
+ if (ValueStr.empty() || !isdigit(ValueStr[0]))
+ return std::make_pair(unexpectedToken(RemainingExpr, RemainingExpr,
+ "expected number"),
+ "");
+ uint64_t Value;
+ ValueStr.getAsInteger(0, Value);
+ return std::make_pair(EvalResult(Value), RemainingExpr);
+ }
+
+ // Evaluate an expression of the form "(<expr>)" and return a pair
+ // containing the result of evaluating <expr>, plus the expression
+ // remaining to be parsed.
+ std::pair<EvalResult, StringRef> evalParensExpr(StringRef Expr) const {
+ assert(Expr.startswith("(") && "Not a parenthesized expression");
+ EvalResult SubExprResult;
+ StringRef RemainingExpr;
+ std::tie(SubExprResult, RemainingExpr) =
+ evalComplexExpr(evalSimpleExpr(Expr.substr(1).ltrim()));
+ if (SubExprResult.hasError())
+ return std::make_pair(SubExprResult, "");
+ if (!RemainingExpr.startswith(")"))
+ return std::make_pair(unexpectedToken(RemainingExpr, Expr,
+ "expected ')'"),
+ "");
+ RemainingExpr = RemainingExpr.substr(1).ltrim();
+ return std::make_pair(SubExprResult, RemainingExpr);
+ }
+
+ // Evaluate an expression in one of the following forms:
+ // *{<number>}<symbol>
+ // *{<number>}(<symbol> + <number>)
+ // *{<number>}(<symbol> - <number>)
+ // Return a pair containing the result, plus the expression remaining to be
+ // parsed.
+ std::pair<EvalResult, StringRef> evalLoadExpr(StringRef Expr) const {
+ assert(Expr.startswith("*") && "Not a load expression");
+ StringRef RemainingExpr = Expr.substr(1).ltrim();
+ // Parse read size.
+ if (!RemainingExpr.startswith("{"))
+ return std::make_pair(EvalResult("Expected '{' following '*'."), "");
+ RemainingExpr = RemainingExpr.substr(1).ltrim();
+ EvalResult ReadSizeExpr;
+ std::tie(ReadSizeExpr, RemainingExpr) = evalNumberExpr(RemainingExpr);
+ if (ReadSizeExpr.hasError())
+ return std::make_pair(ReadSizeExpr, RemainingExpr);
+ uint64_t ReadSize = ReadSizeExpr.getValue();
+ if (ReadSize < 1 || ReadSize > 8)
+ return std::make_pair(EvalResult("Invalid size for dereference."), "");
+ if (!RemainingExpr.startswith("}"))
+ return std::make_pair(EvalResult("Missing '}' for dereference."), "");
+ RemainingExpr = RemainingExpr.substr(1).ltrim();
+
+ // Check for '(symbol +/- constant)' form.
+ bool SymbolPlusConstant = false;
+ if (RemainingExpr.startswith("(")) {
+ SymbolPlusConstant = true;
+ RemainingExpr = RemainingExpr.substr(1).ltrim();
+ }
+
+ // Read symbol.
+ StringRef Symbol;
+ std::tie(Symbol, RemainingExpr) = parseSymbol(RemainingExpr);
+
+ if (!Checker.checkSymbolIsValidForLoad(Symbol))
+ return std::make_pair(EvalResult(("Cannot dereference unknown symbol '"
+ + Symbol + "'").str()),
+ "");
+
+      // Set up default offset.
+ int64_t Offset = 0;
+
+ // Handle "+/- constant)" portion if necessary.
+ if (SymbolPlusConstant) {
+ char OpChar = RemainingExpr[0];
+ if (OpChar != '+' && OpChar != '-')
+ return std::make_pair(EvalResult("Invalid operator in load address."),
+ "");
+ RemainingExpr = RemainingExpr.substr(1).ltrim();
+
+ EvalResult OffsetExpr;
+        std::tie(OffsetExpr, RemainingExpr) = evalNumberExpr(RemainingExpr);
+        if (OffsetExpr.hasError())
+          return std::make_pair(OffsetExpr, "");
+
+        Offset = (OpChar == '+') ? OffsetExpr.getValue()
+                                 : -1 * OffsetExpr.getValue();
+
+ if (!RemainingExpr.startswith(")"))
+ return std::make_pair(EvalResult("Missing ')' in load address."),
+ "");
+
+ RemainingExpr = RemainingExpr.substr(1).ltrim();
+ }
+
+ return std::make_pair(
+ EvalResult(Checker.readMemoryAtSymbol(Symbol, Offset, ReadSize)),
+ RemainingExpr);
+ }
+
+ // Evaluate a "simple" expression. This is any expression that _isn't_ an
+ // un-parenthesized binary expression.
+ //
+ // "Simple" expressions can be optionally bit-sliced. See evalSlicedExpr.
+ //
+ // Returns a pair containing the result of the evaluation, plus the
+ // expression remaining to be parsed.
+ std::pair<EvalResult, StringRef> evalSimpleExpr(StringRef Expr) const {
+ EvalResult SubExprResult;
+ StringRef RemainingExpr;
+
+ if (Expr.empty())
+ return std::make_pair(EvalResult("Unexpected end of expression"), "");
+
+ if (Expr[0] == '(')
+ std::tie(SubExprResult, RemainingExpr) = evalParensExpr(Expr);
+ else if (Expr[0] == '*')
+ std::tie(SubExprResult, RemainingExpr) = evalLoadExpr(Expr);
+ else if (isalpha(Expr[0]))
+ std::tie(SubExprResult, RemainingExpr) = evalIdentifierExpr(Expr);
+ else if (isdigit(Expr[0]))
+ std::tie(SubExprResult, RemainingExpr) = evalNumberExpr(Expr);
+
+ if (SubExprResult.hasError())
+ return std::make_pair(SubExprResult, RemainingExpr);
+
+ // Evaluate bit-slice if present.
+ if (RemainingExpr.startswith("["))
+ std::tie(SubExprResult, RemainingExpr) =
+ evalSliceExpr(std::make_pair(SubExprResult, RemainingExpr));
+
+ return std::make_pair(SubExprResult, RemainingExpr);
+ }
+
+ // Evaluate a bit-slice of an expression.
+ // A bit-slice has the form "<expr>[high:low]". The result of evaluating a
+ // slice is the bits between high and low (inclusive) in the original
+ // expression, right shifted so that the "low" bit is in position 0 in the
+ // result.
+ // Returns a pair containing the result of the slice operation, plus the
+ // expression remaining to be parsed.
+ std::pair<EvalResult, StringRef> evalSliceExpr(
+                        std::pair<EvalResult, StringRef> Ctx) const {
+ EvalResult SubExprResult;
+ StringRef RemainingExpr;
+ std::tie(SubExprResult, RemainingExpr) = Ctx;
+
+ assert(RemainingExpr.startswith("[") && "Not a slice expr.");
+ RemainingExpr = RemainingExpr.substr(1).ltrim();
+
+ EvalResult HighBitExpr;
+ std::tie(HighBitExpr, RemainingExpr) = evalNumberExpr(RemainingExpr);
+
+ if (HighBitExpr.hasError())
+ return std::make_pair(HighBitExpr, RemainingExpr);
+
+ if (!RemainingExpr.startswith(":"))
+ return std::make_pair(unexpectedToken(RemainingExpr, RemainingExpr,
+ "expected ':'"),
+ "");
+ RemainingExpr = RemainingExpr.substr(1).ltrim();
+
+ EvalResult LowBitExpr;
+ std::tie(LowBitExpr, RemainingExpr) = evalNumberExpr(RemainingExpr);
+
+ if (LowBitExpr.hasError())
+ return std::make_pair(LowBitExpr, RemainingExpr);
+
+ if (!RemainingExpr.startswith("]"))
+ return std::make_pair(unexpectedToken(RemainingExpr, RemainingExpr,
+ "expected ']'"),
+ "");
+ RemainingExpr = RemainingExpr.substr(1).ltrim();
+
+ unsigned HighBit = HighBitExpr.getValue();
+ unsigned LowBit = LowBitExpr.getValue();
+ uint64_t Mask = ((uint64_t)1 << (HighBit - LowBit + 1)) - 1;
+ uint64_t SlicedValue = (SubExprResult.getValue() >> LowBit) & Mask;
+ return std::make_pair(EvalResult(SlicedValue), RemainingExpr);
+ }
+
+ // Evaluate a "complex" expression.
+ // Takes an already evaluated subexpression and checks for the presence of a
+ // binary operator, computing the result of the binary operation if one is
+ // found. Used to make arithmetic expressions left-associative.
+ // Returns a pair containing the ultimate result of evaluating the
+ // expression, plus the expression remaining to be evaluated.
+ std::pair<EvalResult, StringRef> evalComplexExpr(
+ std::pair<EvalResult, StringRef> Ctx) const {
+ EvalResult LHSResult;
+ StringRef RemainingExpr;
+ std::tie(LHSResult, RemainingExpr) = Ctx;
+
+ // If there was an error, or there's nothing left to evaluate, return the
+ // result.
+ if (LHSResult.hasError() || RemainingExpr == "")
+ return std::make_pair(LHSResult, RemainingExpr);
+
+      // Otherwise check if this is a binary expression.
+ BinOpToken BinOp;
+ std::tie(BinOp, RemainingExpr) = parseBinOpToken(RemainingExpr);
+
+ // If this isn't a recognized expression just return.
+ if (BinOp == BinOpToken::Invalid)
+ return std::make_pair(LHSResult, RemainingExpr);
+
+ // This is a recognized bin-op. Evaluate the RHS, then evaluate the binop.
+ EvalResult RHSResult;
+ std::tie(RHSResult, RemainingExpr) = evalSimpleExpr(RemainingExpr);
+
+ // If there was an error evaluating the RHS, return it.
+ if (RHSResult.hasError())
+ return std::make_pair(RHSResult, RemainingExpr);
+
+ // This is a binary expression - evaluate and try to continue as a
+ // complex expr.
+ EvalResult ThisResult(computeBinOpResult(BinOp, LHSResult, RHSResult));
+
+ return evalComplexExpr(std::make_pair(ThisResult, RemainingExpr));
+ }
+
+ bool decodeInst(StringRef Symbol, MCInst &Inst, uint64_t &Size) const {
+ MCDisassembler *Dis = Checker.Disassembler;
+ StringRef SectionMem = Checker.getSubsectionStartingAt(Symbol);
+ StringRefMemoryObject SectionBytes(SectionMem, 0);
+
+ MCDisassembler::DecodeStatus S =
+ Dis->getInstruction(Inst, Size, SectionBytes, 0, nulls(), nulls());
+
+ return (S == MCDisassembler::Success);
+ }
+
+ };
+
+}
+
+bool RuntimeDyldChecker::check(StringRef CheckExpr) const {
+ CheckExpr = CheckExpr.trim();
+ DEBUG(llvm::dbgs() << "RuntimeDyldChecker: Checking '" << CheckExpr
+ << "'...\n");
+ RuntimeDyldCheckerExprEval P(*this, ErrStream);
+ bool Result = P.evaluate(CheckExpr);
+ (void)Result;
+ DEBUG(llvm::dbgs() << "RuntimeDyldChecker: '" << CheckExpr << "' "
+ << (Result ? "passed" : "FAILED") << ".\n");
+ return Result;
+}
+
+bool RuntimeDyldChecker::checkAllRulesInBuffer(StringRef RulePrefix,
+                                               MemoryBuffer *MemBuf) const {
+ bool DidAllTestsPass = true;
+ unsigned NumRules = 0;
+
+ const char *LineStart = MemBuf->getBufferStart();
+
+ // Eat whitespace.
+ while (LineStart != MemBuf->getBufferEnd() &&
+ std::isspace(*LineStart))
+ ++LineStart;
+
+ while (LineStart != MemBuf->getBufferEnd() && *LineStart != '\0') {
+ const char *LineEnd = LineStart;
+ while (LineEnd != MemBuf->getBufferEnd() &&
+ *LineEnd != '\r' && *LineEnd != '\n')
+ ++LineEnd;
+
+ StringRef Line(LineStart, LineEnd - LineStart);
+ if (Line.startswith(RulePrefix)) {
+ DidAllTestsPass &= check(Line.substr(RulePrefix.size()));
+ ++NumRules;
+ }
+
+ // Eat whitespace.
+ LineStart = LineEnd;
+ while (LineStart != MemBuf->getBufferEnd() &&
+ std::isspace(*LineStart))
+ ++LineStart;
+ }
+ return DidAllTestsPass && (NumRules != 0);
+}
+
+bool RuntimeDyldChecker::checkSymbolIsValidForLoad(StringRef Symbol) const {
+ return RTDyld.getSymbolAddress(Symbol) != nullptr;
+}
+
+uint64_t RuntimeDyldChecker::getSymbolAddress(StringRef Symbol) const {
+ return RTDyld.getAnySymbolRemoteAddress(Symbol);
+}
+
+uint64_t RuntimeDyldChecker::readMemoryAtSymbol(StringRef Symbol,
+ int64_t Offset,
+ unsigned Size) const {
+ uint8_t *Src = RTDyld.getSymbolAddress(Symbol);
+ uint64_t Result = 0;
+ memcpy(&Result, Src + Offset, Size);
+ return Result;
+}
+
+StringRef RuntimeDyldChecker::getSubsectionStartingAt(StringRef Name) const {
+ RuntimeDyldImpl::SymbolTableMap::const_iterator pos =
+ RTDyld.GlobalSymbolTable.find(Name);
+ if (pos == RTDyld.GlobalSymbolTable.end())
+ return StringRef();
+ RuntimeDyldImpl::SymbolLoc Loc = pos->second;
+ uint8_t *SectionAddr = RTDyld.getSectionAddress(Loc.first);
+ return StringRef(reinterpret_cast<const char*>(SectionAddr) + Loc.second,
+ RTDyld.Sections[Loc.first].Size - Loc.second);
+}
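
The new RuntimeDyldChecker.cpp above is a small recursive-descent evaluator for equality rules of the form 'LHS = RHS', fed line by line from a buffer by checkAllRulesInBuffer. As a rough illustration (symbol names hypothetical), rules embedded in a test file's comments might read:

    # rtdyld-check: decode_operand(foo, 2) = 0x4000
    # rtdyld-check: next_pc(foo) = *{8}(bar + 8)
    # rtdyld-check: (foo + 4)[15:0] = foo[15:0] + 4

Here decode_operand(sym, idx) yields operand idx of the instruction at sym, next_pc(sym) is the address just past that instruction, *{N}(sym + off) reads N bytes of memory, [high:low] bit-slices a value, and the +, -, &, |, << and >> operators combine subexpressions left-associatively.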
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
index 6ba24b9..80e489c 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
@@ -32,7 +32,7 @@ using namespace llvm::object;
namespace {
-static inline error_code check(error_code Err) {
+static inline std::error_code check(std::error_code Err) {
if (Err) {
report_fatal_error(Err.message());
}
@@ -55,9 +55,9 @@ template <class ELFT> class DyldELFObject : public ELFObjectFile<ELFT> {
public:
DyldELFObject(std::unique_ptr<ObjectFile> UnderlyingFile,
- MemoryBuffer *Wrapper, error_code &ec);
+ std::unique_ptr<MemoryBuffer> Wrapper, std::error_code &ec);
- DyldELFObject(MemoryBuffer *Wrapper, error_code &ec);
+ DyldELFObject(std::unique_ptr<MemoryBuffer> Wrapper, std::error_code &ec);
void updateSectionAddress(const SectionRef &Sec, uint64_t Addr);
void updateSymbolAddress(const SymbolRef &Sym, uint64_t Addr);
@@ -109,15 +109,17 @@ public:
// actual memory. Ultimately, the Binary parent class will take ownership of
// this MemoryBuffer object but not the underlying memory.
template <class ELFT>
-DyldELFObject<ELFT>::DyldELFObject(MemoryBuffer *Wrapper, error_code &ec)
- : ELFObjectFile<ELFT>(Wrapper, ec) {
+DyldELFObject<ELFT>::DyldELFObject(std::unique_ptr<MemoryBuffer> Wrapper,
+ std::error_code &EC)
+ : ELFObjectFile<ELFT>(std::move(Wrapper), EC) {
this->isDyldELFObject = true;
}
template <class ELFT>
DyldELFObject<ELFT>::DyldELFObject(std::unique_ptr<ObjectFile> UnderlyingFile,
- MemoryBuffer *Wrapper, error_code &ec)
- : ELFObjectFile<ELFT>(Wrapper, ec),
+ std::unique_ptr<MemoryBuffer> Wrapper,
+ std::error_code &EC)
+ : ELFObjectFile<ELFT>(std::move(Wrapper), EC),
UnderlyingFile(std::move(UnderlyingFile)) {
this->isDyldELFObject = true;
}
@@ -182,30 +184,30 @@ RuntimeDyldELF::createObjectImageFromFile(std::unique_ptr<object::ObjectFile> Ob
if (!ObjFile)
return nullptr;
- error_code ec;
- MemoryBuffer *Buffer =
- MemoryBuffer::getMemBuffer(ObjFile->getData(), "", false);
+ std::error_code ec;
+ std::unique_ptr<MemoryBuffer> Buffer(
+ MemoryBuffer::getMemBuffer(ObjFile->getData(), "", false));
if (ObjFile->getBytesInAddress() == 4 && ObjFile->isLittleEndian()) {
auto Obj =
llvm::make_unique<DyldELFObject<ELFType<support::little, 2, false>>>(
- std::move(ObjFile), Buffer, ec);
+ std::move(ObjFile), std::move(Buffer), ec);
return new ELFObjectImage<ELFType<support::little, 2, false>>(
nullptr, std::move(Obj));
} else if (ObjFile->getBytesInAddress() == 4 && !ObjFile->isLittleEndian()) {
auto Obj =
llvm::make_unique<DyldELFObject<ELFType<support::big, 2, false>>>(
- std::move(ObjFile), Buffer, ec);
+ std::move(ObjFile), std::move(Buffer), ec);
return new ELFObjectImage<ELFType<support::big, 2, false>>(nullptr, std::move(Obj));
} else if (ObjFile->getBytesInAddress() == 8 && !ObjFile->isLittleEndian()) {
auto Obj = llvm::make_unique<DyldELFObject<ELFType<support::big, 2, true>>>(
- std::move(ObjFile), Buffer, ec);
+ std::move(ObjFile), std::move(Buffer), ec);
return new ELFObjectImage<ELFType<support::big, 2, true>>(nullptr,
std::move(Obj));
} else if (ObjFile->getBytesInAddress() == 8 && ObjFile->isLittleEndian()) {
auto Obj =
llvm::make_unique<DyldELFObject<ELFType<support::little, 2, true>>>(
- std::move(ObjFile), Buffer, ec);
+ std::move(ObjFile), std::move(Buffer), ec);
return new ELFObjectImage<ELFType<support::little, 2, true>>(
nullptr, std::move(Obj));
} else
@@ -218,31 +220,33 @@ ObjectImage *RuntimeDyldELF::createObjectImage(ObjectBuffer *Buffer) {
std::pair<unsigned char, unsigned char> Ident =
std::make_pair((uint8_t)Buffer->getBufferStart()[ELF::EI_CLASS],
(uint8_t)Buffer->getBufferStart()[ELF::EI_DATA]);
- error_code ec;
+ std::error_code ec;
+
+ std::unique_ptr<MemoryBuffer> Buf(Buffer->getMemBuffer());
if (Ident.first == ELF::ELFCLASS32 && Ident.second == ELF::ELFDATA2LSB) {
auto Obj =
llvm::make_unique<DyldELFObject<ELFType<support::little, 4, false>>>(
- Buffer->getMemBuffer(), ec);
+ std::move(Buf), ec);
return new ELFObjectImage<ELFType<support::little, 4, false>>(
Buffer, std::move(Obj));
} else if (Ident.first == ELF::ELFCLASS32 &&
Ident.second == ELF::ELFDATA2MSB) {
auto Obj =
llvm::make_unique<DyldELFObject<ELFType<support::big, 4, false>>>(
- Buffer->getMemBuffer(), ec);
+ std::move(Buf), ec);
return new ELFObjectImage<ELFType<support::big, 4, false>>(Buffer,
std::move(Obj));
} else if (Ident.first == ELF::ELFCLASS64 &&
Ident.second == ELF::ELFDATA2MSB) {
auto Obj = llvm::make_unique<DyldELFObject<ELFType<support::big, 8, true>>>(
- Buffer->getMemBuffer(), ec);
+ std::move(Buf), ec);
return new ELFObjectImage<ELFType<support::big, 8, true>>(Buffer, std::move(Obj));
} else if (Ident.first == ELF::ELFCLASS64 &&
Ident.second == ELF::ELFDATA2LSB) {
auto Obj =
llvm::make_unique<DyldELFObject<ELFType<support::little, 8, true>>>(
- Buffer->getMemBuffer(), ec);
+ std::move(Buf), ec);
return new ELFObjectImage<ELFType<support::little, 8, true>>(Buffer, std::move(Obj));
} else
llvm_unreachable("Unexpected ELF format");
@@ -612,30 +616,38 @@ void RuntimeDyldELF::resolveMIPSRelocation(const SectionEntry &Section,
}
}
-// Return the .TOC. section address to R_PPC64_TOC relocations.
-uint64_t RuntimeDyldELF::findPPC64TOC() const {
+// Find the .TOC. section and offset and record them in Rel.
+void RuntimeDyldELF::findPPC64TOCSection(ObjectImage &Obj,
+ ObjSectionToIDMap &LocalSections,
+ RelocationValueRef &Rel) {
+ // Set a default SectionID in case we do not find a TOC section below.
+  // This may happen for references to the TOC base (sym@toc, .odp
+ // relocation) without a .toc directive. In this case just use the
+ // first section (which is usually the .odp) since the code won't
+ // reference the .toc base directly.
+  Rel.SymbolName = nullptr;
+ Rel.SectionID = 0;
+
// The TOC consists of sections .got, .toc, .tocbss, .plt in that
// order. The TOC starts where the first of these sections starts.
- SectionList::const_iterator it = Sections.begin();
- SectionList::const_iterator ite = Sections.end();
- for (; it != ite; ++it) {
- if (it->Name == ".got" || it->Name == ".toc" || it->Name == ".tocbss" ||
- it->Name == ".plt")
+ for (section_iterator si = Obj.begin_sections(), se = Obj.end_sections();
+ si != se; ++si) {
+
+ StringRef SectionName;
+ check(si->getName(SectionName));
+
+ if (SectionName == ".got"
+ || SectionName == ".toc"
+ || SectionName == ".tocbss"
+ || SectionName == ".plt") {
+ Rel.SectionID = findOrEmitSection(Obj, *si, false, LocalSections);
break;
+ }
}
- if (it == ite) {
- // This may happen for
- // * references to TOC base base (sym@toc, .odp relocation) without
- // a .toc directive.
- // In this case just use the first section (which is usually
- // the .odp) since the code won't reference the .toc base
- // directly.
- it = Sections.begin();
- }
- assert(it != ite);
+
// Per the ppc64-elf-linux ABI, The TOC base is TOC value plus 0x8000
// thus permitting a full 64 Kbytes segment.
- return it->LoadAddress + 0x8000;
+ Rel.Addend = 0x8000;
}
// Returns the sections and offset associated with the ODP entry referenced
@@ -702,24 +714,37 @@ void RuntimeDyldELF::findOPDEntrySection(ObjectImage &Obj,
llvm_unreachable("Attempting to get address of ODP entry!");
}
-// Relocation masks following the #lo(value), #hi(value), #higher(value),
-// and #highest(value) macros defined in section 4.5.1. Relocation Types
-// in PPC-elf64abi document.
-//
+// Relocation masks following the #lo(value), #hi(value), #ha(value),
+// #higher(value), #highera(value), #highest(value), and #highesta(value)
+// macros defined in section 4.5.1. Relocation Types of the PPC-elf64abi
+// document.
+
static inline uint16_t applyPPClo(uint64_t value) { return value & 0xffff; }
static inline uint16_t applyPPChi(uint64_t value) {
return (value >> 16) & 0xffff;
}
+static inline uint16_t applyPPCha(uint64_t value) {
+ return ((value + 0x8000) >> 16) & 0xffff;
+}
+
static inline uint16_t applyPPChigher(uint64_t value) {
return (value >> 32) & 0xffff;
}
+static inline uint16_t applyPPChighera(uint64_t value) {
+ return ((value + 0x8000) >> 32) & 0xffff;
+}
+
static inline uint16_t applyPPChighest(uint64_t value) {
return (value >> 48) & 0xffff;
}
+static inline uint16_t applyPPChighesta(uint64_t value) {
+ return ((value + 0x8000) >> 48) & 0xffff;
+}
+
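+// For example (illustrative values): applyPPClo(0x0001FFFF) is 0xFFFF, which
+// sign-extends to -1 when the two halves are recombined, so the "adjusted"
+// high half applyPPCha(0x0001FFFF) must be 0x0002 rather than 0x0001:
+// (0x0002 << 16) + int16_t(0xFFFF) recovers 0x0001FFFF.
+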
void RuntimeDyldELF::resolvePPC64Relocation(const SectionEntry &Section,
uint64_t Offset, uint64_t Value,
uint32_t Type, int64_t Addend) {
@@ -728,24 +753,57 @@ void RuntimeDyldELF::resolvePPC64Relocation(const SectionEntry &Section,
default:
llvm_unreachable("Relocation type not implemented yet!");
break;
+ case ELF::R_PPC64_ADDR16:
+ writeInt16BE(LocalAddress, applyPPClo(Value + Addend));
+ break;
+ case ELF::R_PPC64_ADDR16_DS:
+ writeInt16BE(LocalAddress, applyPPClo(Value + Addend) & ~3);
+ break;
case ELF::R_PPC64_ADDR16_LO:
writeInt16BE(LocalAddress, applyPPClo(Value + Addend));
break;
+ case ELF::R_PPC64_ADDR16_LO_DS:
+ writeInt16BE(LocalAddress, applyPPClo(Value + Addend) & ~3);
+ break;
case ELF::R_PPC64_ADDR16_HI:
writeInt16BE(LocalAddress, applyPPChi(Value + Addend));
break;
+ case ELF::R_PPC64_ADDR16_HA:
+ writeInt16BE(LocalAddress, applyPPCha(Value + Addend));
+ break;
case ELF::R_PPC64_ADDR16_HIGHER:
writeInt16BE(LocalAddress, applyPPChigher(Value + Addend));
break;
+ case ELF::R_PPC64_ADDR16_HIGHERA:
+ writeInt16BE(LocalAddress, applyPPChighera(Value + Addend));
+ break;
case ELF::R_PPC64_ADDR16_HIGHEST:
writeInt16BE(LocalAddress, applyPPChighest(Value + Addend));
break;
+ case ELF::R_PPC64_ADDR16_HIGHESTA:
+ writeInt16BE(LocalAddress, applyPPChighesta(Value + Addend));
+ break;
case ELF::R_PPC64_ADDR14: {
assert(((Value + Addend) & 3) == 0);
// Preserve the AA/LK bits in the branch instruction
uint8_t aalk = *(LocalAddress + 3);
writeInt16BE(LocalAddress + 2, (aalk & 3) | ((Value + Addend) & 0xfffc));
} break;
+ case ELF::R_PPC64_REL16_LO: {
+ uint64_t FinalAddress = (Section.LoadAddress + Offset);
+ uint64_t Delta = Value - FinalAddress + Addend;
+ writeInt16BE(LocalAddress, applyPPClo(Delta));
+ } break;
+ case ELF::R_PPC64_REL16_HI: {
+ uint64_t FinalAddress = (Section.LoadAddress + Offset);
+ uint64_t Delta = Value - FinalAddress + Addend;
+ writeInt16BE(LocalAddress, applyPPChi(Delta));
+ } break;
+ case ELF::R_PPC64_REL16_HA: {
+ uint64_t FinalAddress = (Section.LoadAddress + Offset);
+ uint64_t Delta = Value - FinalAddress + Addend;
+ writeInt16BE(LocalAddress, applyPPCha(Delta));
+ } break;
case ELF::R_PPC64_ADDR32: {
int32_t Result = static_cast<int32_t>(Value + Addend);
if (SignExtend32<32>(Result) != Result)
@@ -775,19 +833,6 @@ void RuntimeDyldELF::resolvePPC64Relocation(const SectionEntry &Section,
case ELF::R_PPC64_ADDR64:
writeInt64BE(LocalAddress, Value + Addend);
break;
- case ELF::R_PPC64_TOC:
- writeInt64BE(LocalAddress, findPPC64TOC());
- break;
- case ELF::R_PPC64_TOC16: {
- uint64_t TOCStart = findPPC64TOC();
- Value = applyPPClo((Value + Addend) - TOCStart);
- writeInt16BE(LocalAddress, applyPPClo(Value));
- } break;
- case ELF::R_PPC64_TOC16_DS: {
- uint64_t TOCStart = findPPC64TOC();
- Value = ((Value + Addend) - TOCStart);
- writeInt16BE(LocalAddress, applyPPClo(Value));
- } break;
}
}
@@ -1139,14 +1184,20 @@ relocation_iterator RuntimeDyldELF::processRelocationRef(
ELF::R_PPC64_ADDR64, Value.Addend);
// Generates the 64-bits address loads as exemplified in section
- // 4.5.1 in PPC64 ELF ABI.
- RelocationEntry REhst(SectionID, StubTargetAddr - Section.Address + 2,
+ // 4.5.1 in PPC64 ELF ABI. Note that the relocations need to
+ // apply to the low part of the instructions, so we have to update
+ // the offset according to the target endianness.
+ uint64_t StubRelocOffset = StubTargetAddr - Section.Address;
+ if (!IsTargetLittleEndian)
+ StubRelocOffset += 2;
+
+ RelocationEntry REhst(SectionID, StubRelocOffset + 0,
ELF::R_PPC64_ADDR16_HIGHEST, Value.Addend);
- RelocationEntry REhr(SectionID, StubTargetAddr - Section.Address + 6,
+ RelocationEntry REhr(SectionID, StubRelocOffset + 4,
ELF::R_PPC64_ADDR16_HIGHER, Value.Addend);
- RelocationEntry REh(SectionID, StubTargetAddr - Section.Address + 14,
+ RelocationEntry REh(SectionID, StubRelocOffset + 12,
ELF::R_PPC64_ADDR16_HI, Value.Addend);
- RelocationEntry REl(SectionID, StubTargetAddr - Section.Address + 18,
+ RelocationEntry REl(SectionID, StubRelocOffset + 16,
ELF::R_PPC64_ADDR16_LO, Value.Addend);
if (Value.SymbolName) {
@@ -1170,12 +1221,52 @@ relocation_iterator RuntimeDyldELF::processRelocationRef(
// Restore the TOC for external calls
writeInt32BE(Target + 4, 0xE8410028); // ld r2,40(r1)
}
+ } else if (RelType == ELF::R_PPC64_TOC16 ||
+ RelType == ELF::R_PPC64_TOC16_DS ||
+ RelType == ELF::R_PPC64_TOC16_LO ||
+ RelType == ELF::R_PPC64_TOC16_LO_DS ||
+ RelType == ELF::R_PPC64_TOC16_HI ||
+ RelType == ELF::R_PPC64_TOC16_HA) {
+ // These relocations are supposed to subtract the TOC address from
+ // the final value. This does not fit cleanly into the RuntimeDyld
+ // scheme, since there may be *two* sections involved in determining
+      // the relocation value (the section of the symbol referred to by the
+ // relocation, and the TOC section associated with the current module).
+ //
+ // Fortunately, these relocations are currently only ever generated
+      // referring to symbols that themselves reside in the TOC, which means
+ // that the two sections are actually the same. Thus they cancel out
+ // and we can immediately resolve the relocation right now.
+ switch (RelType) {
+ case ELF::R_PPC64_TOC16: RelType = ELF::R_PPC64_ADDR16; break;
+ case ELF::R_PPC64_TOC16_DS: RelType = ELF::R_PPC64_ADDR16_DS; break;
+ case ELF::R_PPC64_TOC16_LO: RelType = ELF::R_PPC64_ADDR16_LO; break;
+ case ELF::R_PPC64_TOC16_LO_DS: RelType = ELF::R_PPC64_ADDR16_LO_DS; break;
+ case ELF::R_PPC64_TOC16_HI: RelType = ELF::R_PPC64_ADDR16_HI; break;
+ case ELF::R_PPC64_TOC16_HA: RelType = ELF::R_PPC64_ADDR16_HA; break;
+ default: llvm_unreachable("Wrong relocation type.");
+ }
+
+ RelocationValueRef TOCValue;
+ findPPC64TOCSection(Obj, ObjSectionToID, TOCValue);
+ if (Value.SymbolName || Value.SectionID != TOCValue.SectionID)
+ llvm_unreachable("Unsupported TOC relocation.");
+ Value.Addend -= TOCValue.Addend;
+ resolveRelocation(Sections[SectionID], Offset, Value.Addend, RelType, 0);
} else {
+ // There are two ways to refer to the TOC address directly: either
+ // via a ELF::R_PPC64_TOC relocation (where both symbol and addend are
+ // ignored), or via any relocation that refers to the magic ".TOC."
+ // symbols (in which case the addend is respected).
+ if (RelType == ELF::R_PPC64_TOC) {
+ RelType = ELF::R_PPC64_ADDR64;
+ findPPC64TOCSection(Obj, ObjSectionToID, Value);
+ } else if (TargetName == ".TOC.") {
+ findPPC64TOCSection(Obj, ObjSectionToID, Value);
+ Value.Addend += Addend;
+ }
+
RelocationEntry RE(SectionID, Offset, RelType, Value.Addend);
- // Extra check to avoid relocation againt empty symbols (usually
- // the R_PPC64_TOC).
- if (SymType != SymbolRef::ST_Unknown && TargetName.empty())
- Value.SymbolName = nullptr;
if (Value.SymbolName)
addRelocationForSymbol(RE, Value.SymbolName);
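
Conceptually, a TOC16-family relocation resolves to S + A - TOC. Because findPPC64TOCSection reports the TOC as its section's base plus 0x8000, and these relocations currently only ever name symbols in that same section, the section base cancels and the value can be computed immediately from section-relative quantities. A simplified sketch of the cancellation (not the actual RuntimeDyld code):

    // S = SectionBase + SymOffset and TOC = SectionBase + 0x8000, so
    // S + A - TOC == SymOffset + (A - 0x8000): no load addresses needed.
    uint64_t resolveTOC16(uint64_t SymOffset, int64_t Addend) {
      return SymOffset + (Addend - 0x8000);
    }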
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h
index a526073..59fdfbe 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h
@@ -20,10 +20,9 @@
using namespace llvm;
namespace llvm {
-
namespace {
// Helper for extensive error checking in debug builds.
-error_code Check(error_code Err) {
+std::error_code Check(std::error_code Err) {
if (Err) {
report_fatal_error(Err.message());
}
@@ -83,7 +82,8 @@ class RuntimeDyldELF : public RuntimeDyldImpl {
return 1;
}
- uint64_t findPPC64TOC() const;
+ void findPPC64TOCSection(ObjectImage &Obj, ObjSectionToIDMap &LocalSections,
+ RelocationValueRef &Rel);
void findOPDEntrySection(ObjectImage &Obj, ObjSectionToIDMap &LocalSections,
RelocationValueRef &Rel);
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h
index 412cf20..0336cba 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h
@@ -20,6 +20,7 @@
#include "llvm/ADT/Triple.h"
#include "llvm/ExecutionEngine/ObjectImage.h"
#include "llvm/ExecutionEngine/RuntimeDyld.h"
+#include "llvm/ExecutionEngine/RuntimeDyldChecker.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
@@ -28,8 +29,8 @@
#include "llvm/Support/Mutex.h"
#include "llvm/Support/SwapByteOrder.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Support/system_error.h"
#include <map>
+#include <system_error>
using namespace llvm;
using namespace llvm::object;
@@ -158,6 +159,15 @@ public:
};
class RuntimeDyldImpl {
+ friend class RuntimeDyldChecker;
+private:
+
+ uint64_t getAnySymbolRemoteAddress(StringRef Symbol) {
+ if (uint64_t InternalSymbolAddr = getSymbolLoadAddress(Symbol))
+ return InternalSymbolAddr;
+ return MemMgr->getSymbolAddress(Symbol);
+ }
+
protected:
// The MemoryManager to load objects into.
RTDyldMemoryManager *MemMgr;
@@ -245,14 +255,14 @@ protected:
void writeInt16BE(uint8_t *Addr, uint16_t Value) {
if (IsTargetLittleEndian)
- Value = sys::SwapByteOrder(Value);
+ sys::swapByteOrder(Value);
*Addr = (Value >> 8) & 0xFF;
*(Addr + 1) = Value & 0xFF;
}
void writeInt32BE(uint8_t *Addr, uint32_t Value) {
if (IsTargetLittleEndian)
- Value = sys::SwapByteOrder(Value);
+ sys::swapByteOrder(Value);
*Addr = (Value >> 24) & 0xFF;
*(Addr + 1) = (Value >> 16) & 0xFF;
*(Addr + 2) = (Value >> 8) & 0xFF;
@@ -261,7 +271,7 @@ protected:
void writeInt64BE(uint8_t *Addr, uint64_t Value) {
if (IsTargetLittleEndian)
- Value = sys::SwapByteOrder(Value);
+ sys::swapByteOrder(Value);
*Addr = (Value >> 56) & 0xFF;
*(Addr + 1) = (Value >> 48) & 0xFF;
*(Addr + 2) = (Value >> 40) & 0xFF;
@@ -339,7 +349,8 @@ protected:
public:
RuntimeDyldImpl(RTDyldMemoryManager *mm)
- : MemMgr(mm), ProcessAllSections(false), HasError(false) {}
+ : MemMgr(mm), ProcessAllSections(false), HasError(false) {
+ }
virtual ~RuntimeDyldImpl();
@@ -349,7 +360,7 @@ public:
ObjectImage *loadObject(ObjectImage *InputObject);
- void *getSymbolAddress(StringRef Name) {
+  uint8_t *getSymbolAddress(StringRef Name) {
// FIXME: Just look up as a function for now. Overly simple of course.
// Work in progress.
SymbolTableMap::const_iterator pos = GlobalSymbolTable.find(Name);
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp
index 2b425fb..4eb516c 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp
@@ -14,6 +14,8 @@
#include "RuntimeDyldMachO.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringRef.h"
+#include "ObjectImageCommon.h"
+#include "JITRegistrar.h"
using namespace llvm;
using namespace llvm::object;
@@ -21,6 +23,126 @@ using namespace llvm::object;
namespace llvm {
+class MachOObjectImage : public ObjectImageCommon {
+private:
+ typedef SmallVector<uint64_t, 1> SectionAddrList;
+ SectionAddrList OldSectionAddrList;
+
+protected:
+ bool is64;
+ bool Registered;
+
+private:
+ void initOldAddress() {
+ MachOObjectFile *objf = static_cast<MachOObjectFile *>(ObjFile.get());
+ // Unfortunately we need to do this, since there's information encoded
+ // in the original addr of the section that we could not otherwise
+ // recover. The reason for this is that symbols do not actually store
+ // their file offset, but only their vmaddr. This means that in order
+ // to locate the symbol correctly in the object file, we need to know
+ // where the original start of the section was (including any padding,
+ // etc).
+ for (section_iterator i = objf->section_begin(), e = objf->section_end();
+ i != e; ++i) {
+ uint64_t Addr;
+ i->getAddress(Addr);
+ OldSectionAddrList[i->getRawDataRefImpl().d.a] = Addr;
+ }
+ }
+
+public:
+ MachOObjectImage(ObjectBuffer *Input, bool is64)
+ : ObjectImageCommon(Input),
+ OldSectionAddrList(ObjFile->section_end()->getRawDataRefImpl().d.a, 0),
+ is64(is64), Registered(false) {
+ initOldAddress();
+ }
+
+ MachOObjectImage(std::unique_ptr<object::ObjectFile> Input, bool is64)
+ : ObjectImageCommon(std::move(Input)),
+ OldSectionAddrList(ObjFile->section_end()->getRawDataRefImpl().d.a, 0),
+ is64(is64), Registered(false) {
+ initOldAddress();
+ }
+
+ virtual ~MachOObjectImage() {
+ if (Registered)
+ deregisterWithDebugger();
+ }
+
+ // Subclasses can override these methods to update the image with loaded
+ // addresses for sections and common symbols
+ virtual void updateSectionAddress(const SectionRef &Sec, uint64_t Addr) {
+ MachOObjectFile *objf = static_cast<MachOObjectFile *>(ObjFile.get());
+ char *data =
+ const_cast<char *>(objf->getSectionPointer(Sec.getRawDataRefImpl()));
+
+ uint64_t oldAddr = OldSectionAddrList[Sec.getRawDataRefImpl().d.a];
+
+ if (is64) {
+ ((MachO::section_64 *)data)->addr = Addr;
+ } else {
+ ((MachO::section *)data)->addr = Addr;
+ }
+
+ for (symbol_iterator i = objf->symbol_begin(), e = objf->symbol_end();
+ i != e; ++i) {
+ section_iterator symSec(objf->section_end());
+ (*i).getSection(symSec);
+ if (*symSec == Sec) {
+ uint64_t symAddr;
+ (*i).getAddress(symAddr);
+ updateSymbolAddress(*i, symAddr + Addr - oldAddr);
+ }
+ }
+ }
+
+ uint64_t getOldSectionAddr(const SectionRef &Sec) const {
+ return OldSectionAddrList[Sec.getRawDataRefImpl().d.a];
+ }
+
+ virtual void updateSymbolAddress(const SymbolRef &Sym, uint64_t Addr) {
+ char *data = const_cast<char *>(
+ reinterpret_cast<const char *>(Sym.getRawDataRefImpl().p));
+ if (is64)
+ ((MachO::nlist_64 *)data)->n_value = Addr;
+ else
+ ((MachO::nlist *)data)->n_value = Addr;
+ }
+
+ virtual void registerWithDebugger() {
+ JITRegistrar::getGDBRegistrar().registerObject(*Buffer);
+ Registered = true;
+ }
+
+ virtual void deregisterWithDebugger() {
+ JITRegistrar::getGDBRegistrar().deregisterObject(*Buffer);
+ }
+};
+
+ObjectImage *RuntimeDyldMachO::createObjectImage(ObjectBuffer *Buffer) {
+ uint32_t magic = *((const uint32_t *)Buffer->getBufferStart());
+ bool is64 = (magic == MachO::MH_MAGIC_64);
+ assert((magic == MachO::MH_MAGIC_64 || magic == MachO::MH_MAGIC) &&
+ "Unrecognized Macho Magic");
+ return new MachOObjectImage(Buffer, is64);
+}
+
+ObjectImage *RuntimeDyldMachO::createObjectImageFromFile(
+ std::unique_ptr<object::ObjectFile> ObjFile) {
+ if (!ObjFile)
+ return nullptr;
+
+  std::unique_ptr<MemoryBuffer> Buffer(
+      MemoryBuffer::getMemBuffer(ObjFile->getData(), "", false));
+
+ uint32_t magic = *((const uint32_t *)Buffer->getBufferStart());
+ bool is64 = (magic == MachO::MH_MAGIC_64);
+ assert((magic == MachO::MH_MAGIC_64 || magic == MachO::MH_MAGIC) &&
+ "Unrecognized Macho Magic");
+ return new MachOObjectImage(std::move(ObjFile), is64);
+}
+
static unsigned char *processFDE(unsigned char *P, intptr_t DeltaForText,
intptr_t DeltaForEH) {
DEBUG(dbgs() << "Processing FDE: Delta for text: " << DeltaForText
@@ -533,6 +655,7 @@ relocation_iterator RuntimeDyldMachO::processRelocationRef(
ObjSectionToIDMap &ObjSectionToID, const SymbolTableMap &Symbols,
StubMap &Stubs) {
const ObjectFile *OF = Obj.getObjectFile();
+ const MachOObjectImage &MachOObj = *static_cast<MachOObjectImage *>(&Obj);
const MachOObjectFile *MachO = static_cast<const MachOObjectFile *>(OF);
MachO::any_relocation_info RE =
MachO->getRelocation(RelI->getRawDataRefImpl());
@@ -609,8 +732,8 @@ relocation_iterator RuntimeDyldMachO::processRelocationRef(
bool IsCode = false;
Sec.isText(IsCode);
Value.SectionID = findOrEmitSection(Obj, Sec, IsCode, ObjSectionToID);
- uint64_t Addr;
- Sec.getAddress(Addr);
+ uint64_t Addr = MachOObj.getOldSectionAddr(Sec);
+ DEBUG(dbgs() << "\nAddr: " << Addr << "\nAddend: " << Addend);
Value.Addend = Addend - Addr;
if (IsPCRel)
Value.Addend += Offset + NumBytes;
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h
index 060eb8c..35f0720 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h
@@ -105,14 +105,9 @@ public:
void finalizeLoad(ObjectImage &ObjImg,
ObjSectionToIDMap &SectionMap) override;
- static ObjectImage *createObjectImage(ObjectBuffer *InputBuffer) {
- return new ObjectImageCommon(InputBuffer);
- }
-
+ static ObjectImage *createObjectImage(ObjectBuffer *Buffer);
static ObjectImage *
- createObjectImageFromFile(std::unique_ptr<object::ObjectFile> InputObject) {
- return new ObjectImageCommon(std::move(InputObject));
- }
+ createObjectImageFromFile(std::unique_ptr<object::ObjectFile> InputObject);
};
} // end namespace llvm
diff --git a/lib/IR/Android.mk b/lib/IR/Android.mk
index 2ffc86c..c51b241 100644
--- a/lib/IR/Android.mk
+++ b/lib/IR/Android.mk
@@ -5,6 +5,7 @@ vmcore_SRC_FILES := \
Attributes.cpp \
AutoUpgrade.cpp \
BasicBlock.cpp \
+ Comdat.cpp \
ConstantFold.cpp \
ConstantRange.cpp \
Constants.cpp \
diff --git a/lib/IR/AsmWriter.cpp b/lib/IR/AsmWriter.cpp
index 0fef0d0..a7499bc 100644
--- a/lib/IR/AsmWriter.cpp
+++ b/lib/IR/AsmWriter.cpp
@@ -106,6 +106,7 @@ static void PrintEscapedString(StringRef Name, raw_ostream &Out) {
enum PrefixType {
GlobalPrefix,
+ ComdatPrefix,
LabelPrefix,
LocalPrefix,
NoPrefix
@@ -119,6 +120,7 @@ static void PrintLLVMName(raw_ostream &OS, StringRef Name, PrefixType Prefix) {
switch (Prefix) {
case NoPrefix: break;
case GlobalPrefix: OS << '@'; break;
+ case ComdatPrefix: OS << '$'; break;
case LabelPrefix: break;
case LocalPrefix: OS << '%'; break;
}
@@ -1165,8 +1167,15 @@ static void WriteAsOperandInternal(raw_ostream &Out, const Value *V,
}
void AssemblyWriter::init() {
- if (TheModule)
- TypePrinter.incorporateTypes(*TheModule);
+ if (!TheModule)
+ return;
+ TypePrinter.incorporateTypes(*TheModule);
+ for (const Function &F : *TheModule)
+ if (const Comdat *C = F.getComdat())
+ Comdats.insert(C);
+ for (const GlobalVariable &GV : TheModule->globals())
+ if (const Comdat *C = GV.getComdat())
+ Comdats.insert(C);
}
@@ -1308,6 +1317,15 @@ void AssemblyWriter::printModule(const Module *M) {
printTypeIdentities();
+ // Output all comdats.
+ if (!Comdats.empty())
+ Out << '\n';
+ for (const Comdat *C : Comdats) {
+ printComdat(C);
+ if (C != Comdats.back())
+ Out << '\n';
+ }
+
// Output all globals.
if (!M->global_empty()) Out << '\n';
for (Module::const_global_iterator I = M->global_begin(), E = M->global_end();
@@ -1451,10 +1469,11 @@ void AssemblyWriter::printGlobal(const GlobalVariable *GV) {
PrintVisibility(GV->getVisibility(), Out);
PrintDLLStorageClass(GV->getDLLStorageClass(), Out);
PrintThreadLocalModel(GV->getThreadLocalMode(), Out);
+ if (GV->hasUnnamedAddr())
+ Out << "unnamed_addr ";
if (unsigned AddressSpace = GV->getType()->getAddressSpace())
Out << "addrspace(" << AddressSpace << ") ";
- if (GV->hasUnnamedAddr()) Out << "unnamed_addr ";
if (GV->isExternallyInitialized()) Out << "externally_initialized ";
Out << (GV->isConstant() ? "constant " : "global ");
TypePrinter.print(GV->getType()->getElementType(), Out);
@@ -1469,6 +1488,10 @@ void AssemblyWriter::printGlobal(const GlobalVariable *GV) {
PrintEscapedString(GV->getSection(), Out);
Out << '"';
}
+ if (GV->hasComdat()) {
+ Out << ", comdat ";
+ PrintLLVMName(Out, GV->getComdat()->getName(), ComdatPrefix);
+ }
if (GV->getAlignment())
Out << ", align " << GV->getAlignment();
@@ -1488,21 +1511,18 @@ void AssemblyWriter::printAlias(const GlobalAlias *GA) {
}
PrintVisibility(GA->getVisibility(), Out);
PrintDLLStorageClass(GA->getDLLStorageClass(), Out);
+ PrintThreadLocalModel(GA->getThreadLocalMode(), Out);
+ if (GA->hasUnnamedAddr())
+ Out << "unnamed_addr ";
Out << "alias ";
PrintLinkage(GA->getLinkage(), Out);
- PointerType *Ty = GA->getType();
const Constant *Aliasee = GA->getAliasee();
- if (!Aliasee || Ty != Aliasee->getType()) {
- if (unsigned AddressSpace = Ty->getAddressSpace())
- Out << "addrspace(" << AddressSpace << ") ";
- TypePrinter.print(Ty->getElementType(), Out);
- Out << ", ";
- }
if (!Aliasee) {
+ TypePrinter.print(GA->getType(), Out);
Out << " <<NULL ALIASEE>>";
} else {
writeOperand(Aliasee, !isa<ConstantExpr>(Aliasee));
@@ -1512,6 +1532,10 @@ void AssemblyWriter::printAlias(const GlobalAlias *GA) {
Out << '\n';
}
+void AssemblyWriter::printComdat(const Comdat *C) {
+ C->print(Out);
+}
+
void AssemblyWriter::printTypeIdentities() {
if (TypePrinter.NumberedTypes.empty() &&
TypePrinter.NamedTypes.empty())
@@ -1649,6 +1673,10 @@ void AssemblyWriter::printFunction(const Function *F) {
PrintEscapedString(F->getSection(), Out);
Out << '"';
}
+ if (F->hasComdat()) {
+ Out << " comdat ";
+ PrintLLVMName(Out, F->getComdat()->getName(), ComdatPrefix);
+ }
if (F->getAlignment())
Out << " align " << F->getAlignment();
if (F->hasGC())
@@ -1788,6 +1816,9 @@ void AssemblyWriter::printInstruction(const Instruction &I) {
(isa<StoreInst>(I) && cast<StoreInst>(I).isAtomic()))
Out << " atomic";
+ if (isa<AtomicCmpXchgInst>(I) && cast<AtomicCmpXchgInst>(I).isWeak())
+ Out << " weak";
+
// If this is a volatile operation, print out the volatile marker.
if ((isa<LoadInst>(I) && cast<LoadInst>(I).isVolatile()) ||
(isa<StoreInst>(I) && cast<StoreInst>(I).isVolatile()) ||
@@ -2157,11 +2188,32 @@ void NamedMDNode::print(raw_ostream &ROS) const {
W.printNamedMDNode(this);
}
-void Type::print(raw_ostream &OS) const {
- if (!this) {
- OS << "<null Type>";
- return;
+void Comdat::print(raw_ostream &ROS) const {
+ PrintLLVMName(ROS, getName(), ComdatPrefix);
+ ROS << " = comdat ";
+
+ switch (getSelectionKind()) {
+ case Comdat::Any:
+ ROS << "any";
+ break;
+ case Comdat::ExactMatch:
+ ROS << "exactmatch";
+ break;
+ case Comdat::Largest:
+ ROS << "largest";
+ break;
+ case Comdat::NoDuplicates:
+ ROS << "noduplicates";
+ break;
+ case Comdat::SameSize:
+ ROS << "samesize";
+ break;
}
+
+ ROS << '\n';
+}
+
+void Type::print(raw_ostream &OS) const {
TypePrinting TP;
TP.print(const_cast<Type*>(this), OS);
@@ -2174,10 +2226,6 @@ void Type::print(raw_ostream &OS) const {
}
void Value::print(raw_ostream &ROS) const {
- if (!this) {
- ROS << "printing a <null> value\n";
- return;
- }
formatted_raw_ostream OS(ROS);
if (const Instruction *I = dyn_cast<Instruction>(this)) {
const Function *F = I->getParent() ? I->getParent()->getParent() : nullptr;
@@ -2248,5 +2296,8 @@ void Type::dump() const { print(dbgs()); }
// Module::dump() - Allow printing of Modules from the debugger.
void Module::dump() const { print(dbgs(), nullptr); }
+// Comdat::dump() - Allow printing of Comdats from the debugger.
+void Comdat::dump() const { print(dbgs()); }
+
// NamedMDNode::dump() - Allow printing of NamedMDNodes from the debugger.
void NamedMDNode::dump() const { print(dbgs()); }
diff --git a/lib/IR/AsmWriter.h b/lib/IR/AsmWriter.h
index b4ce6de..aef9c8a 100644
--- a/lib/IR/AsmWriter.h
+++ b/lib/IR/AsmWriter.h
@@ -16,6 +16,7 @@
#define LLVM_IR_ASSEMBLYWRITER_H
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SetVector.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/TypeFinder.h"
@@ -26,6 +27,7 @@ namespace llvm {
class BasicBlock;
class Function;
class GlobalValue;
+class Comdat;
class Module;
class NamedMDNode;
class Value;
@@ -70,6 +72,7 @@ private:
SlotTracker &Machine;
TypePrinting TypePrinter;
AssemblyAnnotationWriter *AnnotationWriter;
+ SetVector<const Comdat *> Comdats;
public:
/// Construct an AssemblyWriter with an external SlotTracker
@@ -101,6 +104,7 @@ public:
void printTypeIdentities();
void printGlobal(const GlobalVariable *GV);
void printAlias(const GlobalAlias *GV);
+ void printComdat(const Comdat *C);
void printFunction(const Function *F);
void printArgument(const Argument *FA, AttributeSet Attrs, unsigned Idx);
void printBasicBlock(const BasicBlock *BB);
diff --git a/lib/IR/Attributes.cpp b/lib/IR/Attributes.cpp
index a9074bb..48a2ce8 100644
--- a/lib/IR/Attributes.cpp
+++ b/lib/IR/Attributes.cpp
@@ -173,6 +173,8 @@ std::string Attribute::getAsString(bool InAttrGrp) const {
return "inlinehint";
if (hasAttribute(Attribute::InReg))
return "inreg";
+ if (hasAttribute(Attribute::JumpTable))
+ return "jumptable";
if (hasAttribute(Attribute::MinSize))
return "minsize";
if (hasAttribute(Attribute::Naked))
@@ -291,7 +293,7 @@ bool Attribute::operator<(Attribute A) const {
// AttributeImpl Definition
//===----------------------------------------------------------------------===//
-// Pin the vtabels to this file.
+// Pin the vtables to this file.
AttributeImpl::~AttributeImpl() {}
void EnumAttributeImpl::anchor() {}
void AlignAttributeImpl::anchor() {}
@@ -395,6 +397,7 @@ uint64_t AttributeImpl::getAttrMask(Attribute::AttrKind Val) {
case Attribute::OptimizeNone: return 1ULL << 42;
case Attribute::InAlloca: return 1ULL << 43;
case Attribute::NonNull: return 1ULL << 44;
+ case Attribute::JumpTable: return 1ULL << 45;
}
llvm_unreachable("Unsupported attribute type");
}
diff --git a/lib/IR/AutoUpgrade.cpp b/lib/IR/AutoUpgrade.cpp
index e255113..6554b3c 100644
--- a/lib/IR/AutoUpgrade.cpp
+++ b/lib/IR/AutoUpgrade.cpp
@@ -114,6 +114,9 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
Name == "x86.avx.movnt.pd.256" ||
Name == "x86.avx.movnt.ps.256" ||
Name == "x86.sse42.crc32.64.8" ||
+ Name == "x86.avx.vbroadcast.ss" ||
+ Name == "x86.avx.vbroadcast.ss.256" ||
+ Name == "x86.avx.vbroadcast.sd.256" ||
(Name.startswith("x86.xop.vpcom") && F->arg_size() == 2)) {
NewFn = nullptr;
return true;
@@ -335,6 +338,19 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
Value *Trunc0 = Builder.CreateTrunc(CI->getArgOperand(0), Type::getInt32Ty(C));
Rep = Builder.CreateCall2(CRC32, Trunc0, CI->getArgOperand(1));
Rep = Builder.CreateZExt(Rep, CI->getType(), "");
+ } else if (Name.startswith("llvm.x86.avx.vbroadcast")) {
+ // Replace broadcasts with a series of insertelements.
+ Type *VecTy = CI->getType();
+ Type *EltTy = VecTy->getVectorElementType();
+ unsigned EltNum = VecTy->getVectorNumElements();
+ Value *Cast = Builder.CreateBitCast(CI->getArgOperand(0),
+ EltTy->getPointerTo());
+ Value *Load = Builder.CreateLoad(Cast);
+ Type *I32Ty = Type::getInt32Ty(C);
+ Rep = UndefValue::get(VecTy);
+ for (unsigned I = 0; I < EltNum; ++I)
+ Rep = Builder.CreateInsertElement(Rep, Load,
+ ConstantInt::get(I32Ty, I));
} else {
bool PD128 = false, PD256 = false, PS128 = false, PS256 = false;
if (Name == "llvm.x86.avx.vpermil.pd.256")
@@ -561,3 +577,10 @@ bool llvm::UpgradeDebugInfo(Module &M) {
}
return RetCode;
}
+
+void llvm::UpgradeMDStringConstant(std::string &String) {
+ const std::string OldPrefix = "llvm.vectorizer.";
+ if (String.find(OldPrefix) == 0) {
+ String.replace(0, OldPrefix.size(), "llvm.loop.vectorize.");
+ }
+}
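
The vbroadcast upgrade above lowers the retired intrinsic to one scalar load fanned out with insertelement. A minimal standalone sketch of the same expansion, assuming an existing IRBuilder<> named Builder; expandBroadcast is an illustrative name, not part of this patch:

    // Load *Ptr once and replicate it into every lane of VecTy.
    static Value *expandBroadcast(IRBuilder<> &Builder, Value *Ptr,
                                  VectorType *VecTy) {
      Type *EltTy = VecTy->getElementType();
      Value *Cast = Builder.CreateBitCast(Ptr, EltTy->getPointerTo());
      Value *Scalar = Builder.CreateLoad(Cast);
      Value *Vec = UndefValue::get(VecTy);
      for (unsigned I = 0, E = VecTy->getNumElements(); I != E; ++I)
        Vec = Builder.CreateInsertElement(Vec, Scalar, Builder.getInt32(I));
      return Vec;
    }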
diff --git a/lib/IR/CMakeLists.txt b/lib/IR/CMakeLists.txt
index b027ae5..38a80b1 100644
--- a/lib/IR/CMakeLists.txt
+++ b/lib/IR/CMakeLists.txt
@@ -3,6 +3,7 @@ add_llvm_library(LLVMCore
Attributes.cpp
AutoUpgrade.cpp
BasicBlock.cpp
+ Comdat.cpp
ConstantFold.cpp
ConstantRange.cpp
Constants.cpp
diff --git a/lib/IR/Comdat.cpp b/lib/IR/Comdat.cpp
new file mode 100644
index 0000000..80715ff
--- /dev/null
+++ b/lib/IR/Comdat.cpp
@@ -0,0 +1,25 @@
+//===-- Comdat.cpp - Implement the Comdat class --------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Comdat class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/IR/Comdat.h"
+#include "llvm/ADT/StringMap.h"
+using namespace llvm;
+
+Comdat::Comdat(SelectionKind SK, StringMapEntry<Comdat> *Name)
+ : Name(Name), SK(SK) {}
+
+Comdat::Comdat(Comdat &&C) : Name(C.Name), SK(C.SK) {}
+
+Comdat::Comdat() : Name(nullptr), SK(Comdat::Any) {}
+
+StringRef Comdat::getName() const { return Name->first(); }
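
A Comdat stores only a pointer back to the StringMapEntry that owns it, so getName() is a lookup into the module's comdat symbol table rather than a copied string. A hedged usage sketch, relying on the Module::getOrInsertComdat helper added later in this patch:

    Comdat *C = M.getOrInsertComdat("foo");  // keyed by the symbol-table entry
    C->setSelectionKind(Comdat::Any);        // the default, least restrictive kind
    assert(C->getName() == "foo");           // Name->first() round-trips the key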
diff --git a/lib/IR/ConstantFold.cpp b/lib/IR/ConstantFold.cpp
index 706e66f..395ac39 100644
--- a/lib/IR/ConstantFold.cpp
+++ b/lib/IR/ConstantFold.cpp
@@ -529,7 +529,10 @@ Constant *llvm::ConstantFoldCastInstruction(unsigned opc, Constant *V,
// Try hard to fold cast of cast because they are often eliminable.
if (unsigned newOpc = foldConstantCastPair(opc, CE, DestTy))
return ConstantExpr::getCast(newOpc, CE->getOperand(0), DestTy);
- } else if (CE->getOpcode() == Instruction::GetElementPtr) {
+ } else if (CE->getOpcode() == Instruction::GetElementPtr &&
+ // Do not fold addrspacecast (gep 0, .., 0). Doing so could leave
+ // the addrspacecast in a non-canonical form.
+ opc != Instruction::AddrSpaceCast) {
// If all of the indexes in the GEP are null values, there is no pointer
// adjustment going on. We might as well cast the source pointer.
bool isAllNull = true;
@@ -1331,6 +1334,15 @@ static FCmpInst::Predicate evaluateFCmpRelation(Constant *V1, Constant *V2) {
return FCmpInst::BAD_FCMP_PREDICATE;
}
+static ICmpInst::Predicate areGlobalsPotentiallyEqual(const GlobalValue *GV1,
+ const GlobalValue *GV2) {
+ // Don't try to decide equality of aliases.
+ if (!isa<GlobalAlias>(GV1) && !isa<GlobalAlias>(GV2))
+ if (!GV1->hasExternalWeakLinkage() || !GV2->hasExternalWeakLinkage())
+ return ICmpInst::ICMP_NE;
+ return ICmpInst::BAD_ICMP_PREDICATE;
+}
+
/// evaluateICmpRelation - This function determines if there is anything we can
/// decide about the two constants provided. This doesn't need to handle simple
/// things like integer comparisons, but should instead handle ConstantExprs
@@ -1392,10 +1404,7 @@ static ICmpInst::Predicate evaluateICmpRelation(Constant *V1, Constant *V2,
// constant (which, since the types must match, means that it's a
// ConstantPointerNull).
if (const GlobalValue *GV2 = dyn_cast<GlobalValue>(V2)) {
- // Don't try to decide equality of aliases.
- if (!isa<GlobalAlias>(GV) && !isa<GlobalAlias>(GV2))
- if (!GV->hasExternalWeakLinkage() || !GV2->hasExternalWeakLinkage())
- return ICmpInst::ICMP_NE;
+ return areGlobalsPotentiallyEqual(GV, GV2);
} else if (isa<BlockAddress>(V2)) {
return ICmpInst::ICMP_NE; // Globals never equal labels.
} else {
@@ -1460,7 +1469,8 @@ static ICmpInst::Predicate evaluateICmpRelation(Constant *V1, Constant *V2,
}
break;
- case Instruction::GetElementPtr:
+ case Instruction::GetElementPtr: {
+ GEPOperator *CE1GEP = cast<GEPOperator>(CE1);
// Ok, since this is a getelementptr, we know that the constant has a
// pointer type. Check the various cases.
if (isa<ConstantPointerNull>(V2)) {
@@ -1507,7 +1517,8 @@ static ICmpInst::Predicate evaluateICmpRelation(Constant *V1, Constant *V2,
"Surprising getelementptr!");
return isSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
} else {
- // If they are different globals, we don't know what the value is.
+ if (CE1GEP->hasAllZeroIndices())
+ return areGlobalsPotentiallyEqual(GV, GV2);
return ICmpInst::BAD_ICMP_PREDICATE;
}
}
@@ -1523,8 +1534,14 @@ static ICmpInst::Predicate evaluateICmpRelation(Constant *V1, Constant *V2,
// By far the most common case to handle is when the base pointers are
// obviously to the same global.
if (isa<GlobalValue>(CE1Op0) && isa<GlobalValue>(CE2Op0)) {
- if (CE1Op0 != CE2Op0) // Don't know relative ordering.
+ // Don't know relative ordering, but check for inequality.
+ if (CE1Op0 != CE2Op0) {
+ GEPOperator *CE2GEP = cast<GEPOperator>(CE2);
+ if (CE1GEP->hasAllZeroIndices() && CE2GEP->hasAllZeroIndices())
+ return areGlobalsPotentiallyEqual(cast<GlobalValue>(CE1Op0),
+ cast<GlobalValue>(CE2Op0));
return ICmpInst::BAD_ICMP_PREDICATE;
+ }
// Ok, we know that both getelementptr instructions are based on the
// same global. From this, we can precisely determine the relative
// ordering of the resultant pointers.
@@ -1570,6 +1587,7 @@ static ICmpInst::Predicate evaluateICmpRelation(Constant *V1, Constant *V2,
}
}
}
+ }
default:
break;
}
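
The new helper folds a pointer comparison of two distinct globals to "not equal" only when linking cannot perturb the answer: aliases may resolve to anything, and two extern_weak globals may both be null. A compact restatement of that decision, under illustrative names:

    static bool definitelyUnequal(const GlobalValue *A, const GlobalValue *B) {
      if (A == B || isa<GlobalAlias>(A) || isa<GlobalAlias>(B))
        return false;  // same symbol, or an alias that may resolve anywhere
      // If both are extern_weak they may both be null, and null == null,
      // so the comparison stays undecided in that one case.
      return !A->hasExternalWeakLinkage() || !B->hasExternalWeakLinkage();
    }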
diff --git a/lib/IR/Constants.cpp b/lib/IR/Constants.cpp
index bb8d60b..b815936 100644
--- a/lib/IR/Constants.cpp
+++ b/lib/IR/Constants.cpp
@@ -107,6 +107,28 @@ bool Constant::isAllOnesValue() const {
return false;
}
+bool Constant::isMinSignedValue() const {
+ // Check for INT_MIN integers
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(this))
+ return CI->isMinValue(/*isSigned=*/true);
+
+ // Check for FP which are bitcasted from INT_MIN integers
+ if (const ConstantFP *CFP = dyn_cast<ConstantFP>(this))
+ return CFP->getValueAPF().bitcastToAPInt().isMinSignedValue();
+
+ // Check for constant vectors which are splats of INT_MIN values.
+ if (const ConstantVector *CV = dyn_cast<ConstantVector>(this))
+ if (Constant *Splat = CV->getSplatValue())
+ return Splat->isMinSignedValue();
+
+ // Likewise for ConstantDataVector splats of INT_MIN values.
+ if (const ConstantDataVector *CV = dyn_cast<ConstantDataVector>(this))
+ if (Constant *Splat = CV->getSplatValue())
+ return Splat->isMinSignedValue();
+
+ return false;
+}
+
// Constructor to create a '0' constant of arbitrary type...
Constant *Constant::getNullValue(Type *Ty) {
switch (Ty->getTypeID()) {
@@ -278,35 +300,48 @@ bool Constant::canTrap() const {
return canTrapImpl(this, NonTrappingOps);
}
-/// isThreadDependent - Return true if the value can vary between threads.
-bool Constant::isThreadDependent() const {
- SmallPtrSet<const Constant*, 64> Visited;
- SmallVector<const Constant*, 64> WorkList;
- WorkList.push_back(this);
- Visited.insert(this);
+/// Check if C contains a GlobalValue for which Predicate is true.
+static bool
+ConstHasGlobalValuePredicate(const Constant *C,
+ bool (*Predicate)(const GlobalValue *)) {
+ SmallPtrSet<const Constant *, 8> Visited;
+ SmallVector<const Constant *, 8> WorkList;
+ WorkList.push_back(C);
+ Visited.insert(C);
while (!WorkList.empty()) {
- const Constant *C = WorkList.pop_back_val();
-
- if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) {
- if (GV->isThreadLocal())
+ const Constant *WorkItem = WorkList.pop_back_val();
+ if (const auto *GV = dyn_cast<GlobalValue>(WorkItem))
+ if (Predicate(GV))
return true;
- }
-
- for (unsigned I = 0, E = C->getNumOperands(); I != E; ++I) {
- const Constant *D = dyn_cast<Constant>(C->getOperand(I));
- if (!D)
+ for (const Value *Op : WorkItem->operands()) {
+ const Constant *ConstOp = dyn_cast<Constant>(Op);
+ if (!ConstOp)
continue;
- if (Visited.insert(D))
- WorkList.push_back(D);
+ if (Visited.insert(ConstOp))
+ WorkList.push_back(ConstOp);
}
}
-
return false;
}
-/// isConstantUsed - Return true if the constant has users other than constant
-/// exprs and other dangling things.
+/// Return true if the value can vary between threads.
+bool Constant::isThreadDependent() const {
+ auto ThreadLocalPredicate = [](const GlobalValue *GV) {
+ return GV->isThreadLocal();
+ };
+ return ConstHasGlobalValuePredicate(this, ThreadLocalPredicate);
+}
+
+bool Constant::isDLLImportDependent() const {
+ auto DLLImportPredicate = [](const GlobalValue *GV) {
+ return GV->hasDLLImportStorageClass();
+ };
+ return ConstHasGlobalValuePredicate(this, DLLImportPredicate);
+}
+
+/// Return true if the constant has users other than constant exprs and other
+/// dangling things.
bool Constant::isConstantUsed() const {
for (const User *U : users()) {
const Constant *UC = dyn_cast<Constant>(U);
@@ -1698,6 +1733,19 @@ Constant *ConstantExpr::getAddrSpaceCast(Constant *C, Type *DstTy) {
assert(CastInst::castIsValid(Instruction::AddrSpaceCast, C, DstTy) &&
"Invalid constantexpr addrspacecast!");
+ // Canonicalize addrspacecasts between different pointer types by first
+ // bitcasting the pointer type and then converting the address space.
+ PointerType *SrcScalarTy = cast<PointerType>(C->getType()->getScalarType());
+ PointerType *DstScalarTy = cast<PointerType>(DstTy->getScalarType());
+ Type *DstElemTy = DstScalarTy->getElementType();
+ if (SrcScalarTy->getElementType() != DstElemTy) {
+ Type *MidTy = PointerType::get(DstElemTy, SrcScalarTy->getAddressSpace());
+ if (VectorType *VT = dyn_cast<VectorType>(DstTy)) {
+ // Handle vectors of pointers.
+ MidTy = VectorType::get(MidTy, VT->getNumElements());
+ }
+ C = getBitCast(C, MidTy);
+ }
return getFoldedCast(Instruction::AddrSpaceCast, C, DstTy);
}
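
The canonicalization above never changes pointee type and address space in one step; it splits the cast so later folds see a uniform shape. Written out for a scalar pointer, assuming a Constant *C of type i8 addrspace(1)* and an LLVMContext Ctx, this is the pair it produces:

    // addrspacecast (i8 addrspace(1)* C to i32*) becomes
    //   %mid = bitcast i8 addrspace(1)* C to i32 addrspace(1)*
    //   addrspacecast i32 addrspace(1)* %mid to i32*
    Type *I32 = Type::getInt32Ty(Ctx);
    Constant *Mid = ConstantExpr::getBitCast(C, I32->getPointerTo(1));
    Constant *Res = ConstantExpr::getAddrSpaceCast(Mid, I32->getPointerTo(0));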
diff --git a/lib/IR/Core.cpp b/lib/IR/Core.cpp
index 27ce503..87099a6 100644
--- a/lib/IR/Core.cpp
+++ b/lib/IR/Core.cpp
@@ -17,9 +17,9 @@
#include "llvm/IR/Attributes.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/DiagnosticPrinter.h"
-#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h"
@@ -35,10 +35,10 @@
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Threading.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Support/system_error.h"
#include <cassert>
#include <cstdlib>
#include <cstring>
+#include <system_error>
using namespace llvm;
@@ -281,7 +281,11 @@ char *LLVMPrintTypeToString(LLVMTypeRef Ty) {
std::string buf;
raw_string_ostream os(buf);
- unwrap(Ty)->print(os);
+ if (unwrap(Ty))
+ unwrap(Ty)->print(os);
+ else
+ os << "Printing <null> Type";
+
os.flush();
return strdup(buf.c_str());
@@ -531,7 +535,11 @@ char* LLVMPrintValueToString(LLVMValueRef Val) {
std::string buf;
raw_string_ostream os(buf);
- unwrap(Val)->print(os);
+ if (unwrap(Val))
+ unwrap(Val)->print(os);
+ else
+ os << "Printing <null> Value";
+
os.flush();
return strdup(buf.c_str());
@@ -1286,7 +1294,7 @@ void LLVMSetLinkage(LLVMValueRef Global, LLVMLinkage Linkage) {
}
const char *LLVMGetSection(LLVMValueRef Global) {
- return unwrap<GlobalValue>(Global)->getSection().c_str();
+ return unwrap<GlobalValue>(Global)->getSection();
}
void LLVMSetSection(LLVMValueRef Global, const char *Section) {
@@ -2598,28 +2606,24 @@ LLVMBool LLVMCreateMemoryBufferWithContentsOfFile(
LLVMMemoryBufferRef *OutMemBuf,
char **OutMessage) {
- std::unique_ptr<MemoryBuffer> MB;
- error_code ec;
- if (!(ec = MemoryBuffer::getFile(Path, MB))) {
- *OutMemBuf = wrap(MB.release());
- return 0;
+ ErrorOr<std::unique_ptr<MemoryBuffer>> MBOrErr = MemoryBuffer::getFile(Path);
+ if (std::error_code EC = MBOrErr.getError()) {
+ *OutMessage = strdup(EC.message().c_str());
+ return 1;
}
-
- *OutMessage = strdup(ec.message().c_str());
- return 1;
+ *OutMemBuf = wrap(MBOrErr.get().release());
+ return 0;
}
LLVMBool LLVMCreateMemoryBufferWithSTDIN(LLVMMemoryBufferRef *OutMemBuf,
char **OutMessage) {
- std::unique_ptr<MemoryBuffer> MB;
- error_code ec;
- if (!(ec = MemoryBuffer::getSTDIN(MB))) {
- *OutMemBuf = wrap(MB.release());
- return 0;
+ ErrorOr<std::unique_ptr<MemoryBuffer>> MBOrErr = MemoryBuffer::getSTDIN();
+ if (std::error_code EC = MBOrErr.getError()) {
+ *OutMessage = strdup(EC.message().c_str());
+ return 1;
}
-
- *OutMessage = strdup(ec.message().c_str());
- return 1;
+ *OutMemBuf = wrap(MBOrErr.get().release());
+ return 0;
}
LLVMMemoryBufferRef LLVMCreateMemoryBufferWithMemoryRange(
@@ -2700,11 +2704,10 @@ void LLVMDisposePassManager(LLVMPassManagerRef PM) {
/*===-- Threading ------------------------------------------------------===*/
LLVMBool LLVMStartMultithreaded() {
- return llvm_start_multithreaded();
+ return LLVMIsMultithreaded();
}
void LLVMStopMultithreaded() {
- llvm_stop_multithreaded();
}
LLVMBool LLVMIsMultithreaded() {
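
The MemoryBuffer rewrites above all follow the ErrorOr pattern that replaces the old out-parameter plus error_code style. The caller-side idiom, as a sketch:

    ErrorOr<std::unique_ptr<MemoryBuffer>> BufOrErr = MemoryBuffer::getFile(Path);
    if (std::error_code EC = BufOrErr.getError()) {
      errs() << Path << ": " << EC.message() << "\n";  // report, then bail out
      return;
    }
    std::unique_ptr<MemoryBuffer> Buf = std::move(BufOrErr.get());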
diff --git a/lib/IR/DIBuilder.cpp b/lib/IR/DIBuilder.cpp
index 92edacc..218787c 100644
--- a/lib/IR/DIBuilder.cpp
+++ b/lib/IR/DIBuilder.cpp
@@ -102,7 +102,8 @@ DICompileUnit DIBuilder::createCompileUnit(unsigned Lang, StringRef Filename,
StringRef Producer, bool isOptimized,
StringRef Flags, unsigned RunTimeVer,
StringRef SplitName,
- DebugEmissionKind Kind) {
+ DebugEmissionKind Kind,
+ bool EmitDebugInfo) {
assert(((Lang <= dwarf::DW_LANG_OCaml && Lang >= dwarf::DW_LANG_C89) ||
(Lang <= dwarf::DW_LANG_hi_user && Lang >= dwarf::DW_LANG_lo_user)) &&
@@ -140,8 +141,14 @@ DICompileUnit DIBuilder::createCompileUnit(unsigned Lang, StringRef Filename,
MDNode *CUNode = MDNode::get(VMContext, Elts);
// Create a named metadata so that it is easier to find cu in a module.
- NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.cu");
- NMD->addOperand(CUNode);
+ // Note that we only generate this when the caller wants to actually
+ // emit debug information. When we are only interested in tracking
+ // source line locations throughout the backend, we prevent codegen from
+ // emitting debug info in the final output by not generating llvm.dbg.cu.
+ if (EmitDebugInfo) {
+ NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.cu");
+ NMD->addOperand(CUNode);
+ }
return DICompileUnit(CUNode);
}
@@ -1068,18 +1075,19 @@ DIVariable DIBuilder::createComplexVariable(unsigned Tag, DIDescriptor Scope,
DITypeRef Ty,
ArrayRef<Value *> Addr,
unsigned ArgNo) {
- SmallVector<Value *, 15> Elts;
- Elts.push_back(GetTagConstant(VMContext, Tag));
- Elts.push_back(getNonCompileUnitScope(Scope)),
- Elts.push_back(MDString::get(VMContext, Name));
- Elts.push_back(F);
- Elts.push_back(ConstantInt::get(Type::getInt32Ty(VMContext),
- (LineNo | (ArgNo << 24))));
- Elts.push_back(Ty);
- Elts.push_back(Constant::getNullValue(Type::getInt32Ty(VMContext)));
- Elts.push_back(Constant::getNullValue(Type::getInt32Ty(VMContext)));
- Elts.append(Addr.begin(), Addr.end());
-
+ assert(Addr.size() > 0 && "complex address is empty");
+ Value *Elts[] = {
+ GetTagConstant(VMContext, Tag),
+ getNonCompileUnitScope(Scope),
+ MDString::get(VMContext, Name),
+ F,
+ ConstantInt::get(Type::getInt32Ty(VMContext),
+ (LineNo | (ArgNo << 24))),
+ Ty,
+ Constant::getNullValue(Type::getInt32Ty(VMContext)),
+ Constant::getNullValue(Type::getInt32Ty(VMContext)),
+ MDNode::get(VMContext, Addr)
+ };
return DIVariable(MDNode::get(VMContext, Elts));
}
diff --git a/lib/IR/DebugInfo.cpp b/lib/IR/DebugInfo.cpp
index db9e56d..5e39b24 100644
--- a/lib/IR/DebugInfo.cpp
+++ b/lib/IR/DebugInfo.cpp
@@ -138,8 +138,14 @@ void DIDescriptor::replaceFunctionField(unsigned Elt, Function *F) {
}
}
-unsigned DIVariable::getNumAddrElements() const {
- return DbgNode->getNumOperands() - 8;
+uint64_t DIVariable::getAddrElement(unsigned Idx) const {
+ DIDescriptor ComplexExpr = getDescriptorField(8);
+ if (Idx < ComplexExpr->getNumOperands())
+ if (auto *CI = dyn_cast_or_null<ConstantInt>(ComplexExpr->getOperand(Idx)))
+ return CI->getZExtValue();
+
+ assert(false && "nonexistent complex address element requested");
+ return 0;
}
/// getInlinedAt - If this variable is inlined then return inline location.
@@ -566,7 +572,13 @@ bool DIVariable::Verify() const {
// Make sure that type @ field 5 is a DITypeRef.
if (!fieldIsTypeRef(DbgNode, 5))
return false;
- return DbgNode->getNumOperands() >= 8;
+
+ // Variable without a complex expression.
+ if (DbgNode->getNumOperands() == 8)
+ return true;
+
+ // Make sure the complex expression is an MDNode.
+ return (DbgNode->getNumOperands() == 9 && fieldIsMDNode(DbgNode, 8));
}
/// Verify - Verify that a location descriptor is well formed.
@@ -1514,3 +1526,23 @@ unsigned llvm::getDebugMetadataVersionFromModule(const Module &M) {
return 0;
return cast<ConstantInt>(Val)->getZExtValue();
}
+
+llvm::DenseMap<const llvm::Function *, llvm::DISubprogram>
+llvm::makeSubprogramMap(const Module &M) {
+ DenseMap<const Function *, DISubprogram> R;
+
+ NamedMDNode *CU_Nodes = M.getNamedMetadata("llvm.dbg.cu");
+ if (!CU_Nodes)
+ return R;
+
+ for (MDNode *N : CU_Nodes->operands()) {
+ DICompileUnit CUNode(N);
+ DIArray SPs = CUNode.getSubprograms();
+ for (unsigned i = 0, e = SPs.getNumElements(); i != e; ++i) {
+ DISubprogram SP(SPs.getElement(i));
+ if (Function *F = SP.getFunction())
+ R.insert(std::make_pair(F, SP));
+ }
+ }
+ return R;
+}
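
makeSubprogramMap walks llvm.dbg.cu once and inverts the Function-to-DISubprogram relation, so backend queries become a hash lookup. A usage sketch, assuming a loaded Module M:

    DenseMap<const Function *, DISubprogram> SPMap = makeSubprogramMap(M);
    for (const Function &F : M) {
      auto It = SPMap.find(&F);
      if (It != SPMap.end())
        errs() << F.getName() << " starts at line "
               << It->second.getLineNumber() << "\n";
    }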
diff --git a/lib/IR/DebugLoc.cpp b/lib/IR/DebugLoc.cpp
index 43360d3..e8bdcce 100644
--- a/lib/IR/DebugLoc.cpp
+++ b/lib/IR/DebugLoc.cpp
@@ -76,7 +76,7 @@ MDNode *DebugLoc::getScopeNode(const LLVMContext &Ctx) const {
return getScope(Ctx);
}
-DebugLoc DebugLoc::getFnDebugLoc(const LLVMContext &Ctx) {
+DebugLoc DebugLoc::getFnDebugLoc(const LLVMContext &Ctx) const {
const MDNode *Scope = getScopeNode(Ctx);
DISubprogram SP = getDISubprogram(Scope);
if (SP.isSubprogram()) {
diff --git a/lib/IR/DiagnosticInfo.cpp b/lib/IR/DiagnosticInfo.cpp
index 6eeb162..2727063 100644
--- a/lib/IR/DiagnosticInfo.cpp
+++ b/lib/IR/DiagnosticInfo.cpp
@@ -128,7 +128,7 @@ void DiagnosticInfoSampleProfile::print(DiagnosticPrinter &DP) const {
}
bool DiagnosticInfoOptimizationRemarkBase::isLocationAvailable() const {
- return getFunction().getParent()->getNamedMetadata("llvm.dbg.cu") != nullptr;
+ return !getDebugLoc().isUnknown();
}
void DiagnosticInfoOptimizationRemarkBase::getLocation(StringRef *Filename,
diff --git a/lib/IR/Function.cpp b/lib/IR/Function.cpp
index fe32c46..1443571 100644
--- a/lib/IR/Function.cpp
+++ b/lib/IR/Function.cpp
@@ -735,6 +735,11 @@ Function *Intrinsic::getDeclaration(Module *M, ID id, ArrayRef<Type*> Tys) {
#include "llvm/IR/Intrinsics.gen"
#undef GET_LLVM_INTRINSIC_FOR_GCC_BUILTIN
+// This defines the "Intrinsic::getIntrinsicForMSBuiltin()" method.
+#define GET_LLVM_INTRINSIC_FOR_MS_BUILTIN
+#include "llvm/IR/Intrinsics.gen"
+#undef GET_LLVM_INTRINSIC_FOR_MS_BUILTIN
+
/// hasAddressTaken - returns true if there are any uses of this function
/// other than direct calls or invokes to it.
bool Function::hasAddressTaken(const User* *PutOffender) const {
diff --git a/lib/IR/GCOV.cpp b/lib/IR/GCOV.cpp
index f2099d6..1667401 100644
--- a/lib/IR/GCOV.cpp
+++ b/lib/IR/GCOV.cpp
@@ -19,8 +19,8 @@
#include "llvm/Support/Format.h"
#include "llvm/Support/MemoryObject.h"
#include "llvm/Support/Path.h"
-#include "llvm/Support/system_error.h"
#include <algorithm>
+#include <system_error>
using namespace llvm;
//===----------------------------------------------------------------------===//
@@ -438,11 +438,15 @@ class LineConsumer {
StringRef Remaining;
public:
LineConsumer(StringRef Filename) {
- if (error_code EC = MemoryBuffer::getFileOrSTDIN(Filename, Buffer)) {
+ ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOrErr =
+ MemoryBuffer::getFileOrSTDIN(Filename);
+ if (std::error_code EC = BufferOrErr.getError()) {
errs() << Filename << ": " << EC.message() << "\n";
Remaining = "";
- } else
+ } else {
+ Buffer = std::move(BufferOrErr.get());
Remaining = Buffer->getBuffer();
+ }
}
bool empty() { return Remaining.empty(); }
void printNext(raw_ostream &OS, uint32_t LineNum) {
diff --git a/lib/IR/Globals.cpp b/lib/IR/Globals.cpp
index c905cfe..244e3e4 100644
--- a/lib/IR/Globals.cpp
+++ b/lib/IR/Globals.cpp
@@ -20,6 +20,7 @@
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/LeakDetector.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/Operator.h"
#include "llvm/Support/ErrorHandling.h"
using namespace llvm;
@@ -59,9 +60,16 @@ void GlobalValue::copyAttributesFrom(const GlobalValue *Src) {
}
unsigned GlobalValue::getAlignment() const {
- if (auto *GA = dyn_cast<GlobalAlias>(this))
- return GA->getAliasee()->getAlignment();
-
+ if (auto *GA = dyn_cast<GlobalAlias>(this)) {
+ // In general we cannot compute this at the IR level, but we try.
+ if (const GlobalObject *GO = GA->getBaseObject())
+ return GO->getAlignment();
+
+ // FIXME: we should also be able to handle:
+ // Alias = Global + Offset
+ // Alias = Absolute
+ return 0;
+ }
return cast<GlobalObject>(this)->getAlignment();
}
@@ -80,12 +88,26 @@ void GlobalObject::copyAttributesFrom(const GlobalValue *Src) {
setSection(GV->getSection());
}
-const std::string &GlobalValue::getSection() const {
- if (auto *GA = dyn_cast<GlobalAlias>(this))
- return GA->getAliasee()->getSection();
+const char *GlobalValue::getSection() const {
+ if (auto *GA = dyn_cast<GlobalAlias>(this)) {
+ // In general we cannot compute this at the IR level, but we try.
+ if (const GlobalObject *GO = GA->getBaseObject())
+ return GO->getSection();
+ return "";
+ }
return cast<GlobalObject>(this)->getSection();
}
+Comdat *GlobalValue::getComdat() {
+ if (auto *GA = dyn_cast<GlobalAlias>(this)) {
+ // In general we cannot compute this at the IR level, but we try.
+ if (const GlobalObject *GO = GA->getBaseObject())
+ return const_cast<GlobalObject *>(GO)->getComdat();
+ return nullptr;
+ }
+ return cast<GlobalObject>(this)->getComdat();
+}
+
void GlobalObject::setSection(StringRef S) { Section = S; }
bool GlobalValue::isDeclaration() const {
@@ -113,8 +135,9 @@ GlobalVariable::GlobalVariable(Type *Ty, bool constant, LinkageTypes Link,
: GlobalObject(PointerType::get(Ty, AddressSpace), Value::GlobalVariableVal,
OperandTraits<GlobalVariable>::op_begin(this),
InitVal != nullptr, Link, Name),
- isConstantGlobal(constant), threadLocalMode(TLMode),
+ isConstantGlobal(constant),
isExternallyInitializedConstant(isExternallyInitialized) {
+ setThreadLocalMode(TLMode);
if (InitVal) {
assert(InitVal->getType() == Ty &&
"Initializer should be the same type as the GlobalVariable!");
@@ -132,8 +155,9 @@ GlobalVariable::GlobalVariable(Module &M, Type *Ty, bool constant,
: GlobalObject(PointerType::get(Ty, AddressSpace), Value::GlobalVariableVal,
OperandTraits<GlobalVariable>::op_begin(this),
InitVal != nullptr, Link, Name),
- isConstantGlobal(constant), threadLocalMode(TLMode),
+ isConstantGlobal(constant),
isExternallyInitializedConstant(isExternallyInitialized) {
+ setThreadLocalMode(TLMode);
if (InitVal) {
assert(InitVal->getType() == Ty &&
"Initializer should be the same type as the GlobalVariable!");
@@ -214,7 +238,7 @@ void GlobalVariable::copyAttributesFrom(const GlobalValue *Src) {
//===----------------------------------------------------------------------===//
GlobalAlias::GlobalAlias(Type *Ty, unsigned AddressSpace, LinkageTypes Link,
- const Twine &Name, GlobalObject *Aliasee,
+ const Twine &Name, Constant *Aliasee,
Module *ParentModule)
: GlobalValue(PointerType::get(Ty, AddressSpace), Value::GlobalAliasVal,
&Op<0>(), 1, Link, Name) {
@@ -227,7 +251,7 @@ GlobalAlias::GlobalAlias(Type *Ty, unsigned AddressSpace, LinkageTypes Link,
GlobalAlias *GlobalAlias::create(Type *Ty, unsigned AddressSpace,
LinkageTypes Link, const Twine &Name,
- GlobalObject *Aliasee, Module *ParentModule) {
+ Constant *Aliasee, Module *ParentModule) {
return new GlobalAlias(Ty, AddressSpace, Link, Name, Aliasee, ParentModule);
}
@@ -239,18 +263,18 @@ GlobalAlias *GlobalAlias::create(Type *Ty, unsigned AddressSpace,
GlobalAlias *GlobalAlias::create(Type *Ty, unsigned AddressSpace,
LinkageTypes Linkage, const Twine &Name,
- GlobalObject *Aliasee) {
+ GlobalValue *Aliasee) {
return create(Ty, AddressSpace, Linkage, Name, Aliasee, Aliasee->getParent());
}
GlobalAlias *GlobalAlias::create(LinkageTypes Link, const Twine &Name,
- GlobalObject *Aliasee) {
+ GlobalValue *Aliasee) {
PointerType *PTy = Aliasee->getType();
return create(PTy->getElementType(), PTy->getAddressSpace(), Link, Name,
Aliasee);
}
-GlobalAlias *GlobalAlias::create(const Twine &Name, GlobalObject *Aliasee) {
+GlobalAlias *GlobalAlias::create(const Twine &Name, GlobalValue *Aliasee) {
return create(Aliasee->getLinkage(), Name, Aliasee);
}
@@ -270,4 +294,8 @@ void GlobalAlias::eraseFromParent() {
getParent()->getAliasList().erase(this);
}
-void GlobalAlias::setAliasee(GlobalObject *Aliasee) { setOperand(0, Aliasee); }
+void GlobalAlias::setAliasee(Constant *Aliasee) {
+ assert((!Aliasee || Aliasee->getType() == getType()) &&
+ "Alias and aliasee types should match!");
+ setOperand(0, Aliasee);
+}
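
Widening aliasees from GlobalObject to Constant lets an alias point through a constant expression, with the new setAliasee assert enforcing that alias and aliasee types still match. A sketch of the now-expressible case, assuming a Function *F in Module M:

    // An alias to a bitcast of a function, which the old GlobalObject-only
    // signature could not represent directly.
    Type *I8 = Type::getInt8Ty(M.getContext());
    Constant *Aliasee = ConstantExpr::getBitCast(F, I8->getPointerTo());
    GlobalAlias *GA = GlobalAlias::create(I8, /*AddressSpace=*/0,
                                          GlobalValue::ExternalLinkage,
                                          "f_alias", Aliasee, &M);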
diff --git a/lib/IR/Instruction.cpp b/lib/IR/Instruction.cpp
index 28cc4cb..86421c4 100644
--- a/lib/IR/Instruction.cpp
+++ b/lib/IR/Instruction.cpp
@@ -145,31 +145,31 @@ void Instruction::setFastMathFlags(FastMathFlags FMF) {
/// Determine whether the unsafe-algebra flag is set.
bool Instruction::hasUnsafeAlgebra() const {
- assert(isa<FPMathOperator>(this) && "setting fast-math flag on invalid op");
+ assert(isa<FPMathOperator>(this) && "getting fast-math flag on invalid op");
return cast<FPMathOperator>(this)->hasUnsafeAlgebra();
}
/// Determine whether the no-NaNs flag is set.
bool Instruction::hasNoNaNs() const {
- assert(isa<FPMathOperator>(this) && "setting fast-math flag on invalid op");
+ assert(isa<FPMathOperator>(this) && "getting fast-math flag on invalid op");
return cast<FPMathOperator>(this)->hasNoNaNs();
}
/// Determine whether the no-infs flag is set.
bool Instruction::hasNoInfs() const {
- assert(isa<FPMathOperator>(this) && "setting fast-math flag on invalid op");
+ assert(isa<FPMathOperator>(this) && "getting fast-math flag on invalid op");
return cast<FPMathOperator>(this)->hasNoInfs();
}
/// Determine whether the no-signed-zeros flag is set.
bool Instruction::hasNoSignedZeros() const {
- assert(isa<FPMathOperator>(this) && "setting fast-math flag on invalid op");
+ assert(isa<FPMathOperator>(this) && "getting fast-math flag on invalid op");
return cast<FPMathOperator>(this)->hasNoSignedZeros();
}
/// Determine whether the allow-reciprocal flag is set.
bool Instruction::hasAllowReciprocal() const {
- assert(isa<FPMathOperator>(this) && "setting fast-math flag on invalid op");
+ assert(isa<FPMathOperator>(this) && "getting fast-math flag on invalid op");
return cast<FPMathOperator>(this)->hasAllowReciprocal();
}
@@ -177,7 +177,7 @@ bool Instruction::hasAllowReciprocal() const {
/// operator which supports these flags. See LangRef.html for the meaning of
/// these flags.
FastMathFlags Instruction::getFastMathFlags() const {
- assert(isa<FPMathOperator>(this) && "setting fast-math flag on invalid op");
+ assert(isa<FPMathOperator>(this) && "getting fast-math flag on invalid op");
return cast<FPMathOperator>(this)->getFastMathFlags();
}
@@ -300,6 +300,7 @@ static bool haveSameSpecialState(const Instruction *I1, const Instruction *I2,
FI->getSynchScope() == cast<FenceInst>(I2)->getSynchScope();
if (const AtomicCmpXchgInst *CXI = dyn_cast<AtomicCmpXchgInst>(I1))
return CXI->isVolatile() == cast<AtomicCmpXchgInst>(I2)->isVolatile() &&
+ CXI->isWeak() == cast<AtomicCmpXchgInst>(I2)->isWeak() &&
CXI->getSuccessOrdering() ==
cast<AtomicCmpXchgInst>(I2)->getSuccessOrdering() &&
CXI->getFailureOrdering() ==
@@ -331,6 +332,10 @@ bool Instruction::isIdenticalToWhenDefined(const Instruction *I) const {
getType() != I->getType())
return false;
+ // If both instructions have no operands, they are identical.
+ if (getNumOperands() == 0 && I->getNumOperands() == 0)
+ return haveSameSpecialState(this, I);
+
// We have two instructions of identical opcode and #operands. Check to see
// if all operands are the same.
if (!std::equal(op_begin(), op_end(), I->op_begin()))
diff --git a/lib/IR/Instructions.cpp b/lib/IR/Instructions.cpp
index 13c51b8..a5ceacb 100644
--- a/lib/IR/Instructions.cpp
+++ b/lib/IR/Instructions.cpp
@@ -248,7 +248,7 @@ void LandingPadInst::growOperands(unsigned Size) {
Use::zap(OldOps, OldOps + e, true);
}
-void LandingPadInst::addClause(Value *Val) {
+void LandingPadInst::addClause(Constant *Val) {
unsigned OpNo = getNumOperands();
growOperands(1);
assert(OpNo < ReservedSpace && "Growing didn't work!");
@@ -1251,10 +1251,11 @@ AtomicCmpXchgInst::AtomicCmpXchgInst(Value *Ptr, Value *Cmp, Value *NewVal,
AtomicOrdering FailureOrdering,
SynchronizationScope SynchScope,
Instruction *InsertBefore)
- : Instruction(Cmp->getType(), AtomicCmpXchg,
- OperandTraits<AtomicCmpXchgInst>::op_begin(this),
- OperandTraits<AtomicCmpXchgInst>::operands(this),
- InsertBefore) {
+ : Instruction(
+ StructType::get(Cmp->getType(), Type::getInt1Ty(Cmp->getContext()),
+ nullptr),
+ AtomicCmpXchg, OperandTraits<AtomicCmpXchgInst>::op_begin(this),
+ OperandTraits<AtomicCmpXchgInst>::operands(this), InsertBefore) {
Init(Ptr, Cmp, NewVal, SuccessOrdering, FailureOrdering, SynchScope);
}
@@ -1263,13 +1264,14 @@ AtomicCmpXchgInst::AtomicCmpXchgInst(Value *Ptr, Value *Cmp, Value *NewVal,
AtomicOrdering FailureOrdering,
SynchronizationScope SynchScope,
BasicBlock *InsertAtEnd)
- : Instruction(Cmp->getType(), AtomicCmpXchg,
- OperandTraits<AtomicCmpXchgInst>::op_begin(this),
- OperandTraits<AtomicCmpXchgInst>::operands(this),
- InsertAtEnd) {
+ : Instruction(
+ StructType::get(Cmp->getType(), Type::getInt1Ty(Cmp->getContext()),
+ nullptr),
+ AtomicCmpXchg, OperandTraits<AtomicCmpXchgInst>::op_begin(this),
+ OperandTraits<AtomicCmpXchgInst>::operands(this), InsertAtEnd) {
Init(Ptr, Cmp, NewVal, SuccessOrdering, FailureOrdering, SynchScope);
}
-
+
//===----------------------------------------------------------------------===//
// AtomicRMWInst Implementation
//===----------------------------------------------------------------------===//
@@ -2331,18 +2333,12 @@ unsigned CastInst::isEliminableCastPair(
// Allowed, use first cast's opcode
return firstOp;
case 14:
- // FIXME: this state can be merged with (2), but the following assert
- // is useful to check the correcteness of the sequence due to semantic
- // change of bitcast.
- assert(
- SrcTy->isPtrOrPtrVectorTy() &&
- MidTy->isPtrOrPtrVectorTy() &&
- DstTy->isPtrOrPtrVectorTy() &&
- SrcTy->getPointerAddressSpace() == MidTy->getPointerAddressSpace() &&
- MidTy->getPointerAddressSpace() != DstTy->getPointerAddressSpace() &&
- "Illegal bitcast, addrspacecast sequence!");
- // Allowed, use second cast's opcode
- return secondOp;
+ // bitcast, addrspacecast -> addrspacecast if the element type of
+ // bitcast's source is the same as that of addrspacecast's destination.
+ if (SrcTy->getPointerElementType() == DstTy->getPointerElementType())
+ return Instruction::AddrSpaceCast;
+ return 0;
+
case 15:
// FIXME: this state can be merged with (1), but the following assert
// is useful to check the correctness of the sequence due to semantic
@@ -3610,6 +3606,7 @@ AtomicCmpXchgInst *AtomicCmpXchgInst::clone_impl() const {
getSuccessOrdering(), getFailureOrdering(),
getSynchScope());
Result->setVolatile(isVolatile());
+ Result->setWeak(isWeak());
return Result;
}
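
cmpxchg now yields a { T, i1 } pair: the value seen in memory plus a success bit, which is what lets the new weak form fail spuriously. A consumer-side sketch with IRBuilder, assuming Ptr, Cmp and New of matching types:

    AtomicCmpXchgInst *CX = Builder.CreateAtomicCmpXchg(
        Ptr, Cmp, New, SequentiallyConsistent, SequentiallyConsistent);
    Value *Loaded  = Builder.CreateExtractValue(CX, 0);  // prior memory value
    Value *Success = Builder.CreateExtractValue(CX, 1);  // i1: did the swap happen?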
diff --git a/lib/IR/Metadata.cpp b/lib/IR/Metadata.cpp
index 4d932d0..59137e4 100644
--- a/lib/IR/Metadata.cpp
+++ b/lib/IR/Metadata.cpp
@@ -663,7 +663,7 @@ void Instruction::setMetadata(unsigned KindID, MDNode *Node) {
// Otherwise, we're removing metadata from an instruction.
assert((hasMetadataHashEntry() ==
- getContext().pImpl->MetadataStore.count(this)) &&
+ (getContext().pImpl->MetadataStore.count(this) > 0)) &&
"HasMetadata bit out of date!");
if (!hasMetadataHashEntry())
return; // Nothing to remove!
diff --git a/lib/IR/Module.cpp b/lib/IR/Module.cpp
index 5dbed69..f1b1f9a 100644
--- a/lib/IR/Module.cpp
+++ b/lib/IR/Module.cpp
@@ -24,6 +24,8 @@
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/LeakDetector.h"
#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/RandomNumberGenerator.h"
#include <algorithm>
#include <cstdarg>
#include <cstdlib>
@@ -44,7 +46,7 @@ template class llvm::SymbolTableListTraits<GlobalAlias, Module>;
//
Module::Module(StringRef MID, LLVMContext &C)
- : Context(C), Materializer(), ModuleID(MID), DL("") {
+ : Context(C), Materializer(), ModuleID(MID), RNG(nullptr), DL("") {
ValSymTab = new ValueSymbolTable();
NamedMDSymTab = new StringMap<NamedMDNode *>();
Context.addModule(this);
@@ -59,6 +61,7 @@ Module::~Module() {
NamedMDList.clear();
delete ValSymTab;
delete static_cast<StringMap<NamedMDNode *> *>(NamedMDSymTab);
+ delete RNG;
}
/// getNamedValue - Return the first global value in the module with
@@ -355,6 +358,16 @@ const DataLayout *Module::getDataLayout() const {
return &DL;
}
+// We want reproducible builds, but ModuleID may be a full path so we just use
+// the filename to salt the RNG (although it is not guaranteed to be unique).
+RandomNumberGenerator &Module::getRNG() const {
+ if (RNG == nullptr) {
+ StringRef Salt = sys::path::filename(ModuleID);
+ RNG = new RandomNumberGenerator(Salt);
+ }
+ return *RNG;
+}
+
//===----------------------------------------------------------------------===//
// Methods to control the materialization of GlobalValues in the Module.
//
@@ -381,7 +394,7 @@ bool Module::Materialize(GlobalValue *GV, std::string *ErrInfo) {
if (!Materializer)
return false;
- error_code EC = Materializer->Materialize(GV);
+ std::error_code EC = Materializer->Materialize(GV);
if (!EC)
return false;
if (ErrInfo)
@@ -394,18 +407,21 @@ void Module::Dematerialize(GlobalValue *GV) {
return Materializer->Dematerialize(GV);
}
-error_code Module::materializeAll() {
+std::error_code Module::materializeAll() {
if (!Materializer)
- return error_code::success();
+ return std::error_code();
return Materializer->MaterializeModule(this);
}
-error_code Module::materializeAllPermanently() {
- if (error_code EC = materializeAll())
+std::error_code Module::materializeAllPermanently(bool ReleaseBuffer) {
+ if (std::error_code EC = materializeAll())
return EC;
+ if (ReleaseBuffer)
+ Materializer->releaseBuffer();
+
Materializer.reset();
- return error_code::success();
+ return std::error_code();
}
//===----------------------------------------------------------------------===//
@@ -421,14 +437,14 @@ error_code Module::materializeAllPermanently() {
// has "dropped all references", except operator delete.
//
void Module::dropAllReferences() {
- for(Module::iterator I = begin(), E = end(); I != E; ++I)
- I->dropAllReferences();
+ for (Function &F : *this)
+ F.dropAllReferences();
- for(Module::global_iterator I = global_begin(), E = global_end(); I != E; ++I)
- I->dropAllReferences();
+ for (GlobalVariable &GV : globals())
+ GV.dropAllReferences();
- for(Module::alias_iterator I = alias_begin(), E = alias_end(); I != E; ++I)
- I->dropAllReferences();
+ for (GlobalAlias &GA : aliases())
+ GA.dropAllReferences();
}
unsigned Module::getDwarfVersion() const {
@@ -437,3 +453,11 @@ unsigned Module::getDwarfVersion() const {
return dwarf::DWARF_VERSION;
return cast<ConstantInt>(Val)->getZExtValue();
}
+
+Comdat *Module::getOrInsertComdat(StringRef Name) {
+ Comdat C;
+ StringMapEntry<Comdat> &Entry =
+ ComdatSymTab.GetOrCreateValue(Name, std::move(C));
+ Entry.second.Name = &Entry;
+ return &Entry.second;
+}
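
getOrInsertComdat keeps one Comdat per name by constructing it in place inside the ComdatSymTab entry and pointing the Comdat back at its key. A hedged sketch of a client attaching one to a global; GlobalObject::setComdat is assumed to exist as part of the same comdat support, outside this hunk:

    Comdat *C = M.getOrInsertComdat(GV->getName());  // reused if already present
    C->setSelectionKind(Comdat::Any);
    GV->setComdat(C);  // assumed setter on GlobalObject, not shown in this diff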
diff --git a/lib/IR/Pass.cpp b/lib/IR/Pass.cpp
index bb55d2a..91d86ae 100644
--- a/lib/IR/Pass.cpp
+++ b/lib/IR/Pass.cpp
@@ -199,14 +199,6 @@ Pass *Pass::createPass(AnalysisID ID) {
return PI->createPass();
}
-Pass *PassInfo::createPass() const {
- assert((!isAnalysisGroup() || NormalCtor) &&
- "No default implementation found for analysis group!");
- assert(NormalCtor &&
- "Cannot call createPass on PassInfo without default ctor!");
- return NormalCtor();
-}
-
//===----------------------------------------------------------------------===//
// Analysis Group Implementation Code
//===----------------------------------------------------------------------===//
@@ -224,17 +216,6 @@ RegisterAGBase::RegisterAGBase(const char *Name, const void *InterfaceID,
// PassRegistrationListener implementation
//
-// PassRegistrationListener ctor - Add the current object to the list of
-// PassRegistrationListeners...
-PassRegistrationListener::PassRegistrationListener() {
- PassRegistry::getPassRegistry()->addRegistrationListener(this);
-}
-
-// dtor - Remove object from list of listeners...
-PassRegistrationListener::~PassRegistrationListener() {
- PassRegistry::getPassRegistry()->removeRegistrationListener(this);
-}
-
// enumeratePasses - Iterate over the registered passes, calling the
// passEnumerate callback on each PassInfo object.
//
@@ -242,7 +223,16 @@ void PassRegistrationListener::enumeratePasses() {
PassRegistry::getPassRegistry()->enumerateWith(this);
}
-PassNameParser::~PassNameParser() {}
+PassNameParser::PassNameParser()
+ : Opt(nullptr) {
+ PassRegistry::getPassRegistry()->addRegistrationListener(this);
+}
+
+PassNameParser::~PassNameParser() {
+ // This only gets called during static destruction, in which case the
+ // PassRegistry will have already been destroyed by llvm_shutdown(). So
+ // attempting to remove the registration listener is an error.
+}
//===----------------------------------------------------------------------===//
// AnalysisUsage Class Implementation
diff --git a/lib/IR/PassRegistry.cpp b/lib/IR/PassRegistry.cpp
index 6a5bee2..91940a9 100644
--- a/lib/IR/PassRegistry.cpp
+++ b/lib/IR/PassRegistry.cpp
@@ -13,14 +13,10 @@
//===----------------------------------------------------------------------===//
#include "llvm/PassRegistry.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/StringMap.h"
#include "llvm/IR/Function.h"
#include "llvm/PassSupport.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ManagedStatic.h"
-#include "llvm/Support/Mutex.h"
#include "llvm/Support/RWMutex.h"
#include <vector>
@@ -36,62 +32,23 @@ PassRegistry *PassRegistry::getPassRegistry() {
return &*PassRegistryObj;
}
-static ManagedStatic<sys::SmartRWMutex<true> > Lock;
-
-//===----------------------------------------------------------------------===//
-// PassRegistryImpl
-//
-
-namespace {
-struct PassRegistryImpl {
- /// PassInfoMap - Keep track of the PassInfo object for each registered pass.
- typedef DenseMap<const void*, const PassInfo*> MapType;
- MapType PassInfoMap;
-
- typedef StringMap<const PassInfo*> StringMapType;
- StringMapType PassInfoStringMap;
-
- /// AnalysisGroupInfo - Keep track of information for each analysis group.
- struct AnalysisGroupInfo {
- SmallPtrSet<const PassInfo *, 8> Implementations;
- };
- DenseMap<const PassInfo*, AnalysisGroupInfo> AnalysisGroupInfoMap;
-
- std::vector<std::unique_ptr<const PassInfo>> ToFree;
- std::vector<PassRegistrationListener*> Listeners;
-};
-} // end anonymous namespace
-
-void *PassRegistry::getImpl() const {
- if (!pImpl)
- pImpl = new PassRegistryImpl();
- return pImpl;
-}
-
//===----------------------------------------------------------------------===//
// Accessors
//
PassRegistry::~PassRegistry() {
- sys::SmartScopedWriter<true> Guard(*Lock);
- PassRegistryImpl *Impl = static_cast<PassRegistryImpl*>(pImpl);
- delete Impl;
- pImpl = nullptr;
}
const PassInfo *PassRegistry::getPassInfo(const void *TI) const {
- sys::SmartScopedReader<true> Guard(*Lock);
- PassRegistryImpl *Impl = static_cast<PassRegistryImpl*>(getImpl());
- PassRegistryImpl::MapType::const_iterator I = Impl->PassInfoMap.find(TI);
- return I != Impl->PassInfoMap.end() ? I->second : nullptr;
+ sys::SmartScopedReader<true> Guard(Lock);
+ MapType::const_iterator I = PassInfoMap.find(TI);
+ return I != PassInfoMap.end() ? I->second : nullptr;
}
const PassInfo *PassRegistry::getPassInfo(StringRef Arg) const {
- sys::SmartScopedReader<true> Guard(*Lock);
- PassRegistryImpl *Impl = static_cast<PassRegistryImpl*>(getImpl());
- PassRegistryImpl::StringMapType::const_iterator
- I = Impl->PassInfoStringMap.find(Arg);
- return I != Impl->PassInfoStringMap.end() ? I->second : nullptr;
+ sys::SmartScopedReader<true> Guard(Lock);
+ StringMapType::const_iterator I = PassInfoStringMap.find(Arg);
+ return I != PassInfoStringMap.end() ? I->second : nullptr;
}
//===----------------------------------------------------------------------===//
@@ -99,39 +56,34 @@ const PassInfo *PassRegistry::getPassInfo(StringRef Arg) const {
//
void PassRegistry::registerPass(const PassInfo &PI, bool ShouldFree) {
- sys::SmartScopedWriter<true> Guard(*Lock);
- PassRegistryImpl *Impl = static_cast<PassRegistryImpl*>(getImpl());
+ sys::SmartScopedWriter<true> Guard(Lock);
bool Inserted =
- Impl->PassInfoMap.insert(std::make_pair(PI.getTypeInfo(),&PI)).second;
+ PassInfoMap.insert(std::make_pair(PI.getTypeInfo(),&PI)).second;
assert(Inserted && "Pass registered multiple times!");
(void)Inserted;
- Impl->PassInfoStringMap[PI.getPassArgument()] = &PI;
+ PassInfoStringMap[PI.getPassArgument()] = &PI;
// Notify any listeners.
for (std::vector<PassRegistrationListener*>::iterator
- I = Impl->Listeners.begin(), E = Impl->Listeners.end(); I != E; ++I)
+ I = Listeners.begin(), E = Listeners.end(); I != E; ++I)
(*I)->passRegistered(&PI);
- if (ShouldFree) Impl->ToFree.push_back(std::unique_ptr<const PassInfo>(&PI));
+ if (ShouldFree) ToFree.push_back(std::unique_ptr<const PassInfo>(&PI));
}
void PassRegistry::unregisterPass(const PassInfo &PI) {
- sys::SmartScopedWriter<true> Guard(*Lock);
- PassRegistryImpl *Impl = static_cast<PassRegistryImpl*>(getImpl());
- PassRegistryImpl::MapType::iterator I =
- Impl->PassInfoMap.find(PI.getTypeInfo());
- assert(I != Impl->PassInfoMap.end() && "Pass registered but not in map!");
+ sys::SmartScopedWriter<true> Guard(Lock);
+ MapType::iterator I = PassInfoMap.find(PI.getTypeInfo());
+ assert(I != PassInfoMap.end() && "Pass registered but not in map!");
// Remove pass from the map.
- Impl->PassInfoMap.erase(I);
- Impl->PassInfoStringMap.erase(PI.getPassArgument());
+ PassInfoMap.erase(I);
+ PassInfoStringMap.erase(PI.getPassArgument());
}
void PassRegistry::enumerateWith(PassRegistrationListener *L) {
- sys::SmartScopedReader<true> Guard(*Lock);
- PassRegistryImpl *Impl = static_cast<PassRegistryImpl*>(getImpl());
- for (PassRegistryImpl::MapType::const_iterator I = Impl->PassInfoMap.begin(),
- E = Impl->PassInfoMap.end(); I != E; ++I)
+ sys::SmartScopedReader<true> Guard(Lock);
+ for (auto I = PassInfoMap.begin(), E = PassInfoMap.end(); I != E; ++I)
L->passEnumerate(I->second);
}
@@ -156,15 +108,13 @@ void PassRegistry::registerAnalysisGroup(const void *InterfaceID,
assert(ImplementationInfo &&
"Must register pass before adding to AnalysisGroup!");
- sys::SmartScopedWriter<true> Guard(*Lock);
+ sys::SmartScopedWriter<true> Guard(Lock);
// Make sure we keep track of the fact that the implementation implements
// the interface.
ImplementationInfo->addInterfaceImplemented(InterfaceInfo);
- PassRegistryImpl *Impl = static_cast<PassRegistryImpl*>(getImpl());
- PassRegistryImpl::AnalysisGroupInfo &AGI =
- Impl->AnalysisGroupInfoMap[InterfaceInfo];
+ AnalysisGroupInfo &AGI = AnalysisGroupInfoMap[InterfaceInfo];
assert(AGI.Implementations.count(ImplementationInfo) == 0 &&
"Cannot add a pass to the same analysis group more than once!");
AGI.Implementations.insert(ImplementationInfo);
@@ -179,30 +129,18 @@ void PassRegistry::registerAnalysisGroup(const void *InterfaceID,
}
}
- PassRegistryImpl *Impl = static_cast<PassRegistryImpl*>(getImpl());
if (ShouldFree)
- Impl->ToFree.push_back(std::unique_ptr<const PassInfo>(&Registeree));
+ ToFree.push_back(std::unique_ptr<const PassInfo>(&Registeree));
}
void PassRegistry::addRegistrationListener(PassRegistrationListener *L) {
- sys::SmartScopedWriter<true> Guard(*Lock);
- PassRegistryImpl *Impl = static_cast<PassRegistryImpl*>(getImpl());
- Impl->Listeners.push_back(L);
+ sys::SmartScopedWriter<true> Guard(Lock);
+ Listeners.push_back(L);
}
void PassRegistry::removeRegistrationListener(PassRegistrationListener *L) {
- sys::SmartScopedWriter<true> Guard(*Lock);
-
- // NOTE: This is necessary, because removeRegistrationListener() can be called
- // as part of the llvm_shutdown sequence. Since we have no control over the
- // order of that sequence, we need to gracefully handle the case where the
- // PassRegistry is destructed before the object that triggers this call.
- if (!pImpl) return;
+ sys::SmartScopedWriter<true> Guard(Lock);
- PassRegistryImpl *Impl = static_cast<PassRegistryImpl*>(getImpl());
- std::vector<PassRegistrationListener*>::iterator I =
- std::find(Impl->Listeners.begin(), Impl->Listeners.end(), L);
- assert(I != Impl->Listeners.end() &&
- "PassRegistrationListener not registered!");
- Impl->Listeners.erase(I);
+ auto I = std::find(Listeners.begin(), Listeners.end(), L);
+ Listeners.erase(I);
}
diff --git a/lib/IR/Value.cpp b/lib/IR/Value.cpp
index d734e4e..35c241a 100644
--- a/lib/IR/Value.cpp
+++ b/lib/IR/Value.cpp
@@ -17,6 +17,7 @@
#include "llvm/ADT/SmallString.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/InstrTypes.h"
@@ -38,13 +39,12 @@ using namespace llvm;
static inline Type *checkType(Type *Ty) {
assert(Ty && "Value defined with a null type: Error!");
- return const_cast<Type*>(Ty);
+ return Ty;
}
Value::Value(Type *ty, unsigned scid)
- : SubclassID(scid), HasValueHandle(0),
- SubclassOptionalData(0), SubclassData(0), VTy((Type*)checkType(ty)),
- UseList(nullptr), Name(nullptr) {
+ : VTy(checkType(ty)), UseList(nullptr), Name(nullptr), SubclassID(scid),
+ HasValueHandle(0), SubclassOptionalData(0), SubclassData(0) {
// FIXME: Why isn't this in the subclass gunk??
// Note, we cannot call isa<CallInst> before the CallInst has been
// constructed.
@@ -214,7 +214,7 @@ void Value::setName(const Twine &NewName) {
// then reallocated.
// Create the new name.
- Name = ValueName::Create(NameRef.begin(), NameRef.end());
+ Name = ValueName::Create(NameRef);
Name->setValue(this);
return;
}
@@ -301,27 +301,6 @@ void Value::takeName(Value *V) {
ST->reinsertValue(this);
}
-static GlobalObject &findReplacementForAliasUse(Value &C) {
- if (auto *GO = dyn_cast<GlobalObject>(&C))
- return *GO;
- if (auto *GA = dyn_cast<GlobalAlias>(&C))
- return *GA->getAliasee();
- auto *CE = cast<ConstantExpr>(&C);
- assert(CE->getOpcode() == Instruction::BitCast ||
- CE->getOpcode() == Instruction::GetElementPtr ||
- CE->getOpcode() == Instruction::AddrSpaceCast);
- if (CE->getOpcode() == Instruction::GetElementPtr)
- assert(cast<GEPOperator>(CE)->hasAllZeroIndices());
- return findReplacementForAliasUse(*CE->getOperand(0));
-}
-
-static void replaceAliasUseWith(Use &U, Value *New) {
- GlobalObject &Replacement = findReplacementForAliasUse(*New);
- assert(&cast<GlobalObject>(*U) != &Replacement &&
- "replaceAliasUseWith cannot form an alias cycle");
- U.set(&Replacement);
-}
-
#ifndef NDEBUG
static bool contains(SmallPtrSet<ConstantExpr *, 4> &Cache, ConstantExpr *Expr,
Constant *C) {
@@ -373,10 +352,6 @@ void Value::replaceAllUsesWith(Value *New) {
// Must handle Constants specially, we cannot call replaceUsesOfWith on a
// constant because they are uniqued.
if (auto *C = dyn_cast<Constant>(U.getUser())) {
- if (isa<GlobalAlias>(C)) {
- replaceAliasUseWith(U, New);
- continue;
- }
if (!isa<GlobalValue>(C)) {
C->replaceUsesOfWithOnConstant(this, New, &U);
continue;
@@ -498,18 +473,33 @@ Value *Value::stripInBoundsOffsets() {
/// isDereferenceablePointer - Test if this value is always a pointer to
/// allocated and suitably aligned memory for a simple load or store.
-static bool isDereferenceablePointer(const Value *V,
+static bool isDereferenceablePointer(const Value *V, const DataLayout *DL,
SmallPtrSet<const Value *, 32> &Visited) {
// Note that it is not safe to speculate into a malloc'd region because
// malloc may return null.
- // It's also not always safe to follow a bitcast, for example:
- // bitcast i8* (alloca i8) to i32*
- // would result in a 4-byte load from a 1-byte alloca. Some cases could
- // be handled using DataLayout to check sizes and alignments though.
// These are obviously ok.
if (isa<AllocaInst>(V)) return true;
+ // It's not always safe to follow a bitcast, for example:
+ // bitcast i8* (alloca i8) to i32*
+ // would result in a 4-byte load from a 1-byte alloca. However,
+ // if we're casting from a pointer from a type of larger size
+ // to a type of smaller size (or the same size), and the alignment
+ // is at least as large as for the resulting pointer type, then
+ // we can look through the bitcast.
+ if (DL)
+ if (const BitCastInst* BC = dyn_cast<BitCastInst>(V)) {
+ Type *STy = BC->getSrcTy()->getPointerElementType(),
+ *DTy = BC->getDestTy()->getPointerElementType();
+ if (STy->isSized() && DTy->isSized() &&
+ (DL->getTypeStoreSize(STy) >=
+ DL->getTypeStoreSize(DTy)) &&
+ (DL->getABITypeAlignment(STy) >=
+ DL->getABITypeAlignment(DTy)))
+ return isDereferenceablePointer(BC->getOperand(0), DL, Visited);
+ }
+
// Global variables which can't collapse to null are ok.
if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
return !GV->hasExternalWeakLinkage();
@@ -523,7 +513,7 @@ static bool isDereferenceablePointer(const Value *V,
// Conservatively require that the base pointer be fully dereferenceable.
if (!Visited.insert(GEP->getOperand(0)))
return false;
- if (!isDereferenceablePointer(GEP->getOperand(0), Visited))
+ if (!isDereferenceablePointer(GEP->getOperand(0), DL, Visited))
return false;
// Check the indices.
gep_type_iterator GTI = gep_type_begin(GEP);
@@ -559,9 +549,9 @@ static bool isDereferenceablePointer(const Value *V,
/// isDereferenceablePointer - Test if this value is always a pointer to
/// allocated and suitably aligned memory for a simple load or store.
-bool Value::isDereferenceablePointer() const {
+bool Value::isDereferenceablePointer(const DataLayout *DL) const {
SmallPtrSet<const Value *, 32> Visited;
- return ::isDereferenceablePointer(this, Visited);
+ return ::isDereferenceablePointer(this, DL, Visited);
}
/// DoPHITranslation - If this value is a PHI node with CurBB as its parent,
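
The DataLayout-aware rule accepts a bitcast only when the source pointee is at least as large and at least as aligned as the destination pointee, so the resulting load can never read past the underlying object. The check in isolation, as a sketch:

    static bool bitcastKeepsDereferenceability(const DataLayout &DL,
                                               Type *SrcEltTy, Type *DstEltTy) {
      // e.g. i32* -> i8* is fine (4 >= 1); i8* -> i32* is rejected (1 < 4).
      return SrcEltTy->isSized() && DstEltTy->isSized() &&
             DL.getTypeStoreSize(SrcEltTy) >= DL.getTypeStoreSize(DstEltTy) &&
             DL.getABITypeAlignment(SrcEltTy) >= DL.getABITypeAlignment(DstEltTy);
    }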
diff --git a/lib/IR/Verifier.cpp b/lib/IR/Verifier.cpp
index bcc38c1..314bad3 100644
--- a/lib/IR/Verifier.cpp
+++ b/lib/IR/Verifier.cpp
@@ -107,6 +107,12 @@ struct VerifierSupport {
OS << ' ' << *T;
}
+ void WriteComdat(const Comdat *C) {
+ if (!C)
+ return;
+ OS << *C;
+ }
+
// CheckFailed - A check failed, so print out the condition and the message
// that failed. This provides a nice place to put a breakpoint if you want
// to see why something is not correct.
@@ -138,6 +144,12 @@ struct VerifierSupport {
WriteType(T3);
Broken = true;
}
+
+ void CheckFailed(const Twine &Message, const Comdat *C) {
+ OS << Message.str() << "\n";
+ WriteComdat(C);
+ Broken = true;
+ }
};
class Verifier : public InstVisitor<Verifier>, VerifierSupport {
friend class InstVisitor<Verifier>;
@@ -230,6 +242,9 @@ public:
I != E; ++I)
visitNamedMDNode(*I);
+ for (const StringMapEntry<Comdat> &SMEC : M.getComdatSymbolTable())
+ visitComdat(SMEC.getValue());
+
visitModuleFlags(M);
visitModuleIdents(M);
@@ -241,8 +256,12 @@ private:
void visitGlobalValue(const GlobalValue &GV);
void visitGlobalVariable(const GlobalVariable &GV);
void visitGlobalAlias(const GlobalAlias &GA);
+ void visitAliaseeSubExpr(const GlobalAlias &A, const Constant &C);
+ void visitAliaseeSubExpr(SmallPtrSet<const GlobalAlias *, 4> &Visited,
+ const GlobalAlias &A, const Constant &C);
void visitNamedMDNode(const NamedMDNode &NMD);
void visitMDNode(MDNode &MD, Function *F);
+ void visitComdat(const Comdat &C);
void visitModuleIdents(const Module &M);
void visitModuleFlags(const Module &M);
void visitModuleFlag(const MDNode *Op,
@@ -384,6 +403,7 @@ void Verifier::visitGlobalVariable(const GlobalVariable &GV) {
"'common' global must have a zero initializer!", &GV);
Assert1(!GV.isConstant(), "'common' global may not be marked constant!",
&GV);
+ Assert1(!GV.hasComdat(), "'common' global may not be in a Comdat!", &GV);
}
} else {
Assert1(GV.hasExternalLinkage() || GV.hasExternalWeakLinkage(),
@@ -474,36 +494,57 @@ void Verifier::visitGlobalVariable(const GlobalVariable &GV) {
visitGlobalValue(GV);
}
+void Verifier::visitAliaseeSubExpr(const GlobalAlias &GA, const Constant &C) {
+ SmallPtrSet<const GlobalAlias*, 4> Visited;
+ Visited.insert(&GA);
+ visitAliaseeSubExpr(Visited, GA, C);
+}
+
+void Verifier::visitAliaseeSubExpr(SmallPtrSet<const GlobalAlias *, 4> &Visited,
+ const GlobalAlias &GA, const Constant &C) {
+ if (const auto *GV = dyn_cast<GlobalValue>(&C)) {
+ Assert1(!GV->isDeclaration(), "Alias must point to a definition", &GA);
+
+ if (const auto *GA2 = dyn_cast<GlobalAlias>(GV)) {
+ Assert1(Visited.insert(GA2), "Aliases cannot form a cycle", &GA);
+
+ Assert1(!GA2->mayBeOverridden(), "Alias cannot point to a weak alias",
+ &GA);
+ } else {
+ // Only continue verifying subexpressions of GlobalAliases.
+ // Do not recurse into global initializers.
+ return;
+ }
+ }
+
+ if (const auto *CE = dyn_cast<ConstantExpr>(&C))
+ VerifyConstantExprBitcastType(CE);
+
+ for (const Use &U : C.operands()) {
+ Value *V = &*U;
+ if (const auto *GA2 = dyn_cast<GlobalAlias>(V))
+ visitAliaseeSubExpr(Visited, GA, *GA2->getAliasee());
+ else if (const auto *C2 = dyn_cast<Constant>(V))
+ visitAliaseeSubExpr(Visited, GA, *C2);
+ }
+}
+
void Verifier::visitGlobalAlias(const GlobalAlias &GA) {
Assert1(!GA.getName().empty(),
"Alias name cannot be empty!", &GA);
Assert1(GlobalAlias::isValidLinkage(GA.getLinkage()),
- "Alias should have external or external weak linkage!", &GA);
- Assert1(GA.getAliasee(),
- "Aliasee cannot be NULL!", &GA);
- Assert1(!GA.hasUnnamedAddr(), "Alias cannot have unnamed_addr!", &GA);
-
+ "Alias should have private, internal, linkonce, weak, linkonce_odr, "
+ "weak_odr, or external linkage!",
+ &GA);
const Constant *Aliasee = GA.getAliasee();
- const GlobalValue *GV = dyn_cast<GlobalValue>(Aliasee);
-
- if (!GV) {
- const ConstantExpr *CE = dyn_cast<ConstantExpr>(Aliasee);
- if (CE && (CE->getOpcode() == Instruction::BitCast ||
- CE->getOpcode() == Instruction::AddrSpaceCast ||
- CE->getOpcode() == Instruction::GetElementPtr))
- GV = dyn_cast<GlobalValue>(CE->getOperand(0));
+ Assert1(Aliasee, "Aliasee cannot be NULL!", &GA);
+ Assert1(GA.getType() == Aliasee->getType(),
+ "Alias and aliasee types should match!", &GA);
- Assert1(GV, "Aliasee should be either GlobalValue, bitcast or "
- "addrspacecast of GlobalValue",
- &GA);
+ Assert1(isa<GlobalValue>(Aliasee) || isa<ConstantExpr>(Aliasee),
+ "Aliasee should be either GlobalValue or ConstantExpr", &GA);
- VerifyConstantExprBitcastType(CE);
- }
- Assert1(!GV->isDeclaration(), "Alias must point to a definition", &GA);
- if (const GlobalAlias *GAAliasee = dyn_cast<GlobalAlias>(GV)) {
- Assert1(!GAAliasee->mayBeOverridden(), "Alias cannot point to a weak alias",
- &GA);
- }
+ visitAliaseeSubExpr(GA, *Aliasee);
visitGlobalValue(GA);
}
@@ -556,6 +597,22 @@ void Verifier::visitMDNode(MDNode &MD, Function *F) {
}
}
+void Verifier::visitComdat(const Comdat &C) {
+ // All Comdat::SelectionKind values other than Comdat::Any require a
+ // GlobalValue with the same name as the Comdat.
+ const GlobalValue *GV = M->getNamedValue(C.getName());
+ if (C.getSelectionKind() != Comdat::Any)
+ Assert1(GV,
+ "comdat selection kind requires a global value with the same name",
+ &C);
+ // The Module is invalid if the GlobalValue has local linkage. Allowing
+ // otherwise opens us up to seeing the underlying global value get renamed if
+ // collisions occur.
+ if (GV)
+ Assert1(!GV->hasLocalLinkage(), "comdat global value has local linkage",
+ GV);
+}
+
void Verifier::visitModuleIdents(const Module &M) {
const NamedMDNode *Idents = M.getNamedMetadata("llvm.ident");
if (!Idents)
@@ -716,7 +773,8 @@ void Verifier::VerifyAttributeTypes(AttributeSet Attrs, unsigned Idx,
I->getKindAsEnum() == Attribute::Builtin ||
I->getKindAsEnum() == Attribute::NoBuiltin ||
I->getKindAsEnum() == Attribute::Cold ||
- I->getKindAsEnum() == Attribute::OptimizeNone) {
+ I->getKindAsEnum() == Attribute::OptimizeNone ||
+ I->getKindAsEnum() == Attribute::JumpTable) {
if (!isFunction) {
CheckFailed("Attribute '" + I->getAsString() +
"' only applies to functions!", V);
@@ -890,6 +948,14 @@ void Verifier::VerifyFunctionAttrs(FunctionType *FT, AttributeSet Attrs,
Attribute::MinSize),
"Attributes 'minsize and optnone' are incompatible!", V);
}
+
+ if (Attrs.hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::JumpTable)) {
+ const GlobalValue *GV = cast<GlobalValue>(V);
+ Assert1(GV->hasUnnamedAddr(),
+ "Attribute 'jumptable' requires 'unnamed_addr'", V);
+
+ }
}
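
A sketch of the setup that passes the new jumptable check, assuming the era's attribute API; both calls below are grounded in the check above:

#include "llvm/IR/Function.h"
using namespace llvm;

void markJumpTable(Function &F) {
  F.addFnAttr(Attribute::JumpTable); // the new attribute from this patch
  F.setUnnamedAddr(true);            // required, or the verifier fires
}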
void Verifier::VerifyBitcastType(const Value *V, Type *DestTy, Type *SrcTy) {
@@ -2058,8 +2124,7 @@ void Verifier::visitLandingPadInst(LandingPadInst &LPI) {
Assert1(isa<Constant>(PersonalityFn), "Personality function is not constant!",
&LPI);
for (unsigned i = 0, e = LPI.getNumClauses(); i < e; ++i) {
- Value *Clause = LPI.getClause(i);
- Assert1(isa<Constant>(Clause), "Clause is not constant!", &LPI);
+ Constant *Clause = LPI.getClause(i);
if (LPI.isCatch(i)) {
Assert1(isa<PointerType>(Clause->getType()),
"Catch operand does not have pointer type!", &LPI);
@@ -2203,7 +2268,8 @@ void Verifier::visitInstruction(Instruction &I) {
}
MDNode *MD = I.getMetadata(LLVMContext::MD_range);
- Assert1(!MD || isa<LoadInst>(I), "Ranges are only for loads!", &I);
+ Assert1(!MD || isa<LoadInst>(I) || isa<CallInst>(I) || isa<InvokeInst>(I),
+ "Ranges are only for loads, calls and invokes!", &I);
InstsInThisBlock.insert(&I);
}
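
With the relaxed check, range metadata can now annotate call results as well as loads. A sketch using this era's metadata-as-Value API; it assumes the call returns i32 so the interval type matches:

#include "llvm/IR/Constants.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Metadata.h"
using namespace llvm;

void addRange(CallInst *Call, LLVMContext &Ctx) {
  Type *I32 = Type::getInt32Ty(Ctx);
  Value *Range[] = {ConstantInt::get(I32, 0), // half-open interval [0, 256)
                    ConstantInt::get(I32, 256)};
  Call->setMetadata(LLVMContext::MD_range, MDNode::get(Ctx, Range));
}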
diff --git a/lib/IRReader/IRReader.cpp b/lib/IRReader/IRReader.cpp
index f4ed437..f8d2f5a 100644
--- a/lib/IRReader/IRReader.cpp
+++ b/lib/IRReader/IRReader.cpp
@@ -18,7 +18,7 @@
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/Timer.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Support/system_error.h"
+#include <system_error>
using namespace llvm;
@@ -29,17 +29,16 @@ namespace llvm {
static const char *const TimeIRParsingGroupName = "LLVM IR Parsing";
static const char *const TimeIRParsingName = "Parse IR";
-
-Module *llvm::getLazyIRModule(MemoryBuffer *Buffer, SMDiagnostic &Err,
- LLVMContext &Context) {
+static Module *getLazyIRModule(MemoryBuffer *Buffer, SMDiagnostic &Err,
+ LLVMContext &Context) {
if (isBitcode((const unsigned char *)Buffer->getBufferStart(),
(const unsigned char *)Buffer->getBufferEnd())) {
std::string ErrMsg;
ErrorOr<Module *> ModuleOrErr = getLazyBitcodeModule(Buffer, Context);
- if (error_code EC = ModuleOrErr.getError()) {
+ if (std::error_code EC = ModuleOrErr.getError()) {
Err = SMDiagnostic(Buffer->getBufferIdentifier(), SourceMgr::DK_Error,
EC.message());
- // ParseBitcodeFile does not take ownership of the Buffer in the
+ // getLazyBitcodeModule does not take ownership of the Buffer in the
// case of an error.
delete Buffer;
return nullptr;
@@ -52,14 +51,15 @@ Module *llvm::getLazyIRModule(MemoryBuffer *Buffer, SMDiagnostic &Err,
Module *llvm::getLazyIRFileModule(const std::string &Filename, SMDiagnostic &Err,
LLVMContext &Context) {
- std::unique_ptr<MemoryBuffer> File;
- if (error_code ec = MemoryBuffer::getFileOrSTDIN(Filename, File)) {
+ ErrorOr<std::unique_ptr<MemoryBuffer>> FileOrErr =
+ MemoryBuffer::getFileOrSTDIN(Filename);
+ if (std::error_code EC = FileOrErr.getError()) {
Err = SMDiagnostic(Filename, SourceMgr::DK_Error,
- "Could not open input file: " + ec.message());
+ "Could not open input file: " + EC.message());
return nullptr;
}
- return getLazyIRModule(File.release(), Err, Context);
+ return getLazyIRModule(FileOrErr.get().release(), Err, Context);
}
Module *llvm::ParseIR(MemoryBuffer *Buffer, SMDiagnostic &Err,
@@ -70,29 +70,31 @@ Module *llvm::ParseIR(MemoryBuffer *Buffer, SMDiagnostic &Err,
(const unsigned char *)Buffer->getBufferEnd())) {
ErrorOr<Module *> ModuleOrErr = parseBitcodeFile(Buffer, Context);
Module *M = nullptr;
- if (error_code EC = ModuleOrErr.getError())
+ if (std::error_code EC = ModuleOrErr.getError())
Err = SMDiagnostic(Buffer->getBufferIdentifier(), SourceMgr::DK_Error,
EC.message());
else
M = ModuleOrErr.get();
// parseBitcodeFile does not take ownership of the Buffer.
- delete Buffer;
return M;
}
- return ParseAssembly(Buffer, nullptr, Err, Context);
+ return ParseAssembly(MemoryBuffer::getMemBuffer(
+ Buffer->getBuffer(), Buffer->getBufferIdentifier()),
+ nullptr, Err, Context);
}
Module *llvm::ParseIRFile(const std::string &Filename, SMDiagnostic &Err,
LLVMContext &Context) {
- std::unique_ptr<MemoryBuffer> File;
- if (error_code ec = MemoryBuffer::getFileOrSTDIN(Filename, File)) {
+ ErrorOr<std::unique_ptr<MemoryBuffer>> FileOrErr =
+ MemoryBuffer::getFileOrSTDIN(Filename);
+ if (std::error_code EC = FileOrErr.getError()) {
Err = SMDiagnostic(Filename, SourceMgr::DK_Error,
- "Could not open input file: " + ec.message());
+ "Could not open input file: " + EC.message());
return nullptr;
}
- return ParseIR(File.release(), Err, Context);
+ return ParseIR(FileOrErr.get().get(), Err, Context);
}
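
The migration above repeats one pattern throughout: error_code plus out-parameter becomes ErrorOr<std::unique_ptr<MemoryBuffer>>. A caller-side sketch of consuming the post-patch API:

#include "llvm/Support/ErrorOr.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/raw_ostream.h"
#include <system_error>
using namespace llvm;

int openInput() {
  ErrorOr<std::unique_ptr<MemoryBuffer>> FileOrErr =
      MemoryBuffer::getFileOrSTDIN("input.ll");
  if (std::error_code EC = FileOrErr.getError()) {
    errs() << "Could not open input file: " << EC.message() << "\n";
    return 1;
  }
  std::unique_ptr<MemoryBuffer> Buf = std::move(FileOrErr.get());
  return Buf->getBufferSize() == 0; // ownership stays with the caller
}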
//===----------------------------------------------------------------------===//
@@ -104,7 +106,8 @@ LLVMBool LLVMParseIRInContext(LLVMContextRef ContextRef,
char **OutMessage) {
SMDiagnostic Diag;
- *OutM = wrap(ParseIR(unwrap(MemBuf), Diag, *unwrap(ContextRef)));
+ std::unique_ptr<MemoryBuffer> MB(unwrap(MemBuf));
+ *OutM = wrap(ParseIR(MB.get(), Diag, *unwrap(ContextRef)));
if(!*OutM) {
if (OutMessage) {
diff --git a/lib/LTO/LLVMBuild.txt b/lib/LTO/LLVMBuild.txt
index c9b5212..29ed92c 100644
--- a/lib/LTO/LLVMBuild.txt
+++ b/lib/LTO/LLVMBuild.txt
@@ -19,4 +19,4 @@
type = Library
name = LTO
parent = Libraries
-required_libraries = BitReader BitWriter Core IPA IPO InstCombine Linker MC MCParser ObjCARC Scalar Support Target TransformUtils
+required_libraries = BitReader BitWriter Core IPA IPO InstCombine Linker MC MCParser ObjCARC Object Scalar Support Target TransformUtils
diff --git a/lib/LTO/LTOCodeGenerator.cpp b/lib/LTO/LTOCodeGenerator.cpp
index 99236bd..335197a 100644
--- a/lib/LTO/LTOCodeGenerator.cpp
+++ b/lib/LTO/LTOCodeGenerator.cpp
@@ -44,7 +44,6 @@
#include "llvm/Support/TargetSelect.h"
#include "llvm/Support/ToolOutputFile.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Support/system_error.h"
#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetOptions.h"
@@ -52,6 +51,7 @@
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/IPO/PassManagerBuilder.h"
#include "llvm/Transforms/ObjCARC.h"
+#include <system_error>
using namespace llvm;
const char* LTOCodeGenerator::getVersionString() {
@@ -114,7 +114,7 @@ void LTOCodeGenerator::initializeLTOPasses() {
}
bool LTOCodeGenerator::addModule(LTOModule* mod, std::string& errMsg) {
- bool ret = IRLinker.linkInModule(mod->getLLVVMModule(), &errMsg);
+ bool ret = IRLinker.linkInModule(&mod->getModule(), &errMsg);
const std::vector<const char*> &undefs = mod->getAsmUndefinedRefs();
for (int i = 0, e = undefs.size(); i != e; ++i)
@@ -124,23 +124,7 @@ bool LTOCodeGenerator::addModule(LTOModule* mod, std::string& errMsg) {
}
void LTOCodeGenerator::setTargetOptions(TargetOptions options) {
- Options.LessPreciseFPMADOption = options.LessPreciseFPMADOption;
- Options.NoFramePointerElim = options.NoFramePointerElim;
- Options.AllowFPOpFusion = options.AllowFPOpFusion;
- Options.UnsafeFPMath = options.UnsafeFPMath;
- Options.NoInfsFPMath = options.NoInfsFPMath;
- Options.NoNaNsFPMath = options.NoNaNsFPMath;
- Options.HonorSignDependentRoundingFPMathOption =
- options.HonorSignDependentRoundingFPMathOption;
- Options.UseSoftFloat = options.UseSoftFloat;
- Options.FloatABIType = options.FloatABIType;
- Options.NoZerosInBSS = options.NoZerosInBSS;
- Options.GuaranteedTailCallOpt = options.GuaranteedTailCallOpt;
- Options.DisableTailCalls = options.DisableTailCalls;
- Options.StackAlignmentOverride = options.StackAlignmentOverride;
- Options.TrapFuncName = options.TrapFuncName;
- Options.PositionIndependentExecutable = options.PositionIndependentExecutable;
- Options.UseInitArray = options.UseInitArray;
+ Options = options;
}
void LTOCodeGenerator::setDebugInfo(lto_debug_model debug) {
@@ -208,7 +192,8 @@ bool LTOCodeGenerator::compile_to_file(const char** name,
// make unique temp .o file to put generated object file
SmallString<128> Filename;
int FD;
- error_code EC = sys::fs::createTemporaryFile("lto-llvm", "o", FD, Filename);
+ std::error_code EC =
+ sys::fs::createTemporaryFile("lto-llvm", "o", FD, Filename);
if (EC) {
errMsg = EC.message();
return false;
@@ -251,13 +236,14 @@ const void* LTOCodeGenerator::compile(size_t* length,
delete NativeObjectFile;
// read .o file into memory buffer
- std::unique_ptr<MemoryBuffer> BuffPtr;
- if (error_code ec = MemoryBuffer::getFile(name, BuffPtr, -1, false)) {
- errMsg = ec.message();
+ ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOrErr =
+ MemoryBuffer::getFile(name, -1, false);
+ if (std::error_code EC = BufferOrErr.getError()) {
+ errMsg = EC.message();
sys::fs::remove(NativeObjectPath);
return nullptr;
}
- NativeObjectFile = BuffPtr.release();
+ NativeObjectFile = BufferOrErr.get().release();
// remove temp files
sys::fs::remove(NativeObjectPath);
diff --git a/lib/LTO/LTOModule.cpp b/lib/LTO/LTOModule.cpp
index d117514..844c0f2 100644
--- a/lib/LTO/LTOModule.cpp
+++ b/lib/LTO/LTOModule.cpp
@@ -24,7 +24,6 @@
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCSection.h"
-#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCTargetAsmParser.h"
@@ -37,21 +36,16 @@
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/TargetSelect.h"
-#include "llvm/Support/system_error.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Transforms/Utils/GlobalStatus.h"
+#include <system_error>
using namespace llvm;
-LTOModule::LTOModule(llvm::Module *m, llvm::TargetMachine *t)
- : _module(m), _target(t),
- _context(_target->getMCAsmInfo(), _target->getRegisterInfo(), &ObjFileInfo),
- _mangler(t->getDataLayout()) {
- ObjFileInfo.InitMCObjectFileInfo(t->getTargetTriple(),
- t->getRelocationModel(), t->getCodeModel(),
- _context);
-}
+LTOModule::LTOModule(std::unique_ptr<object::IRObjectFile> Obj,
+ llvm::TargetMachine *TM)
+ : IRFile(std::move(Obj)), _target(TM) {}
/// isBitcodeFile - Returns 'true' if the file (or memory contents) is LLVM
/// bitcode.
@@ -67,87 +61,63 @@ bool LTOModule::isBitcodeFile(const char *path) {
return type == sys::fs::file_magic::bitcode;
}
-/// isBitcodeFileForTarget - Returns 'true' if the file (or memory contents) is
-/// LLVM bitcode for the specified triple.
-bool LTOModule::isBitcodeFileForTarget(const void *mem, size_t length,
- const char *triplePrefix) {
- MemoryBuffer *buffer = makeBuffer(mem, length);
- if (!buffer)
- return false;
- return isTargetMatch(buffer, triplePrefix);
-}
-
-bool LTOModule::isBitcodeFileForTarget(const char *path,
- const char *triplePrefix) {
- std::unique_ptr<MemoryBuffer> buffer;
- if (MemoryBuffer::getFile(path, buffer))
- return false;
- return isTargetMatch(buffer.release(), triplePrefix);
-}
-
-/// isTargetMatch - Returns 'true' if the memory buffer is for the specified
-/// target triple.
-bool LTOModule::isTargetMatch(MemoryBuffer *buffer, const char *triplePrefix) {
+bool LTOModule::isBitcodeForTarget(MemoryBuffer *buffer,
+ StringRef triplePrefix) {
std::string Triple = getBitcodeTargetTriple(buffer, getGlobalContext());
- delete buffer;
- return strncmp(Triple.c_str(), triplePrefix, strlen(triplePrefix)) == 0;
+ return StringRef(Triple).startswith(triplePrefix);
}
-/// makeLTOModule - Create an LTOModule. N.B. These methods take ownership of
-/// the buffer.
-LTOModule *LTOModule::makeLTOModule(const char *path, TargetOptions options,
- std::string &errMsg) {
- std::unique_ptr<MemoryBuffer> buffer;
- if (error_code ec = MemoryBuffer::getFile(path, buffer)) {
- errMsg = ec.message();
+LTOModule *LTOModule::createFromFile(const char *path, TargetOptions options,
+ std::string &errMsg) {
+ ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOrErr =
+ MemoryBuffer::getFile(path);
+ if (std::error_code EC = BufferOrErr.getError()) {
+ errMsg = EC.message();
return nullptr;
}
- return makeLTOModule(buffer.release(), options, errMsg);
+ return makeLTOModule(std::move(BufferOrErr.get()), options, errMsg);
}
-LTOModule *LTOModule::makeLTOModule(int fd, const char *path,
- size_t size, TargetOptions options,
- std::string &errMsg) {
- return makeLTOModule(fd, path, size, 0, options, errMsg);
+LTOModule *LTOModule::createFromOpenFile(int fd, const char *path, size_t size,
+ TargetOptions options,
+ std::string &errMsg) {
+ return createFromOpenFileSlice(fd, path, size, 0, options, errMsg);
}
-LTOModule *LTOModule::makeLTOModule(int fd, const char *path,
- size_t map_size,
- off_t offset,
- TargetOptions options,
- std::string &errMsg) {
- std::unique_ptr<MemoryBuffer> buffer;
- if (error_code ec =
- MemoryBuffer::getOpenFileSlice(fd, path, buffer, map_size, offset)) {
- errMsg = ec.message();
+LTOModule *LTOModule::createFromOpenFileSlice(int fd, const char *path,
+ size_t map_size, off_t offset,
+ TargetOptions options,
+ std::string &errMsg) {
+ ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOrErr =
+ MemoryBuffer::getOpenFileSlice(fd, path, map_size, offset);
+ if (std::error_code EC = BufferOrErr.getError()) {
+ errMsg = EC.message();
return nullptr;
}
- return makeLTOModule(buffer.release(), options, errMsg);
+ return makeLTOModule(std::move(BufferOrErr.get()), options, errMsg);
}
-LTOModule *LTOModule::makeLTOModule(const void *mem, size_t length,
- TargetOptions options,
- std::string &errMsg, StringRef path) {
+LTOModule *LTOModule::createFromBuffer(const void *mem, size_t length,
+ TargetOptions options,
+ std::string &errMsg, StringRef path) {
std::unique_ptr<MemoryBuffer> buffer(makeBuffer(mem, length, path));
if (!buffer)
return nullptr;
- return makeLTOModule(buffer.release(), options, errMsg);
+ return makeLTOModule(std::move(buffer), options, errMsg);
}
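
A usage sketch for the renamed factory entry points above; the error text and helper name are illustrative:

#include "llvm/LTO/LTOModule.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetOptions.h"
using namespace llvm;

LTOModule *loadForLTO(const char *Path) {
  std::string Err;
  TargetOptions Opts;
  LTOModule *Mod = LTOModule::createFromFile(Path, Opts, Err);
  if (!Mod)
    report_fatal_error("LTOModule: " + Err);
  return Mod;
}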
-LTOModule *LTOModule::makeLTOModule(MemoryBuffer *buffer,
+LTOModule *LTOModule::makeLTOModule(std::unique_ptr<MemoryBuffer> Buffer,
TargetOptions options,
std::string &errMsg) {
- // parse bitcode buffer
- ErrorOr<Module *> ModuleOrErr =
- getLazyBitcodeModule(buffer, getGlobalContext());
- if (error_code EC = ModuleOrErr.getError()) {
+ ErrorOr<Module *> MOrErr =
+ getLazyBitcodeModule(Buffer.get(), getGlobalContext());
+ if (std::error_code EC = MOrErr.getError()) {
errMsg = EC.message();
- delete buffer;
return nullptr;
}
- std::unique_ptr<Module> m(ModuleOrErr.get());
+ std::unique_ptr<Module> M(MOrErr.get());
- std::string TripleStr = m->getTargetTriple();
+ std::string TripleStr = M->getTargetTriple();
if (TripleStr.empty())
TripleStr = sys::getDefaultTargetTriple();
llvm::Triple Triple(TripleStr);
@@ -175,18 +145,13 @@ LTOModule *LTOModule::makeLTOModule(MemoryBuffer *buffer,
TargetMachine *target = march->createTargetMachine(TripleStr, CPU, FeatureStr,
options);
- m->materializeAllPermanently();
+ M->materializeAllPermanently(true);
+ M->setDataLayout(target->getDataLayout());
- LTOModule *Ret = new LTOModule(m.release(), target);
+ std::unique_ptr<object::IRObjectFile> IRObj(
+ new object::IRObjectFile(std::move(Buffer), std::move(M)));
- // We need a MCContext set up in order to get mangled names of private
- // symbols. It is a bit odd that we need to report uses and definitions
- // of private symbols, but it does look like ld64 expects to be informed
- // of at least the ones with an 'l' prefix.
- MCContext &Context = Ret->_context;
- const TargetLoweringObjectFile &TLOF =
- target->getTargetLowering()->getObjFileLowering();
- const_cast<TargetLoweringObjectFile &>(TLOF).Initialize(Context, *target);
+ LTOModule *Ret = new LTOModule(std::move(IRObj), target);
if (Ret->parseSymbols(errMsg)) {
delete Ret;
@@ -305,10 +270,20 @@ void LTOModule::addObjCClassRef(const GlobalVariable *clgv) {
entry.setValue(info);
}
-/// addDefinedDataSymbol - Add a data symbol as defined to the list.
-void LTOModule::addDefinedDataSymbol(const GlobalValue *v) {
+void LTOModule::addDefinedDataSymbol(const object::BasicSymbolRef &Sym) {
+ SmallString<64> Buffer;
+ {
+ raw_svector_ostream OS(Buffer);
+ Sym.printName(OS);
+ }
+
+ const GlobalValue *V = IRFile->getSymbolGV(Sym.getRawDataRefImpl());
+ addDefinedDataSymbol(Buffer.c_str(), V);
+}
+
+void LTOModule::addDefinedDataSymbol(const char *Name, const GlobalValue *v) {
// Add to list of defined symbols.
- addDefinedSymbol(v, false);
+ addDefinedSymbol(Name, v, false);
if (!v->hasSection() /* || !isTargetDarwin */)
return;
@@ -334,31 +309,43 @@ void LTOModule::addDefinedDataSymbol(const GlobalValue *v) {
// from the ObjC data structures generated by the front end.
// special case if this data blob is an ObjC class definition
- if (v->getSection().compare(0, 15, "__OBJC,__class,") == 0) {
+ std::string Section = v->getSection();
+ if (Section.compare(0, 15, "__OBJC,__class,") == 0) {
if (const GlobalVariable *gv = dyn_cast<GlobalVariable>(v)) {
addObjCClass(gv);
}
}
// special case if this data blob is an ObjC category definition
- else if (v->getSection().compare(0, 18, "__OBJC,__category,") == 0) {
+ else if (Section.compare(0, 18, "__OBJC,__category,") == 0) {
if (const GlobalVariable *gv = dyn_cast<GlobalVariable>(v)) {
addObjCCategory(gv);
}
}
// special case if this data blob is the list of referenced classes
- else if (v->getSection().compare(0, 18, "__OBJC,__cls_refs,") == 0) {
+ else if (Section.compare(0, 18, "__OBJC,__cls_refs,") == 0) {
if (const GlobalVariable *gv = dyn_cast<GlobalVariable>(v)) {
addObjCClassRef(gv);
}
}
}
-/// addDefinedFunctionSymbol - Add a function symbol as defined to the list.
-void LTOModule::addDefinedFunctionSymbol(const Function *f) {
+void LTOModule::addDefinedFunctionSymbol(const object::BasicSymbolRef &Sym) {
+ SmallString<64> Buffer;
+ {
+ raw_svector_ostream OS(Buffer);
+ Sym.printName(OS);
+ }
+
+ const Function *F =
+ cast<Function>(IRFile->getSymbolGV(Sym.getRawDataRefImpl()));
+ addDefinedFunctionSymbol(Buffer.c_str(), F);
+}
+
+void LTOModule::addDefinedFunctionSymbol(const char *Name, const Function *F) {
// add to list of defined symbols
- addDefinedSymbol(f, true);
+ addDefinedSymbol(Name, F, true);
}
static bool canBeHidden(const GlobalValue *GV) {
@@ -385,16 +372,8 @@ static bool canBeHidden(const GlobalValue *GV) {
return !GS.IsCompared;
}
-/// addDefinedSymbol - Add a defined symbol to the list.
-void LTOModule::addDefinedSymbol(const GlobalValue *def, bool isFunction) {
- // ignore all llvm.* symbols
- if (def->getName().startswith("llvm."))
- return;
-
- // string is owned by _defines
- SmallString<64> Buffer;
- _target->getNameWithPrefix(Buffer, def, _mangler);
-
+void LTOModule::addDefinedSymbol(const char *Name, const GlobalValue *def,
+ bool isFunction) {
  // Set the alignment; countTrailingZeros is used because log2() can have
  // rounding errors.
uint32_t align = def->getAlignment();
uint32_t attr = align ? countTrailingZeros(align) : 0;
@@ -431,14 +410,14 @@ void LTOModule::addDefinedSymbol(const GlobalValue *def, bool isFunction) {
else
attr |= LTO_SYMBOL_SCOPE_DEFAULT;
- StringSet::value_type &entry = _defines.GetOrCreateValue(Buffer);
+ StringSet::value_type &entry = _defines.GetOrCreateValue(Name);
entry.setValue(1);
// fill information structure
NameAndAttributes info;
- StringRef Name = entry.getKey();
- info.name = Name.data();
- assert(info.name[Name.size()] == '\0');
+ StringRef NameRef = entry.getKey();
+ info.name = NameRef.data();
+ assert(info.name[NameRef.size()] == '\0');
info.attributes = attr;
info.isFunction = isFunction;
info.symbol = def;
@@ -483,9 +462,9 @@ void LTOModule::addAsmGlobalSymbol(const char *name,
}
if (info.isFunction)
- addDefinedFunctionSymbol(cast<Function>(info.symbol));
+ addDefinedFunctionSymbol(info.name, cast<Function>(info.symbol));
else
- addDefinedDataSymbol(info.symbol);
+ addDefinedDataSymbol(info.name, info.symbol);
_symbols.back().attributes &= ~LTO_SYMBOL_SCOPE_MASK;
_symbols.back().attributes |= scope;
@@ -514,20 +493,14 @@ void LTOModule::addAsmGlobalSymbolUndef(const char *name) {
entry.setValue(info);
}
-/// addPotentialUndefinedSymbol - Add a symbol which isn't defined just yet to a
-/// list to be resolved later.
-void
-LTOModule::addPotentialUndefinedSymbol(const GlobalValue *decl, bool isFunc) {
- // ignore all llvm.* symbols
- if (decl->getName().startswith("llvm."))
- return;
-
- // ignore all aliases
- if (isa<GlobalAlias>(decl))
- return;
-
+/// Add a symbol which isn't defined just yet to a list to be resolved later.
+void LTOModule::addPotentialUndefinedSymbol(const object::BasicSymbolRef &Sym,
+ bool isFunc) {
SmallString<64> name;
- _target->getNameWithPrefix(name, decl, _mangler);
+ {
+ raw_svector_ostream OS(name);
+ Sym.printName(OS);
+ }
StringMap<NameAndAttributes>::value_type &entry =
_undefines.GetOrCreateValue(name);
@@ -540,6 +513,8 @@ LTOModule::addPotentialUndefinedSymbol(const GlobalValue *decl, bool isFunc) {
info.name = entry.getKey().data();
+ const GlobalValue *decl = IRFile->getSymbolGV(Sym.getRawDataRefImpl());
+
if (decl->hasExternalWeakLinkage())
info.attributes = LTO_SYMBOL_DEFINITION_WEAKUNDEF;
else
@@ -551,259 +526,54 @@ LTOModule::addPotentialUndefinedSymbol(const GlobalValue *decl, bool isFunc) {
entry.setValue(info);
}
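
The braced block around the stream in these hunks is load-bearing: raw_svector_ostream may buffer, and its destructor flushes into the SmallString, so the name is complete only after the stream dies. The idiom isolated as a sketch (any symbol-like type with printName(raw_ostream&) works, e.g. object::BasicSymbolRef):

#include "llvm/ADT/SmallString.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

template <typename SymT>
const char *symbolName(SmallString<64> &Storage, const SymT &Sym) {
  {
    raw_svector_ostream OS(Storage);
    Sym.printName(OS); // may sit in the stream's buffer...
  }                    // ...until the destructor flushes it here
  return Storage.c_str();
}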
-namespace {
-
- class RecordStreamer : public MCStreamer {
- public:
- enum State { NeverSeen, Global, Defined, DefinedGlobal, Used };
-
- private:
- StringMap<State> Symbols;
-
- void markDefined(const MCSymbol &Symbol) {
- State &S = Symbols[Symbol.getName()];
- switch (S) {
- case DefinedGlobal:
- case Global:
- S = DefinedGlobal;
- break;
- case NeverSeen:
- case Defined:
- case Used:
- S = Defined;
- break;
- }
- }
- void markGlobal(const MCSymbol &Symbol) {
- State &S = Symbols[Symbol.getName()];
- switch (S) {
- case DefinedGlobal:
- case Defined:
- S = DefinedGlobal;
- break;
-
- case NeverSeen:
- case Global:
- case Used:
- S = Global;
- break;
- }
- }
- void markUsed(const MCSymbol &Symbol) {
- State &S = Symbols[Symbol.getName()];
- switch (S) {
- case DefinedGlobal:
- case Defined:
- case Global:
- break;
-
- case NeverSeen:
- case Used:
- S = Used;
- break;
- }
- }
-
- // FIXME: mostly copied for the obj streamer.
- void AddValueSymbols(const MCExpr *Value) {
- switch (Value->getKind()) {
- case MCExpr::Target:
- // FIXME: What should we do in here?
- break;
-
- case MCExpr::Constant:
- break;
-
- case MCExpr::Binary: {
- const MCBinaryExpr *BE = cast<MCBinaryExpr>(Value);
- AddValueSymbols(BE->getLHS());
- AddValueSymbols(BE->getRHS());
- break;
- }
-
- case MCExpr::SymbolRef:
- markUsed(cast<MCSymbolRefExpr>(Value)->getSymbol());
- break;
-
- case MCExpr::Unary:
- AddValueSymbols(cast<MCUnaryExpr>(Value)->getSubExpr());
- break;
+/// parseSymbols - Parse the symbols from the module and module-level ASM and add
+/// them to either the defined or undefined lists.
+bool LTOModule::parseSymbols(std::string &errMsg) {
+ for (auto &Sym : IRFile->symbols()) {
+ const GlobalValue *GV = IRFile->getSymbolGV(Sym.getRawDataRefImpl());
+ uint32_t Flags = Sym.getFlags();
+ if (Flags & object::BasicSymbolRef::SF_FormatSpecific)
+ continue;
+
+ bool IsUndefined = Flags & object::BasicSymbolRef::SF_Undefined;
+
+ if (!GV) {
+ SmallString<64> Buffer;
+ {
+ raw_svector_ostream OS(Buffer);
+ Sym.printName(OS);
}
+ const char *Name = Buffer.c_str();
+
+ if (IsUndefined)
+ addAsmGlobalSymbolUndef(Name);
+ else if (Flags & object::BasicSymbolRef::SF_Global)
+ addAsmGlobalSymbol(Name, LTO_SYMBOL_SCOPE_DEFAULT);
+ else
+ addAsmGlobalSymbol(Name, LTO_SYMBOL_SCOPE_INTERNAL);
+ continue;
}
- public:
- typedef StringMap<State>::const_iterator const_iterator;
-
- const_iterator begin() {
- return Symbols.begin();
+ auto *F = dyn_cast<Function>(GV);
+ if (IsUndefined) {
+ addPotentialUndefinedSymbol(Sym, F != nullptr);
+ continue;
}
- const_iterator end() {
- return Symbols.end();
+ if (F) {
+ addDefinedFunctionSymbol(Sym);
+ continue;
}
- RecordStreamer(MCContext &Context) : MCStreamer(Context) {}
-
- void EmitInstruction(const MCInst &Inst,
- const MCSubtargetInfo &STI) override {
- // Scan for values.
- for (unsigned i = Inst.getNumOperands(); i--; )
- if (Inst.getOperand(i).isExpr())
- AddValueSymbols(Inst.getOperand(i).getExpr());
- }
- void EmitLabel(MCSymbol *Symbol) override {
- Symbol->setSection(*getCurrentSection().first);
- markDefined(*Symbol);
- }
- void EmitDebugLabel(MCSymbol *Symbol) override {
- EmitLabel(Symbol);
- }
- void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) override {
- // FIXME: should we handle aliases?
- markDefined(*Symbol);
- AddValueSymbols(Value);
- }
- bool EmitSymbolAttribute(MCSymbol *Symbol,
- MCSymbolAttr Attribute) override {
- if (Attribute == MCSA_Global)
- markGlobal(*Symbol);
- return true;
+ if (isa<GlobalVariable>(GV)) {
+ addDefinedDataSymbol(Sym);
+ continue;
}
- void EmitZerofill(const MCSection *Section, MCSymbol *Symbol,
- uint64_t Size , unsigned ByteAlignment) override {
- markDefined(*Symbol);
- }
- void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
- unsigned ByteAlignment) override {
- markDefined(*Symbol);
- }
-
- void EmitBundleAlignMode(unsigned AlignPow2) override {}
- void EmitBundleLock(bool AlignToEnd) override {}
- void EmitBundleUnlock() override {}
-
- // Noop calls.
- void ChangeSection(const MCSection *Section,
- const MCExpr *Subsection) override {}
- void EmitAssemblerFlag(MCAssemblerFlag Flag) override {}
- void EmitThumbFunc(MCSymbol *Func) override {}
- void EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) override {}
- void EmitWeakReference(MCSymbol *Alias, const MCSymbol *Symbol) override {}
- void BeginCOFFSymbolDef(const MCSymbol *Symbol) override {}
- void EmitCOFFSymbolStorageClass(int StorageClass) override {}
- void EmitCOFFSymbolType(int Type) override {}
- void EndCOFFSymbolDef() override {}
- void EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) override {}
- void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size,
- unsigned ByteAlignment) override {}
- void EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol,
- uint64_t Size, unsigned ByteAlignment) override {}
- void EmitBytes(StringRef Data) override {}
- void EmitValueImpl(const MCExpr *Value, unsigned Size,
- const SMLoc &Loc) override {}
- void EmitULEB128Value(const MCExpr *Value) override {}
- void EmitSLEB128Value(const MCExpr *Value) override {}
- void EmitValueToAlignment(unsigned ByteAlignment, int64_t Value,
- unsigned ValueSize,
- unsigned MaxBytesToEmit) override {}
- void EmitCodeAlignment(unsigned ByteAlignment,
- unsigned MaxBytesToEmit) override {}
- bool EmitValueToOffset(const MCExpr *Offset,
- unsigned char Value) override { return false; }
- void EmitFileDirective(StringRef Filename) override {}
- void FinishImpl() override {}
- void EmitCFIEndProcImpl(MCDwarfFrameInfo &Frame) override {
- RecordProcEnd(Frame);
- }
- };
-} // end anonymous namespace
-
-/// addAsmGlobalSymbols - Add global symbols from module-level ASM to the
-/// defined or undefined lists.
-bool LTOModule::addAsmGlobalSymbols(std::string &errMsg) {
- const std::string &inlineAsm = _module->getModuleInlineAsm();
- if (inlineAsm.empty())
- return false;
-
- std::unique_ptr<RecordStreamer> Streamer(new RecordStreamer(_context));
- MemoryBuffer *Buffer = MemoryBuffer::getMemBuffer(inlineAsm);
- SourceMgr SrcMgr;
- SrcMgr.AddNewSourceBuffer(Buffer, SMLoc());
- std::unique_ptr<MCAsmParser> Parser(
- createMCAsmParser(SrcMgr, _context, *Streamer, *_target->getMCAsmInfo()));
- const Target &T = _target->getTarget();
- std::unique_ptr<MCInstrInfo> MCII(T.createMCInstrInfo());
- std::unique_ptr<MCSubtargetInfo> STI(T.createMCSubtargetInfo(
- _target->getTargetTriple(), _target->getTargetCPU(),
- _target->getTargetFeatureString()));
- std::unique_ptr<MCTargetAsmParser> TAP(
- T.createMCAsmParser(*STI, *Parser.get(), *MCII,
- _target->Options.MCOptions));
- if (!TAP) {
- errMsg = "target " + std::string(T.getName()) +
- " does not define AsmParser.";
- return true;
- }
-
- Parser->setTargetParser(*TAP);
- if (Parser->Run(false))
- return true;
- for (RecordStreamer::const_iterator i = Streamer->begin(),
- e = Streamer->end(); i != e; ++i) {
- StringRef Key = i->first();
- RecordStreamer::State Value = i->second;
- if (Value == RecordStreamer::DefinedGlobal)
- addAsmGlobalSymbol(Key.data(), LTO_SYMBOL_SCOPE_DEFAULT);
- else if (Value == RecordStreamer::Defined)
- addAsmGlobalSymbol(Key.data(), LTO_SYMBOL_SCOPE_INTERNAL);
- else if (Value == RecordStreamer::Global ||
- Value == RecordStreamer::Used)
- addAsmGlobalSymbolUndef(Key.data());
+ assert(isa<GlobalAlias>(GV));
+ addDefinedDataSymbol(Sym);
}
- return false;
-}
-
-/// isDeclaration - Return 'true' if the global value is a declaration.
-static bool isDeclaration(const GlobalValue &V) {
- if (V.hasAvailableExternallyLinkage())
- return true;
-
- if (V.isMaterializable())
- return false;
-
- return V.isDeclaration();
-}
-
-/// parseSymbols - Parse the symbols from the module and model-level ASM and add
-/// them to either the defined or undefined lists.
-bool LTOModule::parseSymbols(std::string &errMsg) {
- // add functions
- for (Module::iterator f = _module->begin(), e = _module->end(); f != e; ++f) {
- if (isDeclaration(*f))
- addPotentialUndefinedSymbol(f, true);
- else
- addDefinedFunctionSymbol(f);
- }
-
- // add data
- for (Module::global_iterator v = _module->global_begin(),
- e = _module->global_end(); v != e; ++v) {
- if (isDeclaration(*v))
- addPotentialUndefinedSymbol(v, false);
- else
- addDefinedDataSymbol(v);
- }
-
- // add asm globals
- if (addAsmGlobalSymbols(errMsg))
- return true;
-
- // add aliases
- for (const auto &Alias : _module->aliases())
- addDefinedDataSymbol(&Alias);
-
// make symbols for all undefines
for (StringMap<NameAndAttributes>::iterator u =_undefines.begin(),
e = _undefines.end(); u != e; ++u) {
@@ -820,7 +590,7 @@ bool LTOModule::parseSymbols(std::string &errMsg) {
/// parseMetadata - Parse metadata from the module
void LTOModule::parseMetadata() {
// Linker Options
- if (Value *Val = _module->getModuleFlag("Linker Options")) {
+ if (Value *Val = getModule().getModuleFlag("Linker Options")) {
MDNode *LinkerOptions = cast<MDNode>(Val);
for (unsigned i = 0, e = LinkerOptions->getNumOperands(); i != e; ++i) {
MDNode *MDOptions = cast<MDNode>(LinkerOptions->getOperand(i));
diff --git a/lib/Linker/LinkModules.cpp b/lib/Linker/LinkModules.cpp
index 45f2d4e..5bb2862 100644
--- a/lib/Linker/LinkModules.cpp
+++ b/lib/Linker/LinkModules.cpp
@@ -24,6 +24,7 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include <cctype>
+#include <tuple>
using namespace llvm;
@@ -389,8 +390,6 @@ namespace {
/// actually need, but this allows us to reuse the ValueMapper code.
ValueToValueMapTy ValueMap;
- std::vector<std::pair<GlobalValue *, GlobalAlias *>> ReplaceWithAlias;
-
struct AppendingVarInfo {
GlobalVariable *NewGV; // New aggregate global in dest module.
Constant *DstInit; // Old initializer from dest module.
@@ -428,6 +427,18 @@ namespace {
return true;
}
+ bool getComdatLeader(Module *M, StringRef ComdatName,
+ const GlobalVariable *&GVar);
+ bool computeResultingSelectionKind(StringRef ComdatName,
+ Comdat::SelectionKind Src,
+ Comdat::SelectionKind Dst,
+ Comdat::SelectionKind &Result,
+ bool &LinkFromSrc);
+ std::map<const Comdat *, std::pair<Comdat::SelectionKind, bool>>
+ ComdatsChosen;
+ bool getComdatResult(const Comdat *SrcC, Comdat::SelectionKind &SK,
+ bool &LinkFromSrc);
+
/// getLinkageResult - This analyzes the two global values and determines
/// what the result will look like in the destination module.
bool getLinkageResult(GlobalValue *Dest, const GlobalValue *Src,
@@ -536,6 +547,115 @@ Value *ValueMaterializerTy::materializeValueFor(Value *V) {
return DF;
}
+bool ModuleLinker::getComdatLeader(Module *M, StringRef ComdatName,
+ const GlobalVariable *&GVar) {
+ const GlobalValue *GVal = M->getNamedValue(ComdatName);
+ if (const auto *GA = dyn_cast_or_null<GlobalAlias>(GVal)) {
+ GVal = GA->getBaseObject();
+ if (!GVal)
+ // We cannot resolve the size of the aliasee yet.
+ return emitError("Linking COMDATs named '" + ComdatName +
+ "': COMDAT key involves incomputable alias size.");
+ }
+
+ GVar = dyn_cast_or_null<GlobalVariable>(GVal);
+ if (!GVar)
+ return emitError(
+ "Linking COMDATs named '" + ComdatName +
+ "': GlobalVariable required for data dependent selection!");
+
+ return false;
+}
+
+bool ModuleLinker::computeResultingSelectionKind(StringRef ComdatName,
+ Comdat::SelectionKind Src,
+ Comdat::SelectionKind Dst,
+ Comdat::SelectionKind &Result,
+ bool &LinkFromSrc) {
+ // The ability to mix Comdat::SelectionKind::Any with
+ // Comdat::SelectionKind::Largest is a behavior that comes from COFF.
+ bool DstAnyOrLargest = Dst == Comdat::SelectionKind::Any ||
+ Dst == Comdat::SelectionKind::Largest;
+ bool SrcAnyOrLargest = Src == Comdat::SelectionKind::Any ||
+ Src == Comdat::SelectionKind::Largest;
+ if (DstAnyOrLargest && SrcAnyOrLargest) {
+ if (Dst == Comdat::SelectionKind::Largest ||
+ Src == Comdat::SelectionKind::Largest)
+ Result = Comdat::SelectionKind::Largest;
+ else
+ Result = Comdat::SelectionKind::Any;
+ } else if (Src == Dst) {
+ Result = Dst;
+ } else {
+ return emitError("Linking COMDATs named '" + ComdatName +
+ "': invalid selection kinds!");
+ }
+
+ switch (Result) {
+ case Comdat::SelectionKind::Any:
+ // Go with Dst.
+ LinkFromSrc = false;
+ break;
+ case Comdat::SelectionKind::NoDuplicates:
+ return emitError("Linking COMDATs named '" + ComdatName +
+ "': noduplicates has been violated!");
+ case Comdat::SelectionKind::ExactMatch:
+ case Comdat::SelectionKind::Largest:
+ case Comdat::SelectionKind::SameSize: {
+ const GlobalVariable *DstGV;
+ const GlobalVariable *SrcGV;
+ if (getComdatLeader(DstM, ComdatName, DstGV) ||
+ getComdatLeader(SrcM, ComdatName, SrcGV))
+ return true;
+
+ const DataLayout *DstDL = DstM->getDataLayout();
+ const DataLayout *SrcDL = SrcM->getDataLayout();
+ if (!DstDL || !SrcDL) {
+ return emitError(
+ "Linking COMDATs named '" + ComdatName +
+ "': can't do size dependent selection without DataLayout!");
+ }
+ uint64_t DstSize =
+ DstDL->getTypeAllocSize(DstGV->getType()->getPointerElementType());
+ uint64_t SrcSize =
+ SrcDL->getTypeAllocSize(SrcGV->getType()->getPointerElementType());
+ if (Result == Comdat::SelectionKind::ExactMatch) {
+ if (SrcGV->getInitializer() != DstGV->getInitializer())
+ return emitError("Linking COMDATs named '" + ComdatName +
+ "': ExactMatch violated!");
+ LinkFromSrc = false;
+ } else if (Result == Comdat::SelectionKind::Largest) {
+ LinkFromSrc = SrcSize > DstSize;
+ } else if (Result == Comdat::SelectionKind::SameSize) {
+ if (SrcSize != DstSize)
+ return emitError("Linking COMDATs named '" + ComdatName +
+ "': SameSize violated!");
+ LinkFromSrc = false;
+ } else {
+ llvm_unreachable("unknown selection kind");
+ }
+ break;
+ }
+ }
+
+ return false;
+}
+
+bool ModuleLinker::getComdatResult(const Comdat *SrcC,
+ Comdat::SelectionKind &Result,
+ bool &LinkFromSrc) {
+ StringRef ComdatName = SrcC->getName();
+ Module::ComdatSymTabType &ComdatSymTab = DstM->getComdatSymbolTable();
+ Module::ComdatSymTabType::iterator DstCI = ComdatSymTab.find(ComdatName);
+ if (DstCI != ComdatSymTab.end()) {
+ const Comdat *DstC = &DstCI->second;
+ Comdat::SelectionKind SSK = SrcC->getSelectionKind();
+ Comdat::SelectionKind DSK = DstC->getSelectionKind();
+ if (computeResultingSelectionKind(ComdatName, SSK, DSK, Result, LinkFromSrc))
+ return true;
+ }
+ return false;
+}
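
The merge rules above, tabulated as a sketch derived from computeResultingSelectionKind (the Any/Largest mixing comes from COFF semantics):

// Dst kind      Src kind       Result      LinkFromSrc
// Any           Any            Any         false (keep Dst)
// Any           Largest        Largest     SrcSize > DstSize
// Largest       Largest        Largest     SrcSize > DstSize
// ExactMatch    ExactMatch     ExactMatch  false; initializers must match
// SameSize      SameSize       SameSize    false; sizes must match
// NoDuplicates  NoDuplicates   error: noduplicates has been violated
// any other mix                error: invalid selection kinds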
/// getLinkageResult - This analyzes the two global values and determines what
/// the result will look like in the destination module. In particular, it
@@ -723,7 +843,7 @@ bool ModuleLinker::linkAppendingVarProto(GlobalVariable *DstGV,
return emitError(
"Appending variables with different unnamed_addr need to be linked!");
- if (DstGV->getSection() != SrcGV->getSection())
+ if (StringRef(DstGV->getSection()) != SrcGV->getSection())
return emitError(
"Appending variables with different section name need to be linked!");
@@ -766,34 +886,47 @@ bool ModuleLinker::linkGlobalProto(GlobalVariable *SGV) {
llvm::Optional<GlobalValue::VisibilityTypes> NewVisibility;
bool HasUnnamedAddr = SGV->hasUnnamedAddr();
+ bool LinkFromSrc = false;
+ Comdat *DC = nullptr;
+ if (const Comdat *SC = SGV->getComdat()) {
+ Comdat::SelectionKind SK;
+ std::tie(SK, LinkFromSrc) = ComdatsChosen[SC];
+ DC = DstM->getOrInsertComdat(SC->getName());
+ DC->setSelectionKind(SK);
+ }
+
if (DGV) {
- // Concatenation of appending linkage variables is magic and handled later.
- if (DGV->hasAppendingLinkage() || SGV->hasAppendingLinkage())
- return linkAppendingVarProto(cast<GlobalVariable>(DGV), SGV);
-
- // Determine whether linkage of these two globals follows the source
- // module's definition or the destination module's definition.
- GlobalValue::LinkageTypes NewLinkage = GlobalValue::InternalLinkage;
- GlobalValue::VisibilityTypes NV;
- bool LinkFromSrc = false;
- if (getLinkageResult(DGV, SGV, NewLinkage, NV, LinkFromSrc))
- return true;
- NewVisibility = NV;
- HasUnnamedAddr = HasUnnamedAddr && DGV->hasUnnamedAddr();
+ if (!DC) {
+ // Concatenation of appending linkage variables is magic and handled later.
+ if (DGV->hasAppendingLinkage() || SGV->hasAppendingLinkage())
+ return linkAppendingVarProto(cast<GlobalVariable>(DGV), SGV);
+
+ // Determine whether linkage of these two globals follows the source
+ // module's definition or the destination module's definition.
+ GlobalValue::LinkageTypes NewLinkage = GlobalValue::InternalLinkage;
+ GlobalValue::VisibilityTypes NV;
+ if (getLinkageResult(DGV, SGV, NewLinkage, NV, LinkFromSrc))
+ return true;
+ NewVisibility = NV;
+ HasUnnamedAddr = HasUnnamedAddr && DGV->hasUnnamedAddr();
+
+ // If we're not linking from the source, then keep the definition that we
+ // have.
+ if (!LinkFromSrc) {
+ // Special case for const propagation.
+ if (GlobalVariable *DGVar = dyn_cast<GlobalVariable>(DGV))
+ if (DGVar->isDeclaration() && SGV->isConstant() &&
+ !DGVar->isConstant())
+ DGVar->setConstant(true);
+
+ // Set calculated linkage, visibility and unnamed_addr.
+ DGV->setLinkage(NewLinkage);
+ DGV->setVisibility(*NewVisibility);
+ DGV->setUnnamedAddr(HasUnnamedAddr);
+ }
+ }
- // If we're not linking from the source, then keep the definition that we
- // have.
if (!LinkFromSrc) {
- // Special case for const propagation.
- if (GlobalVariable *DGVar = dyn_cast<GlobalVariable>(DGV))
- if (DGVar->isDeclaration() && SGV->isConstant() && !DGVar->isConstant())
- DGVar->setConstant(true);
-
- // Set calculated linkage, visibility and unnamed_addr.
- DGV->setLinkage(NewLinkage);
- DGV->setVisibility(*NewVisibility);
- DGV->setUnnamedAddr(HasUnnamedAddr);
-
// Make sure to remember this mapping.
ValueMap[SGV] = ConstantExpr::getBitCast(DGV,TypeMap.get(SGV->getType()));
@@ -805,6 +938,12 @@ bool ModuleLinker::linkGlobalProto(GlobalVariable *SGV) {
}
}
+ // If the Comdat this variable was inside of wasn't selected, skip it.
+ if (DC && !DGV && !LinkFromSrc) {
+ DoNotLinkFromSource.insert(SGV);
+ return false;
+ }
+
// No linking to be performed or linking from the source: simply create an
// identical version of the symbol over in the dest module... the
// initializer will be filled in later by LinkGlobalInits.
@@ -820,6 +959,9 @@ bool ModuleLinker::linkGlobalProto(GlobalVariable *SGV) {
NewDGV->setVisibility(*NewVisibility);
NewDGV->setUnnamedAddr(HasUnnamedAddr);
+ if (DC)
+ NewDGV->setComdat(DC);
+
if (DGV) {
DGV->replaceAllUsesWith(ConstantExpr::getBitCast(NewDGV, DGV->getType()));
DGV->eraseFromParent();
@@ -837,21 +979,33 @@ bool ModuleLinker::linkFunctionProto(Function *SF) {
llvm::Optional<GlobalValue::VisibilityTypes> NewVisibility;
bool HasUnnamedAddr = SF->hasUnnamedAddr();
+ bool LinkFromSrc = false;
+ Comdat *DC = nullptr;
+ if (const Comdat *SC = SF->getComdat()) {
+ Comdat::SelectionKind SK;
+ std::tie(SK, LinkFromSrc) = ComdatsChosen[SC];
+ DC = DstM->getOrInsertComdat(SC->getName());
+ DC->setSelectionKind(SK);
+ }
+
if (DGV) {
- GlobalValue::LinkageTypes NewLinkage = GlobalValue::InternalLinkage;
- bool LinkFromSrc = false;
- GlobalValue::VisibilityTypes NV;
- if (getLinkageResult(DGV, SF, NewLinkage, NV, LinkFromSrc))
- return true;
- NewVisibility = NV;
- HasUnnamedAddr = HasUnnamedAddr && DGV->hasUnnamedAddr();
+ if (!DC) {
+ GlobalValue::LinkageTypes NewLinkage = GlobalValue::InternalLinkage;
+ GlobalValue::VisibilityTypes NV;
+ if (getLinkageResult(DGV, SF, NewLinkage, NV, LinkFromSrc))
+ return true;
+ NewVisibility = NV;
+ HasUnnamedAddr = HasUnnamedAddr && DGV->hasUnnamedAddr();
+
+ if (!LinkFromSrc) {
+ // Set calculated linkage
+ DGV->setLinkage(NewLinkage);
+ DGV->setVisibility(*NewVisibility);
+ DGV->setUnnamedAddr(HasUnnamedAddr);
+ }
+ }
if (!LinkFromSrc) {
- // Set calculated linkage
- DGV->setLinkage(NewLinkage);
- DGV->setVisibility(*NewVisibility);
- DGV->setUnnamedAddr(HasUnnamedAddr);
-
// Make sure to remember this mapping.
ValueMap[SF] = ConstantExpr::getBitCast(DGV, TypeMap.get(SF->getType()));
@@ -871,6 +1025,12 @@ bool ModuleLinker::linkFunctionProto(Function *SF) {
return false;
}
+ // If the Comdat this function was inside of wasn't selected, skip it.
+ if (DC && !DGV && !LinkFromSrc) {
+ DoNotLinkFromSource.insert(SF);
+ return false;
+ }
+
// If there is no linkage to be performed or we are linking from the source,
// bring SF over.
Function *NewDF = Function::Create(TypeMap.get(SF->getFunctionType()),
@@ -880,6 +1040,9 @@ bool ModuleLinker::linkFunctionProto(Function *SF) {
NewDF->setVisibility(*NewVisibility);
NewDF->setUnnamedAddr(HasUnnamedAddr);
+ if (DC)
+ NewDF->setComdat(DC);
+
if (DGV) {
// Any uses of DF need to change to NewDF, with cast.
DGV->replaceAllUsesWith(ConstantExpr::getBitCast(NewDF, DGV->getType()));
@@ -895,20 +1058,35 @@ bool ModuleLinker::linkFunctionProto(Function *SF) {
bool ModuleLinker::linkAliasProto(GlobalAlias *SGA) {
GlobalValue *DGV = getLinkedToGlobal(SGA);
llvm::Optional<GlobalValue::VisibilityTypes> NewVisibility;
+ bool HasUnnamedAddr = SGA->hasUnnamedAddr();
+
+ bool LinkFromSrc = false;
+ Comdat *DC = nullptr;
+ if (const Comdat *SC = SGA->getComdat()) {
+ Comdat::SelectionKind SK;
+ std::tie(SK, LinkFromSrc) = ComdatsChosen[SC];
+ DC = DstM->getOrInsertComdat(SC->getName());
+ DC->setSelectionKind(SK);
+ }
if (DGV) {
- GlobalValue::LinkageTypes NewLinkage = GlobalValue::InternalLinkage;
- GlobalValue::VisibilityTypes NV;
- bool LinkFromSrc = false;
- if (getLinkageResult(DGV, SGA, NewLinkage, NV, LinkFromSrc))
- return true;
- NewVisibility = NV;
+ if (!DC) {
+ GlobalValue::LinkageTypes NewLinkage = GlobalValue::InternalLinkage;
+ GlobalValue::VisibilityTypes NV;
+ if (getLinkageResult(DGV, SGA, NewLinkage, NV, LinkFromSrc))
+ return true;
+ NewVisibility = NV;
+ HasUnnamedAddr = HasUnnamedAddr && DGV->hasUnnamedAddr();
+
+ if (!LinkFromSrc) {
+ // Set calculated linkage.
+ DGV->setLinkage(NewLinkage);
+ DGV->setVisibility(*NewVisibility);
+ DGV->setUnnamedAddr(HasUnnamedAddr);
+ }
+ }
if (!LinkFromSrc) {
- // Set calculated linkage.
- DGV->setLinkage(NewLinkage);
- DGV->setVisibility(*NewVisibility);
-
// Make sure to remember this mapping.
ValueMap[SGA] = ConstantExpr::getBitCast(DGV,TypeMap.get(SGA->getType()));
@@ -919,6 +1097,12 @@ bool ModuleLinker::linkAliasProto(GlobalAlias *SGA) {
}
}
+ // If the Comdat this alias was inside of wasn't selected, skip it.
+ if (DC && !DGV && !LinkFromSrc) {
+ DoNotLinkFromSource.insert(SGA);
+ return false;
+ }
+
// If there is no linkage to be performed or we're linking from the source,
// bring over SGA.
auto *PTy = cast<PointerType>(TypeMap.get(SGA->getType()));
@@ -928,9 +1112,13 @@ bool ModuleLinker::linkAliasProto(GlobalAlias *SGA) {
copyGVAttributes(NewDA, SGA);
if (NewVisibility)
NewDA->setVisibility(*NewVisibility);
+ NewDA->setUnnamedAddr(HasUnnamedAddr);
- if (DGV)
- ReplaceWithAlias.push_back(std::make_pair(DGV, NewDA));
+ if (DGV) {
+ // Any uses of DGV need to change to NewDA, with cast.
+ DGV->replaceAllUsesWith(ConstantExpr::getBitCast(NewDA, DGV->getType()));
+ DGV->eraseFromParent();
+ }
ValueMap[SGA] = NewDA;
return false;
@@ -1016,19 +1204,6 @@ void ModuleLinker::linkFunctionBody(Function *Dst, Function *Src) {
}
-static GlobalObject &getGlobalObjectInExpr(Constant &C) {
- auto *GO = dyn_cast<GlobalObject>(&C);
- if (GO)
- return *GO;
- auto *GA = dyn_cast<GlobalAlias>(&C);
- if (GA)
- return *GA->getAliasee();
- auto &CE = cast<ConstantExpr>(C);
- assert(CE.getOpcode() == Instruction::BitCast ||
- CE.getOpcode() == Instruction::AddrSpaceCast);
- return getGlobalObjectInExpr(*CE.getOperand(0));
-}
-
/// linkAliasBodies - Insert all of the aliases in Src into the Dest module.
void ModuleLinker::linkAliasBodies() {
for (Module::alias_iterator I = SrcM->alias_begin(), E = SrcM->alias_end();
@@ -1039,24 +1214,8 @@ void ModuleLinker::linkAliasBodies() {
GlobalAlias *DA = cast<GlobalAlias>(ValueMap[I]);
Constant *Val =
MapValue(Aliasee, ValueMap, RF_None, &TypeMap, &ValMaterializer);
- DA->setAliasee(&getGlobalObjectInExpr(*Val));
- }
- }
-
- // Any uses of DGV need to change to NewDA, with cast.
- for (auto &Pair : ReplaceWithAlias) {
- GlobalValue *DGV = Pair.first;
- GlobalAlias *NewDA = Pair.second;
-
- for (auto *User : DGV->users()) {
- if (auto *GA = dyn_cast<GlobalAlias>(User)) {
- if (GA == NewDA)
- report_fatal_error("Linking these modules creates an alias cycle.");
- }
+ DA->setAliasee(Val);
}
-
- DGV->replaceAllUsesWith(ConstantExpr::getBitCast(NewDA, DGV->getType()));
- DGV->eraseFromParent();
}
}
@@ -1165,7 +1324,7 @@ bool ModuleLinker::linkModuleFlagsMetadata() {
// Perform the merge for standard behavior types.
switch (SrcBehaviorValue) {
case Module::Require:
- case Module::Override: assert(0 && "not possible"); break;
+ case Module::Override: llvm_unreachable("not possible");
case Module::Error: {
// Emit an error if the values differ.
if (SrcOp->getOperand(2) != DstOp->getOperand(2)) {
@@ -1278,6 +1437,18 @@ bool ModuleLinker::run() {
// Loop over all of the linked values to compute type mappings.
computeTypeMapping();
+ ComdatsChosen.clear();
+ for (const StringMapEntry<llvm::Comdat> &SMEC : SrcM->getComdatSymbolTable()) {
+ const Comdat &C = SMEC.getValue();
+ if (ComdatsChosen.count(&C))
+ continue;
+ Comdat::SelectionKind SK;
+ bool LinkFromSrc;
+ if (getComdatResult(&C, SK, LinkFromSrc))
+ return true;
+ ComdatsChosen[&C] = std::make_pair(SK, LinkFromSrc);
+ }
+
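run() now makes one (SelectionKind, LinkFromSrc) decision per source comdat up front; each link*Proto routine then unpacks its pair with std::tie, as shown in the hunks above. A self-contained sketch of the comdat walk that feeds the cache:

#include "llvm/IR/Module.h"
using namespace llvm;

// Walk every comdat in a module, mirroring the precomputation loop above.
void visitComdats(const Module &M) {
  for (const auto &SMEC : M.getComdatSymbolTable()) {
    const Comdat &C = SMEC.getValue();
    (void)C.getSelectionKind(); // one cached decision per comdat
  }
}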
// Insert all of the globals in src into the DstM module... without linking
// initializers (which could refer to functions not yet mapped over).
for (Module::global_iterator I = SrcM->global_begin(),
diff --git a/lib/MC/Android.mk b/lib/MC/Android.mk
index 23ad1d3..fd587c4 100644
--- a/lib/MC/Android.mk
+++ b/lib/MC/Android.mk
@@ -1,6 +1,7 @@
LOCAL_PATH:= $(call my-dir)
mc_SRC_FILES := \
+ ConstantPools.cpp \
ELFObjectWriter.cpp \
MCAsmBackend.cpp \
MCAsmInfo.cpp \
@@ -9,7 +10,6 @@ mc_SRC_FILES := \
MCAsmInfoELF.cpp \
MCAsmStreamer.cpp \
MCAssembler.cpp \
- MCAtom.cpp \
MCCodeEmitter.cpp \
MCCodeGenInfo.cpp \
MCContext.cpp \
@@ -18,7 +18,6 @@ mc_SRC_FILES := \
MCELF.cpp \
MCELFObjectTargetWriter.cpp \
MCELFStreamer.cpp \
- MCFunction.cpp \
MCExpr.cpp \
MCExternalSymbolizer.cpp \
MCInst.cpp \
@@ -28,13 +27,9 @@ mc_SRC_FILES := \
MCLinkerOptimizationHint.cpp \
MCMachOStreamer.cpp \
MCMachObjectTargetWriter.cpp \
- MCModule.cpp \
- MCModuleYAML.cpp \
MCNullStreamer.cpp \
MCObjectFileInfo.cpp \
- MCObjectDisassembler.cpp \
MCObjectStreamer.cpp \
- MCObjectSymbolizer.cpp \
MCObjectWriter.cpp \
MCRegisterInfo.cpp \
MCRelocationInfo.cpp \
@@ -50,9 +45,11 @@ mc_SRC_FILES := \
MCValue.cpp \
MCWin64EH.cpp \
MachObjectWriter.cpp \
+ StringTableBuilder.cpp \
SubtargetFeature.cpp \
WinCOFFObjectWriter.cpp \
WinCOFFStreamer.cpp \
+ YAML.cpp
# For the host
# =====================================================
diff --git a/lib/MC/CMakeLists.txt b/lib/MC/CMakeLists.txt
index 6a384c1..330519e 100644
--- a/lib/MC/CMakeLists.txt
+++ b/lib/MC/CMakeLists.txt
@@ -1,4 +1,5 @@
add_llvm_library(LLVMMC
+ ConstantPools.cpp
ELFObjectWriter.cpp
MCAsmBackend.cpp
MCAsmInfo.cpp
@@ -7,7 +8,6 @@ add_llvm_library(LLVMMC
MCAsmInfoELF.cpp
MCAsmStreamer.cpp
MCAssembler.cpp
- MCAtom.cpp
MCCodeEmitter.cpp
MCCodeGenInfo.cpp
MCContext.cpp
@@ -16,7 +16,6 @@ add_llvm_library(LLVMMC
MCELF.cpp
MCELFObjectTargetWriter.cpp
MCELFStreamer.cpp
- MCFunction.cpp
MCExpr.cpp
MCExternalSymbolizer.cpp
MCInst.cpp
@@ -26,13 +25,9 @@ add_llvm_library(LLVMMC
MCLinkerOptimizationHint.cpp
MCMachOStreamer.cpp
MCMachObjectTargetWriter.cpp
- MCModule.cpp
- MCModuleYAML.cpp
MCNullStreamer.cpp
MCObjectFileInfo.cpp
- MCObjectDisassembler.cpp
MCObjectStreamer.cpp
- MCObjectSymbolizer.cpp
MCObjectWriter.cpp
MCRegisterInfo.cpp
MCRelocationInfo.cpp
@@ -48,10 +43,13 @@ add_llvm_library(LLVMMC
MCValue.cpp
MCWin64EH.cpp
MachObjectWriter.cpp
+ StringTableBuilder.cpp
SubtargetFeature.cpp
WinCOFFObjectWriter.cpp
WinCOFFStreamer.cpp
+ YAML.cpp
)
+add_subdirectory(MCAnalysis)
add_subdirectory(MCParser)
add_subdirectory(MCDisassembler)
diff --git a/lib/MC/ConstantPools.cpp b/lib/MC/ConstantPools.cpp
new file mode 100644
index 0000000..f979dad
--- /dev/null
+++ b/lib/MC/ConstantPools.cpp
@@ -0,0 +1,95 @@
+//===- ConstantPools.cpp - ConstantPool class --*- C++ -*---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the ConstantPool and AssemblerConstantPools classes.
+//
+//===----------------------------------------------------------------------===//
+#include "llvm/ADT/MapVector.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/ConstantPools.h"
+
+using namespace llvm;
+//
+// ConstantPool implementation
+//
+// Emit the contents of the constant pool using the provided streamer.
+void ConstantPool::emitEntries(MCStreamer &Streamer) {
+ if (Entries.empty())
+ return;
+ Streamer.EmitCodeAlignment(4); // align to 4-byte address
+ Streamer.EmitDataRegion(MCDR_DataRegion);
+ for (EntryVecTy::const_iterator I = Entries.begin(), E = Entries.end();
+ I != E; ++I) {
+ Streamer.EmitLabel(I->first);
+ Streamer.EmitValue(I->second, 4);
+ }
+ Streamer.EmitDataRegion(MCDR_DataRegionEnd);
+ Entries.clear();
+}
+
+const MCExpr *ConstantPool::addEntry(const MCExpr *Value, MCContext &Context) {
+ MCSymbol *CPEntryLabel = Context.CreateTempSymbol();
+
+ Entries.push_back(std::make_pair(CPEntryLabel, Value));
+ return MCSymbolRefExpr::Create(CPEntryLabel, Context);
+}
+
+bool ConstantPool::empty() { return Entries.empty(); }
+
+//
+// AssemblerConstantPools implementation
+//
+ConstantPool *
+AssemblerConstantPools::getConstantPool(const MCSection *Section) {
+ ConstantPoolMapTy::iterator CP = ConstantPools.find(Section);
+ if (CP == ConstantPools.end())
+ return nullptr;
+
+ return &CP->second;
+}
+
+ConstantPool &
+AssemblerConstantPools::getOrCreateConstantPool(const MCSection *Section) {
+ return ConstantPools[Section];
+}
+
+static void emitConstantPool(MCStreamer &Streamer, const MCSection *Section,
+ ConstantPool &CP) {
+ if (!CP.empty()) {
+ Streamer.SwitchSection(Section);
+ CP.emitEntries(Streamer);
+ }
+}
+
+void AssemblerConstantPools::emitAll(MCStreamer &Streamer) {
+ // Dump contents of assembler constant pools.
+ for (ConstantPoolMapTy::iterator CPI = ConstantPools.begin(),
+ CPE = ConstantPools.end();
+ CPI != CPE; ++CPI) {
+ const MCSection *Section = CPI->first;
+ ConstantPool &CP = CPI->second;
+
+ emitConstantPool(Streamer, Section, CP);
+ }
+}
+
+void AssemblerConstantPools::emitForCurrentSection(MCStreamer &Streamer) {
+ const MCSection *Section = Streamer.getCurrentSection().first;
+ if (ConstantPool *CP = getConstantPool(Section)) {
+ emitConstantPool(Streamer, Section, *CP);
+ }
+}
+
+const MCExpr *AssemblerConstantPools::addEntry(MCStreamer &Streamer,
+ const MCExpr *Expr) {
+ const MCSection *Section = Streamer.getCurrentSection().first;
+ return getOrCreateConstantPool(Section).addEntry(Expr, Streamer.getContext());
+}
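A usage sketch for the new file's API, assuming an existing streamer and expression: addEntry hands back a symbol reference to embed where the constant was requested, and the pool is flushed later (e.g. at an .ltorg directive or end of assembly):

#include "llvm/MC/ConstantPools.h"
#include "llvm/MC/MCStreamer.h"
using namespace llvm;

const MCExpr *deferToPool(AssemblerConstantPools &Pools, MCStreamer &Streamer,
                          const MCExpr *Value) {
  // A MCSymbolRefExpr naming the slot in the current section's pool.
  const MCExpr *Ref = Pools.addEntry(Streamer, Value);
  // Flushing is shown inline only for the sketch; real callers defer it.
  Pools.emitForCurrentSection(Streamer);
  return Ref;
}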
diff --git a/lib/MC/ELFObjectWriter.cpp b/lib/MC/ELFObjectWriter.cpp
index 0a54627..7fb9fae 100644
--- a/lib/MC/ELFObjectWriter.cpp
+++ b/lib/MC/ELFObjectWriter.cpp
@@ -28,7 +28,7 @@
#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCValue.h"
-#include "llvm/Object/StringTableBuilder.h"
+#include "llvm/MC/StringTableBuilder.h"
#include "llvm/Support/Compression.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Endian.h"
@@ -1179,7 +1179,7 @@ prependCompressionHeader(uint64_t Size,
if (Size <= Magic.size() + sizeof(Size) + CompressedContents.size())
return false;
if (sys::IsLittleEndianHost)
- Size = sys::SwapByteOrder(Size);
+ sys::swapByteOrder(Size);
CompressedContents.insert(CompressedContents.begin(),
Magic.size() + sizeof(Size), 0);
std::copy(Magic.begin(), Magic.end(), CompressedContents.begin());
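
The dropped assignment reflects an API change: sys::SwapByteOrder returned the swapped value, while its replacement sys::swapByteOrder mutates in place. Isolated as a sketch:

#include "llvm/Support/Host.h"
#include "llvm/Support/SwapByteOrder.h"
#include <cstdint>
using namespace llvm;

uint64_t toBigEndian(uint64_t Size) {
  if (sys::IsLittleEndianHost)
    sys::swapByteOrder(Size); // in place; no return value to assign
  return Size;
}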
@@ -1565,6 +1565,7 @@ void ELFObjectWriter::WriteSection(MCAssembler &Asm,
case ELF::SHT_X86_64_UNWIND:
case ELF::SHT_MIPS_REGINFO:
case ELF::SHT_MIPS_OPTIONS:
+ case ELF::SHT_MIPS_ABIFLAGS:
// Nothing to do.
break;
@@ -1574,8 +1575,7 @@ void ELFObjectWriter::WriteSection(MCAssembler &Asm,
break;
default:
- assert(0 && "FIXME: sh_type value not supported!");
- break;
+ llvm_unreachable("FIXME: sh_type value not supported!");
}
if (TargetObjectWriter->getEMachine() == ELF::EM_ARM &&
diff --git a/lib/MC/LLVMBuild.txt b/lib/MC/LLVMBuild.txt
index f35dbe4..3fcb50b 100644
--- a/lib/MC/LLVMBuild.txt
+++ b/lib/MC/LLVMBuild.txt
@@ -16,10 +16,10 @@
;===------------------------------------------------------------------------===;
[common]
-subdirectories = MCDisassembler MCParser
+subdirectories = MCAnalysis MCDisassembler MCParser
[component_0]
type = Library
name = MC
parent = Libraries
-required_libraries = Object Support
+required_libraries = Support
diff --git a/lib/MC/MCAnalysis/Android.mk b/lib/MC/MCAnalysis/Android.mk
new file mode 100644
index 0000000..27f848a
--- /dev/null
+++ b/lib/MC/MCAnalysis/Android.mk
@@ -0,0 +1,37 @@
+LOCAL_PATH:= $(call my-dir)
+
+mc_analysis_SRC_FILES := \
+ MCAtom.cpp \
+ MCFunction.cpp \
+ MCModule.cpp \
+ MCModuleYAML.cpp \
+ MCObjectDisassembler.cpp \
+ MCObjectSymbolizer.cpp
+
+# For the host
+# =====================================================
+include $(CLEAR_VARS)
+
+LOCAL_SRC_FILES := $(mc_analysis_SRC_FILES)
+
+LOCAL_MODULE:= libLLVMMCAnalysis
+
+LOCAL_MODULE_TAGS := optional
+
+include $(LLVM_HOST_BUILD_MK)
+include $(BUILD_HOST_STATIC_LIBRARY)
+
+# For the device
+# =====================================================
+include $(CLEAR_VARS)
+ifneq (true,$(DISABLE_LLVM_DEVICE_BUILDS))
+
+LOCAL_SRC_FILES := $(mc_analysis_SRC_FILES)
+
+LOCAL_MODULE:= libLLVMMCAnalysis
+
+LOCAL_MODULE_TAGS := optional
+
+include $(LLVM_DEVICE_BUILD_MK)
+include $(BUILD_STATIC_LIBRARY)
+endif
diff --git a/lib/MC/MCAnalysis/CMakeLists.txt b/lib/MC/MCAnalysis/CMakeLists.txt
new file mode 100644
index 0000000..81eae2d
--- /dev/null
+++ b/lib/MC/MCAnalysis/CMakeLists.txt
@@ -0,0 +1,8 @@
+add_llvm_library(LLVMMCAnalysis
+ MCAtom.cpp
+ MCFunction.cpp
+ MCModule.cpp
+ MCModuleYAML.cpp
+ MCObjectDisassembler.cpp
+ MCObjectSymbolizer.cpp
+)
diff --git a/lib/MC/MCAnalysis/LLVMBuild.txt b/lib/MC/MCAnalysis/LLVMBuild.txt
new file mode 100644
index 0000000..1b58fec
--- /dev/null
+++ b/lib/MC/MCAnalysis/LLVMBuild.txt
@@ -0,0 +1,5 @@
+[component_0]
+type = Library
+name = MCAnalysis
+parent = Libraries
+required_libraries = MC Object Support
diff --git a/lib/MC/MCAtom.cpp b/lib/MC/MCAnalysis/MCAtom.cpp
index bc353cd..82056ee 100644
--- a/lib/MC/MCAtom.cpp
+++ b/lib/MC/MCAnalysis/MCAtom.cpp
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/MC/MCAtom.h"
-#include "llvm/MC/MCModule.h"
+#include "llvm/MC/MCAnalysis/MCAtom.h"
+#include "llvm/MC/MCAnalysis/MCModule.h"
#include "llvm/Support/ErrorHandling.h"
#include <iterator>
diff --git a/lib/MC/MCFunction.cpp b/lib/MC/MCAnalysis/MCFunction.cpp
index 1ddc250..4e09d1a 100644
--- a/lib/MC/MCFunction.cpp
+++ b/lib/MC/MCAnalysis/MCFunction.cpp
@@ -7,9 +7,9 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/MC/MCFunction.h"
-#include "llvm/MC/MCAtom.h"
-#include "llvm/MC/MCModule.h"
+#include "llvm/MC/MCAnalysis/MCFunction.h"
+#include "llvm/MC/MCAnalysis/MCAtom.h"
+#include "llvm/MC/MCAnalysis/MCModule.h"
#include <algorithm>
using namespace llvm;
diff --git a/lib/MC/MCModule.cpp b/lib/MC/MCAnalysis/MCModule.cpp
index 3ed7356..7512299 100644
--- a/lib/MC/MCModule.cpp
+++ b/lib/MC/MCAnalysis/MCModule.cpp
@@ -7,10 +7,10 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/MC/MCAnalysis/MCModule.h"
#include "llvm/ADT/STLExtras.h"
-#include "llvm/MC/MCModule.h"
-#include "llvm/MC/MCAtom.h"
-#include "llvm/MC/MCFunction.h"
+#include "llvm/MC/MCAnalysis/MCAtom.h"
+#include "llvm/MC/MCAnalysis/MCFunction.h"
#include <algorithm>
using namespace llvm;
diff --git a/lib/MC/MCModuleYAML.cpp b/lib/MC/MCAnalysis/MCModuleYAML.cpp
index f81cb14..876b06d 100644
--- a/lib/MC/MCModuleYAML.cpp
+++ b/lib/MC/MCAnalysis/MCModuleYAML.cpp
@@ -11,13 +11,13 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/MC/MCModuleYAML.h"
+#include "llvm/MC/MCAnalysis/MCModuleYAML.h"
#include "llvm/ADT/StringMap.h"
-#include "llvm/MC/MCAtom.h"
-#include "llvm/MC/MCFunction.h"
+#include "llvm/MC/MCAnalysis/MCAtom.h"
+#include "llvm/MC/MCAnalysis/MCFunction.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
-#include "llvm/Object/YAML.h"
+#include "llvm/MC/YAML.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/MathExtras.h"
@@ -102,7 +102,7 @@ struct Atom {
uint64_t Size;
std::vector<Inst> Insts;
- object::yaml::BinaryRef Data;
+ yaml::BinaryRef Data;
};
struct BasicBlock {
@@ -453,7 +453,7 @@ StringRef yaml2mcmodule(std::unique_ptr<MCModule> &MCM, StringRef YamlContent,
InstrRegInfoHolder IRI(MII, MRI);
yaml::Input YIn(YamlContent, (void *)&IRI);
YIn >> YAMLModule;
- if (error_code ec = YIn.error())
+ if (std::error_code ec = YIn.error())
return ec.message();
StringRef err = Parser.parse(YAMLModule);
if (!err.empty())
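The hunk above is part of the LLVM-wide migration from llvm::error_code to std::error_code. A minimal standalone sketch of the same check-and-report pattern, using only the standard library (parseStep and runParse are hypothetical stand-ins for YIn.error() and yaml2mcmodule):

    #include <iostream>
    #include <string>
    #include <system_error>

    // A parse step that reports failure through std::error_code, mirroring
    // `if (std::error_code ec = YIn.error()) return ec.message();` above.
    static std::error_code parseStep(bool Fail) {
      if (Fail)
        return std::make_error_code(std::errc::invalid_argument);
      return std::error_code(); // default-constructed means success
    }

    static std::string runParse(bool Fail) {
      if (std::error_code ec = parseStep(Fail))
        return ec.message();  // e.g. "Invalid argument"
      return std::string();   // empty string means success, as above
    }

    int main() {
      std::cout << "ok:   '" << runParse(false) << "'\n";
      std::cout << "fail: '" << runParse(true) << "'\n";
    }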
diff --git a/lib/MC/MCObjectDisassembler.cpp b/lib/MC/MCAnalysis/MCObjectDisassembler.cpp
index 8a258cb..0f789ff 100644
--- a/lib/MC/MCObjectDisassembler.cpp
+++ b/lib/MC/MCAnalysis/MCObjectDisassembler.cpp
@@ -13,11 +13,11 @@
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
-#include "llvm/MC/MCAtom.h"
+#include "llvm/MC/MCAnalysis/MCAtom.h"
+#include "llvm/MC/MCAnalysis/MCFunction.h"
+#include "llvm/MC/MCAnalysis/MCModule.h"
#include "llvm/MC/MCDisassembler.h"
-#include "llvm/MC/MCFunction.h"
#include "llvm/MC/MCInstrAnalysis.h"
-#include "llvm/MC/MCModule.h"
#include "llvm/MC/MCObjectSymbolizer.h"
#include "llvm/Object/MachO.h"
#include "llvm/Object/ObjectFile.h"
diff --git a/lib/MC/MCObjectSymbolizer.cpp b/lib/MC/MCAnalysis/MCObjectSymbolizer.cpp
index b149596..b149596 100644
--- a/lib/MC/MCObjectSymbolizer.cpp
+++ b/lib/MC/MCAnalysis/MCObjectSymbolizer.cpp
diff --git a/lib/MC/MCAnalysis/Makefile b/lib/MC/MCAnalysis/Makefile
new file mode 100644
index 0000000..add2dbd
--- /dev/null
+++ b/lib/MC/MCAnalysis/Makefile
@@ -0,0 +1,14 @@
+##===- lib/MC/MCAnalysis/Makefile --------------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../..
+LIBRARYNAME = LLVMMCAnalysis
+BUILD_ARCHIVE := 1
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/MC/MCAsmInfo.cpp b/lib/MC/MCAsmInfo.cpp
index c0777a6..f8081ef 100644
--- a/lib/MC/MCAsmInfo.cpp
+++ b/lib/MC/MCAsmInfo.cpp
@@ -39,7 +39,7 @@ MCAsmInfo::MCAsmInfo() {
SeparatorString = ";";
CommentString = "#";
LabelSuffix = ":";
- DebugLabelSuffix = ":";
+ UseAssignmentForEHBegin = false;
PrivateGlobalPrefix = "L";
LinkerPrivateGlobalPrefix = "";
InlineAsmStart = "APP";
@@ -82,6 +82,7 @@ MCAsmInfo::MCAsmInfo() {
HasLEB128 = false;
SupportsDebugInformation = false;
ExceptionsType = ExceptionHandling::None;
+ WinEHEncodingType = WinEH::EncodingType::ET_Invalid;
DwarfUsesRelocationsAcrossSections = true;
DwarfFDESymbolsUseAbsDiff = false;
DwarfRegNumForCFI = false;
@@ -99,7 +100,7 @@ MCAsmInfo::MCAsmInfo() {
// - MCAsmInfoDarwin is handling this case
// - Generic_GCC toolchains enable the integrated assembler on a per
// architecture basis.
- // - The target subclasses for AArch64, ARM, and X86 handle these cases
+ // - The target subclasses for AArch64, ARM, and X86 handle these cases
UseIntegratedAssembler = false;
CompressDebugSections = false;
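The MCAsmInfo changes above follow the class's usual pattern: the base constructor fills in conservative defaults (here the new UseAssignmentForEHBegin flag and the WinEHEncodingType sentinel), and target subclasses overwrite only the fields that differ. A loose sketch of that shape; all names and enum values are illustrative, not the real MC interfaces:

    #include <iostream>
    #include <string>

    namespace WinEH {
    enum class EncodingType { Invalid, Itanium, X86 }; // Invalid = "not set"
    }

    struct AsmInfoSketch {
      std::string LabelSuffix = ":";
      bool UseAssignmentForEHBegin = false;       // targets opt in explicitly
      WinEH::EncodingType WinEHEncoding = WinEH::EncodingType::Invalid;
    };

    // A target subclass overrides only what it needs.
    struct WindowsAsmInfoSketch : AsmInfoSketch {
      WindowsAsmInfoSketch() { WinEHEncoding = WinEH::EncodingType::Itanium; }
    };

    int main() {
      WindowsAsmInfoSketch AI;
      std::cout << (AI.WinEHEncoding != WinEH::EncodingType::Invalid) << "\n"; // 1
    }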
diff --git a/lib/MC/MCAsmStreamer.cpp b/lib/MC/MCAsmStreamer.cpp
index 7f8ae54..6973bbb 100644
--- a/lib/MC/MCAsmStreamer.cpp
+++ b/lib/MC/MCAsmStreamer.cpp
@@ -120,7 +120,6 @@ public:
void EmitLOHDirective(MCLOHType Kind, const MCLOHArgs &Args) override;
void EmitLabel(MCSymbol *Symbol) override;
- void EmitDebugLabel(MCSymbol *Symbol) override;
void EmitAssemblerFlag(MCAssemblerFlag Flag) override;
void EmitLinkerOptions(ArrayRef<std::string> Options) override;
@@ -213,20 +212,20 @@ public:
void EmitCFIRegister(int64_t Register1, int64_t Register2) override;
void EmitCFIWindowSave() override;
- void EmitWin64EHStartProc(const MCSymbol *Symbol) override;
- void EmitWin64EHEndProc() override;
- void EmitWin64EHStartChained() override;
- void EmitWin64EHEndChained() override;
- void EmitWin64EHHandler(const MCSymbol *Sym, bool Unwind,
- bool Except) override;
- void EmitWin64EHHandlerData() override;
- void EmitWin64EHPushReg(unsigned Register) override;
- void EmitWin64EHSetFrame(unsigned Register, unsigned Offset) override;
- void EmitWin64EHAllocStack(unsigned Size) override;
- void EmitWin64EHSaveReg(unsigned Register, unsigned Offset) override;
- void EmitWin64EHSaveXMM(unsigned Register, unsigned Offset) override;
- void EmitWin64EHPushFrame(bool Code) override;
- void EmitWin64EHEndProlog() override;
+ void EmitWinCFIStartProc(const MCSymbol *Symbol) override;
+ void EmitWinCFIEndProc() override;
+ void EmitWinCFIStartChained() override;
+ void EmitWinCFIEndChained() override;
+ void EmitWinCFIPushReg(unsigned Register) override;
+ void EmitWinCFISetFrame(unsigned Register, unsigned Offset) override;
+ void EmitWinCFIAllocStack(unsigned Size) override;
+ void EmitWinCFISaveReg(unsigned Register, unsigned Offset) override;
+ void EmitWinCFISaveXMM(unsigned Register, unsigned Offset) override;
+ void EmitWinCFIPushFrame(bool Code) override;
+ void EmitWinCFIEndProlog() override;
+
+ void EmitWinEHHandler(const MCSymbol *Sym, bool Unwind, bool Except) override;
+ void EmitWinEHHandlerData() override;
void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI) override;
@@ -334,14 +333,6 @@ void MCAsmStreamer::EmitLOHDirective(MCLOHType Kind, const MCLOHArgs &Args) {
EmitEOL();
}
-void MCAsmStreamer::EmitDebugLabel(MCSymbol *Symbol) {
- assert(Symbol->isUndefined() && "Cannot define a symbol twice!");
- MCStreamer::EmitDebugLabel(Symbol);
-
- OS << *Symbol << MAI->getDebugLabelSuffix();
- EmitEOL();
-}
-
void MCAsmStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) {
switch (Flag) {
case MCAF_SyntaxUnified: OS << "\t.syntax unified"; break;
@@ -944,10 +935,7 @@ void MCAsmStreamer::EmitCFIStartProcImpl(MCDwarfFrameInfo &Frame) {
}
void MCAsmStreamer::EmitCFIEndProcImpl(MCDwarfFrameInfo &Frame) {
- // Put a dummy non-null value in Frame.End to mark that this frame has been
- // closed.
- Frame.End = (MCSymbol *) 1;
-
+ MCStreamer::EmitCFIEndProcImpl(Frame);
OS << "\t.cfi_endproc";
EmitEOL();
}
@@ -1061,37 +1049,37 @@ void MCAsmStreamer::EmitCFIWindowSave() {
EmitEOL();
}
-void MCAsmStreamer::EmitWin64EHStartProc(const MCSymbol *Symbol) {
- MCStreamer::EmitWin64EHStartProc(Symbol);
+void MCAsmStreamer::EmitWinCFIStartProc(const MCSymbol *Symbol) {
+ MCStreamer::EmitWinCFIStartProc(Symbol);
OS << ".seh_proc " << *Symbol;
EmitEOL();
}
-void MCAsmStreamer::EmitWin64EHEndProc() {
- MCStreamer::EmitWin64EHEndProc();
+void MCAsmStreamer::EmitWinCFIEndProc() {
+ MCStreamer::EmitWinCFIEndProc();
OS << "\t.seh_endproc";
EmitEOL();
}
-void MCAsmStreamer::EmitWin64EHStartChained() {
- MCStreamer::EmitWin64EHStartChained();
+void MCAsmStreamer::EmitWinCFIStartChained() {
+ MCStreamer::EmitWinCFIStartChained();
OS << "\t.seh_startchained";
EmitEOL();
}
-void MCAsmStreamer::EmitWin64EHEndChained() {
- MCStreamer::EmitWin64EHEndChained();
+void MCAsmStreamer::EmitWinCFIEndChained() {
+ MCStreamer::EmitWinCFIEndChained();
OS << "\t.seh_endchained";
EmitEOL();
}
-void MCAsmStreamer::EmitWin64EHHandler(const MCSymbol *Sym, bool Unwind,
- bool Except) {
- MCStreamer::EmitWin64EHHandler(Sym, Unwind, Except);
+void MCAsmStreamer::EmitWinEHHandler(const MCSymbol *Sym, bool Unwind,
+ bool Except) {
+ MCStreamer::EmitWinEHHandler(Sym, Unwind, Except);
OS << "\t.seh_handler " << *Sym;
if (Unwind)
@@ -1114,8 +1102,8 @@ static const MCSection *getWin64EHTableSection(StringRef suffix,
SectionKind::getDataRel());
}
-void MCAsmStreamer::EmitWin64EHHandlerData() {
- MCStreamer::EmitWin64EHHandlerData();
+void MCAsmStreamer::EmitWinEHHandlerData() {
+ MCStreamer::EmitWinEHHandlerData();
// Switch sections. Don't call SwitchSection directly, because that will
// cause the section switch to be visible in the emitted assembly.
@@ -1131,50 +1119,43 @@ void MCAsmStreamer::EmitWin64EHHandlerData() {
EmitEOL();
}
-void MCAsmStreamer::EmitWin64EHPushReg(unsigned Register) {
- MCStreamer::EmitWin64EHPushReg(Register);
+void MCAsmStreamer::EmitWinCFIPushReg(unsigned Register) {
+ MCStreamer::EmitWinCFIPushReg(Register);
- OS << "\t.seh_pushreg ";
- EmitRegisterName(Register);
+ OS << "\t.seh_pushreg " << Register;
EmitEOL();
}
-void MCAsmStreamer::EmitWin64EHSetFrame(unsigned Register, unsigned Offset) {
- MCStreamer::EmitWin64EHSetFrame(Register, Offset);
+void MCAsmStreamer::EmitWinCFISetFrame(unsigned Register, unsigned Offset) {
+ MCStreamer::EmitWinCFISetFrame(Register, Offset);
- OS << "\t.seh_setframe ";
- EmitRegisterName(Register);
- OS << ", " << Offset;
+ OS << "\t.seh_setframe " << Register << ", " << Offset;
EmitEOL();
}
-void MCAsmStreamer::EmitWin64EHAllocStack(unsigned Size) {
- MCStreamer::EmitWin64EHAllocStack(Size);
+void MCAsmStreamer::EmitWinCFIAllocStack(unsigned Size) {
+ MCStreamer::EmitWinCFIAllocStack(Size);
OS << "\t.seh_stackalloc " << Size;
EmitEOL();
}
-void MCAsmStreamer::EmitWin64EHSaveReg(unsigned Register, unsigned Offset) {
- MCStreamer::EmitWin64EHSaveReg(Register, Offset);
+void MCAsmStreamer::EmitWinCFISaveReg(unsigned Register, unsigned Offset) {
+ MCStreamer::EmitWinCFISaveReg(Register, Offset);
- OS << "\t.seh_savereg ";
- EmitRegisterName(Register);
- OS << ", " << Offset;
+ OS << "\t.seh_savereg " << Register << ", " << Offset;
EmitEOL();
}
-void MCAsmStreamer::EmitWin64EHSaveXMM(unsigned Register, unsigned Offset) {
- MCStreamer::EmitWin64EHSaveXMM(Register, Offset);
+void MCAsmStreamer::EmitWinCFISaveXMM(unsigned Register, unsigned Offset) {
+ MCStreamer::EmitWinCFISaveXMM(Register, Offset);
- OS << "\t.seh_savexmm ";
- EmitRegisterName(Register);
- OS << ", " << Offset;
+ OS << "\t.seh_savexmm " << Register << ", " << Offset;
EmitEOL();
}
-void MCAsmStreamer::EmitWin64EHPushFrame(bool Code) {
- MCStreamer::EmitWin64EHPushFrame(Code);
+void MCAsmStreamer::EmitWinCFIPushFrame(bool Code) {
+ MCStreamer::EmitWinCFIPushFrame(Code);
OS << "\t.seh_pushframe";
if (Code)
@@ -1182,8 +1163,8 @@ void MCAsmStreamer::EmitWin64EHPushFrame(bool Code) {
EmitEOL();
}
-void MCAsmStreamer::EmitWin64EHEndProlog(void) {
- MCStreamer::EmitWin64EHEndProlog();
+void MCAsmStreamer::EmitWinCFIEndProlog(void) {
+ MCStreamer::EmitWinCFIEndProlog();
OS << "\t.seh_endprologue";
EmitEOL();
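Every renamed EmitWinCFI* override above follows the same two-step shape: call the MCStreamer base method first, so the unwind bookkeeping shared by all streamers gets recorded, then print the textual .seh_* directive. A minimal sketch of that record-then-print layering (class names are invented):

    #include <iostream>
    #include <vector>

    class StreamerBase {
    public:
      virtual ~StreamerBase() = default;
      virtual void EmitWinCFIPushReg(unsigned Reg) {
        PushedRegs.push_back(Reg); // shared state, consumed later by the
                                   // object emitter in the real code
      }
    protected:
      std::vector<unsigned> PushedRegs;
    };

    class AsmStreamerSketch : public StreamerBase {
    public:
      void EmitWinCFIPushReg(unsigned Reg) override {
        StreamerBase::EmitWinCFIPushReg(Reg);          // record first
        std::cout << "\t.seh_pushreg " << Reg << "\n"; // then print
      }
    };

    int main() {
      AsmStreamerSketch S;
      S.EmitWinCFIPushReg(5); // prints: .seh_pushreg 5
    }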
diff --git a/lib/MC/MCAssembler.cpp b/lib/MC/MCAssembler.cpp
index 886a5f5..a8aad71 100644
--- a/lib/MC/MCAssembler.cpp
+++ b/lib/MC/MCAssembler.cpp
@@ -27,6 +27,7 @@
#include "llvm/Support/LEB128.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/MC/MCSectionELF.h"
#include <tuple>
using namespace llvm;
@@ -433,12 +434,27 @@ const MCSymbolData *MCAssembler::getAtom(const MCSymbolData *SD) const {
return SD->getFragment()->getAtom();
}
+// Try to fully evaluate Expr to an absolute value and, if that fails, produce
+// a relocatable expr.
+// FIXME: Should this be the behavior of EvaluateAsRelocatable itself?
+static bool evaluate(const MCExpr &Expr, const MCAsmLayout &Layout,
+ MCValue &Target) {
+ if (Expr.EvaluateAsValue(Target, &Layout))
+ if (Target.isAbsolute())
+ return true;
+ return Expr.EvaluateAsRelocatable(Target, &Layout);
+}
+
bool MCAssembler::evaluateFixup(const MCAsmLayout &Layout,
const MCFixup &Fixup, const MCFragment *DF,
MCValue &Target, uint64_t &Value) const {
++stats::evaluateFixup;
- if (!Fixup.getValue()->EvaluateAsRelocatable(Target, &Layout))
+ // FIXME: This code has some duplication with RecordRelocation. We should
+ // probably merge the two into a single callback that tries to evaluate a
+ // fixup and records a relocation if one is needed.
+ const MCExpr *Expr = Fixup.getValue();
+ if (!evaluate(*Expr, Layout, Target))
getContext().FatalError(Fixup.getLoc(), "expected relocatable expression");
bool IsPCRel = Backend.getFixupKindInfo(
@@ -782,8 +798,13 @@ void MCAssembler::writeSectionData(const MCSectionData *SD,
assert(DF.fixup_begin() == DF.fixup_end() &&
"Cannot have fixups in virtual section!");
for (unsigned i = 0, e = DF.getContents().size(); i != e; ++i)
- assert(DF.getContents()[i] == 0 &&
- "Invalid data value for virtual section!");
+ if (DF.getContents()[i]) {
+ if (auto *ELFSec = dyn_cast<const MCSectionELF>(&SD->getSection()))
+ report_fatal_error("non-zero initializer found in section '" +
+ ELFSec->getSectionName() + "'");
+ else
+ report_fatal_error("non-zero initializer found in virtual section");
+ }
break;
}
case MCFragment::FT_Align:
@@ -1222,7 +1243,7 @@ void MCSectionData::dump() {
OS << "]>";
}
-void MCSymbolData::dump() {
+void MCSymbolData::dump() const {
raw_ostream &OS = llvm::errs();
OS << "<MCSymbolData Symbol:" << getSymbol()
diff --git a/lib/MC/MCContext.cpp b/lib/MC/MCContext.cpp
index c163268..960a071 100644
--- a/lib/MC/MCContext.cpp
+++ b/lib/MC/MCContext.cpp
@@ -39,7 +39,7 @@ MCContext::MCContext(const MCAsmInfo *mai, const MCRegisterInfo *mri,
AllowTemporaryLabels(true), DwarfCompileUnitID(0),
AutoReset(DoAutoReset) {
- error_code EC = llvm::sys::fs::current_path(CompilationDir);
+ std::error_code EC = llvm::sys::fs::current_path(CompilationDir);
if (EC)
CompilationDir.clear();
@@ -47,8 +47,9 @@ MCContext::MCContext(const MCAsmInfo *mai, const MCRegisterInfo *mri,
SecureLog = nullptr;
SecureLogUsed = false;
- if (SrcMgr && SrcMgr->getNumBuffers() > 0)
- MainFileName = SrcMgr->getMemoryBuffer(0)->getBufferIdentifier();
+ if (SrcMgr && SrcMgr->getNumBuffers())
+ MainFileName =
+ SrcMgr->getMemoryBuffer(SrcMgr->getMainFileID())->getBufferIdentifier();
}
MCContext::~MCContext() {
@@ -277,14 +278,15 @@ const MCSectionELF *MCContext::CreateELFGroupSection() {
return Result;
}
-const MCSectionCOFF *
-MCContext::getCOFFSection(StringRef Section, unsigned Characteristics,
- SectionKind Kind, StringRef COMDATSymName,
- int Selection, const MCSectionCOFF *Assoc) {
+const MCSectionCOFF *MCContext::getCOFFSection(StringRef Section,
+ unsigned Characteristics,
+ SectionKind Kind,
+ StringRef COMDATSymName,
+ int Selection) {
// Do the lookup, if we have a hit, return it.
- SectionGroupPair P(Section, COMDATSymName);
- auto IterBool = COFFUniquingMap.insert(std::make_pair(P, nullptr));
+ SectionGroupTriple T(Section, COMDATSymName, Selection);
+ auto IterBool = COFFUniquingMap.insert(std::make_pair(T, nullptr));
auto Iter = IterBool.first;
if (!IterBool.second)
return Iter->second;
@@ -293,9 +295,9 @@ MCContext::getCOFFSection(StringRef Section, unsigned Characteristics,
if (!COMDATSymName.empty())
COMDATSymbol = GetOrCreateSymbol(COMDATSymName);
- StringRef CachedName = Iter->first.first;
- MCSectionCOFF *Result = new (*this) MCSectionCOFF(
- CachedName, Characteristics, COMDATSymbol, Selection, Assoc, Kind);
+ StringRef CachedName = std::get<0>(Iter->first);
+ MCSectionCOFF *Result = new (*this)
+ MCSectionCOFF(CachedName, Characteristics, COMDATSymbol, Selection, Kind);
Iter->second = Result;
return Result;
@@ -308,8 +310,8 @@ MCContext::getCOFFSection(StringRef Section, unsigned Characteristics,
}
const MCSectionCOFF *MCContext::getCOFFSection(StringRef Section) {
- SectionGroupPair P(Section, "");
- auto Iter = COFFUniquingMap.find(P);
+ SectionGroupTriple T(Section, "", 0);
+ auto Iter = COFFUniquingMap.find(T);
if (Iter == COFFUniquingMap.end())
return nullptr;
return Iter->second;
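The uniquing change above widens the COFF section key from a (name, COMDAT symbol) pair to a (name, COMDAT symbol, selection) triple, so requests that differ only in selection no longer collide. A small sketch of the idea over a standard map (the mapped value is a stub):

    #include <iostream>
    #include <map>
    #include <string>
    #include <tuple>

    using SectionGroupTriple = std::tuple<std::string, std::string, int>;

    int main() {
      std::map<SectionGroupTriple, int> COFFUniquingMap;
      COFFUniquingMap[{".text$mn", "?foo", 1}] = 1;
      COFFUniquingMap[{".text$mn", "?foo", 2}] = 2; // distinct key now
      std::cout << COFFUniquingMap.size() << "\n";  // 2: selection is
                                                    // part of the key
    }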
@@ -339,6 +341,29 @@ bool MCContext::isValidDwarfFileNumber(unsigned FileNumber, unsigned CUID) {
return !MCDwarfFiles[FileNumber].Name.empty();
}
+/// finalizeDwarfSections - Emit end symbols for each non-empty code section.
+/// Also remove empty sections from SectionStartEndSyms, to avoid generating
+/// useless debug info for them.
+void MCContext::finalizeDwarfSections(MCStreamer &MCOS) {
+ MCContext &context = MCOS.getContext();
+
+ auto sec = SectionStartEndSyms.begin();
+ while (sec != SectionStartEndSyms.end()) {
+ assert(sec->second.first && "Start symbol must be set by now");
+ MCOS.SwitchSection(sec->first);
+ if (MCOS.mayHaveInstructions()) {
+ MCSymbol *SectionEndSym = context.CreateTempSymbol();
+ MCOS.EmitLabel(SectionEndSym);
+ sec->second.second = SectionEndSym;
+ ++sec;
+ } else {
+ MapVector<const MCSection *, std::pair<MCSymbol *, MCSymbol *> >::iterator
+ to_erase = sec;
+ sec = SectionStartEndSyms.erase(to_erase);
+ }
+ }
+}
+
void MCContext::FatalError(SMLoc Loc, const Twine &Msg) const {
// If we have a source manager and a location, use it. Otherwise just
// use the generic report_fatal_error().
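finalizeDwarfSections above uses the standard erase-while-iterating idiom: advance the iterator when an entry is kept, and take the iterator returned by erase() when one is removed. The same loop over a plain std::map, assuming a bool stands in for "the section has instructions":

    #include <iostream>
    #include <map>
    #include <string>

    int main() {
      std::map<std::string, bool> Sections = {
          {".text", true}, {".text.cold", false}, {".text2", true}};

      auto It = Sections.begin();
      while (It != Sections.end()) {
        if (It->second)
          ++It;                    // keep: the real code emits an end symbol
        else
          It = Sections.erase(It); // drop empty section, continue safely
      }
      std::cout << Sections.size() << "\n"; // 2
    }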
diff --git a/lib/MC/MCDwarf.cpp b/lib/MC/MCDwarf.cpp
index be6731a..0a3fab8 100644
--- a/lib/MC/MCDwarf.cpp
+++ b/lib/MC/MCDwarf.cpp
@@ -19,6 +19,7 @@
#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCObjectStreamer.h"
#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
@@ -518,8 +519,12 @@ static void EmitGenDwarfAbbrev(MCStreamer *MCOS) {
MCOS->EmitULEB128IntValue(dwarf::DW_TAG_compile_unit);
MCOS->EmitIntValue(dwarf::DW_CHILDREN_yes, 1);
EmitAbbrev(MCOS, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4);
- EmitAbbrev(MCOS, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr);
- EmitAbbrev(MCOS, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr);
+ if (MCOS->getContext().getGenDwarfSectionSyms().size() > 1) {
+ EmitAbbrev(MCOS, dwarf::DW_AT_ranges, dwarf::DW_FORM_data4);
+ } else {
+ EmitAbbrev(MCOS, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr);
+ EmitAbbrev(MCOS, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr);
+ }
EmitAbbrev(MCOS, dwarf::DW_AT_name, dwarf::DW_FORM_string);
if (!context.getCompilationDir().empty())
EmitAbbrev(MCOS, dwarf::DW_AT_comp_dir, dwarf::DW_FORM_string);
@@ -552,20 +557,14 @@ static void EmitGenDwarfAbbrev(MCStreamer *MCOS) {
}
// When generating dwarf for assembly source files this emits the data for
-// .debug_aranges section. Which contains a header and a table of pairs of
-// PointerSize'ed values for the address and size of section(s) with line table
-// entries (just the default .text in our case) and a terminating pair of zeros.
+// .debug_aranges section. This section contains a header and a table of pairs
+// of PointerSize'ed values for the address and size of section(s) with line
+// table entries.
static void EmitGenDwarfAranges(MCStreamer *MCOS,
const MCSymbol *InfoSectionSymbol) {
MCContext &context = MCOS->getContext();
- // Create a symbol at the end of the section that we are creating the dwarf
- // debugging info to use later in here as part of the expression to calculate
- // the size of the section for the table.
- MCOS->SwitchSection(context.getGenDwarfSection());
- MCSymbol *SectionEndSym = context.CreateTempSymbol();
- MCOS->EmitLabel(SectionEndSym);
- context.setGenDwarfSectionEndSym(SectionEndSym);
+ auto &Sections = context.getGenDwarfSectionSyms();
MCOS->SwitchSection(context.getObjectFileInfo()->getDwarfARangesSection());
@@ -583,8 +582,8 @@ static void EmitGenDwarfAranges(MCStreamer *MCOS,
Length += Pad;
// Add the size of the pair of PointerSize'ed values for the address and size
- // of the one default .text section we have in the table.
- Length += 2 * AddrSize;
+ // of each section we have in the table.
+ Length += 2 * AddrSize * Sections.size();
// And the pair of terminating zeros.
Length += 2 * AddrSize;
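The length computation above generalizes from the single default .text section to N sections: fixed header, alignment padding, one (address, size) pair per section, and one terminating zero pair. A worked version of the arithmetic for a hypothetical 64-bit target with three code sections:

    #include <iostream>

    // Header: 4-byte unit length, 2-byte version, 4-byte debug_info offset,
    // 1-byte address size, 1-byte segment size; tuples aligned to 2*AddrSize.
    int main() {
      const int AddrSize = 8;
      const int NumSections = 3;

      int Length = 4 + 2 + 4 + 1 + 1;   // = 12
      int Pad = 2 * AddrSize - (Length & (2 * AddrSize - 1));
      if (Pad == 2 * AddrSize)
        Pad = 0;
      Length += Pad;                            // 16: tuples start aligned
      Length += 2 * AddrSize * NumSections;     // one (addr, size) per section
      Length += 2 * AddrSize;                   // terminating zero pair
      std::cout << Length << "\n";              // 80
    }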
@@ -608,14 +607,21 @@ static void EmitGenDwarfAranges(MCStreamer *MCOS,
for(int i = 0; i < Pad; i++)
MCOS->EmitIntValue(0, 1);
- // Now emit the table of pairs of PointerSize'ed values for the section(s)
- // address and size, in our case just the one default .text section.
- const MCExpr *Addr = MCSymbolRefExpr::Create(
- context.getGenDwarfSectionStartSym(), MCSymbolRefExpr::VK_None, context);
- const MCExpr *Size = MakeStartMinusEndExpr(*MCOS,
- *context.getGenDwarfSectionStartSym(), *SectionEndSym, 0);
- MCOS->EmitValue(Addr, AddrSize);
- MCOS->EmitAbsValue(Size, AddrSize);
+ // Now emit the table of pairs of PointerSize'ed values for the section
+ // addresses and sizes.
+ for (const auto &sec : Sections) {
+ MCSymbol *StartSymbol = sec.second.first;
+ MCSymbol *EndSymbol = sec.second.second;
+ assert(StartSymbol && "StartSymbol must not be NULL");
+ assert(EndSymbol && "EndSymbol must not be NULL");
+
+ const MCExpr *Addr = MCSymbolRefExpr::Create(
+ StartSymbol, MCSymbolRefExpr::VK_None, context);
+ const MCExpr *Size = MakeStartMinusEndExpr(*MCOS,
+ *StartSymbol, *EndSymbol, 0);
+ MCOS->EmitValue(Addr, AddrSize);
+ MCOS->EmitAbsValue(Size, AddrSize);
+ }
// And finally the pair of terminating zeros.
MCOS->EmitIntValue(0, AddrSize);
@@ -627,7 +633,8 @@ static void EmitGenDwarfAranges(MCStreamer *MCOS,
// DIE and a list of label DIEs.
static void EmitGenDwarfInfo(MCStreamer *MCOS,
const MCSymbol *AbbrevSectionSymbol,
- const MCSymbol *LineSectionSymbol) {
+ const MCSymbol *LineSectionSymbol,
+ const MCSymbol *RangesSectionSymbol) {
MCContext &context = MCOS->getContext();
MCOS->SwitchSection(context.getObjectFileInfo()->getDwarfInfoSection());
@@ -674,15 +681,37 @@ static void EmitGenDwarfInfo(MCStreamer *MCOS,
MCOS->EmitIntValue(0, 4);
}
- // AT_low_pc, the first address of the default .text section.
- const MCExpr *Start = MCSymbolRefExpr::Create(
- context.getGenDwarfSectionStartSym(), MCSymbolRefExpr::VK_None, context);
- MCOS->EmitValue(Start, AddrSize);
+ if (RangesSectionSymbol) {
+ // There are multiple sections containing code, so we must use the
+ // .debug_ranges sections.
- // AT_high_pc, the last address of the default .text section.
- const MCExpr *End = MCSymbolRefExpr::Create(
- context.getGenDwarfSectionEndSym(), MCSymbolRefExpr::VK_None, context);
- MCOS->EmitValue(End, AddrSize);
+ // AT_ranges, the 4 byte offset from the start of the .debug_ranges section
+ // to the address range list for this compilation unit.
+ MCOS->EmitSymbolValue(RangesSectionSymbol, 4);
+ } else {
+ // If we only have one non-empty code section, we can use the simpler
+ // AT_low_pc and AT_high_pc attributes.
+
+ // Find the first (and only) non-empty text section
+ auto &Sections = context.getGenDwarfSectionSyms();
+ const auto TextSection = Sections.begin();
+ assert(TextSection != Sections.end() && "No text section found");
+
+ MCSymbol *StartSymbol = TextSection->second.first;
+ MCSymbol *EndSymbol = TextSection->second.second;
+ assert(StartSymbol && "StartSymbol must not be NULL");
+ assert(EndSymbol && "EndSymbol must not be NULL");
+
+ // AT_low_pc, the first address of the default .text section.
+ const MCExpr *Start = MCSymbolRefExpr::Create(
+ StartSymbol, MCSymbolRefExpr::VK_None, context);
+ MCOS->EmitValue(Start, AddrSize);
+
+ // AT_high_pc, the last address of the default .text section.
+ const MCExpr *End = MCSymbolRefExpr::Create(
+ EndSymbol, MCSymbolRefExpr::VK_None, context);
+ MCOS->EmitValue(End, AddrSize);
+ }
// AT_name, the name of the source file. Reconstruct from the first directory
// and file table entries.
@@ -766,13 +795,51 @@ static void EmitGenDwarfInfo(MCStreamer *MCOS,
MCOS->EmitLabel(InfoEnd);
}
+// When generating dwarf for assembly source files this emits the data for
+// .debug_ranges section. We only emit one range list, which spans all of the
+// executable sections of this file.
+static void EmitGenDwarfRanges(MCStreamer *MCOS) {
+ MCContext &context = MCOS->getContext();
+ auto &Sections = context.getGenDwarfSectionSyms();
+
+ const MCAsmInfo *AsmInfo = context.getAsmInfo();
+ int AddrSize = AsmInfo->getPointerSize();
+
+ MCOS->SwitchSection(context.getObjectFileInfo()->getDwarfRangesSection());
+
+  for (const auto &sec : Sections) {
+
+ MCSymbol *StartSymbol = sec.second.first;
+ MCSymbol *EndSymbol = sec.second.second;
+ assert(StartSymbol && "StartSymbol must not be NULL");
+ assert(EndSymbol && "EndSymbol must not be NULL");
+
+ // Emit a base address selection entry for the start of this section
+ const MCExpr *SectionStartAddr = MCSymbolRefExpr::Create(
+ StartSymbol, MCSymbolRefExpr::VK_None, context);
+ MCOS->EmitFill(AddrSize, 0xFF);
+ MCOS->EmitValue(SectionStartAddr, AddrSize);
+
+ // Emit a range list entry spanning this section
+ const MCExpr *SectionSize = MakeStartMinusEndExpr(*MCOS,
+ *StartSymbol, *EndSymbol, 0);
+ MCOS->EmitIntValue(0, AddrSize);
+ MCOS->EmitAbsValue(SectionSize, AddrSize);
+ }
+
+ // Emit end of list entry
+ MCOS->EmitIntValue(0, AddrSize);
+ MCOS->EmitIntValue(0, AddrSize);
+}
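Concretely, for each section the loop above emits a DWARF base address selection entry (an all-ones address followed by the section start) and then a single (0, size) range relative to that base, with a (0, 0) pair closing the list. A sketch that prints that layout for invented sections:

    #include <cstdint>
    #include <iostream>
    #include <vector>

    int main() {
      struct Sec { uint64_t Start, Size; };
      std::vector<Sec> Sections = {{0x1000, 0x40}, {0x2000, 0x10}};

      std::cout << std::hex;
      for (const auto &S : Sections) {
        std::cout << UINT64_MAX << " " << S.Start << "  // base selection\n";
        std::cout << "0 " << S.Size << "  // range: base .. base+size\n";
      }
      std::cout << "0 0  // end of list\n";
    }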
+
//
// When generating dwarf for assembly source files this emits the Dwarf
// sections.
//
void MCGenDwarfInfo::Emit(MCStreamer *MCOS) {
- // Create the dwarf sections in this order (.debug_line already created).
MCContext &context = MCOS->getContext();
+
+ // Create the dwarf sections in this order (.debug_line already created).
const MCAsmInfo *AsmInfo = context.getAsmInfo();
bool CreateDwarfSectionSymbols =
AsmInfo->doesDwarfUseRelocationsAcrossSections();
@@ -781,6 +848,22 @@ void MCGenDwarfInfo::Emit(MCStreamer *MCOS) {
LineSectionSymbol = MCOS->getDwarfLineTableSymbol(0);
MCSymbol *AbbrevSectionSymbol = nullptr;
MCSymbol *InfoSectionSymbol = nullptr;
+  MCSymbol *RangesSectionSymbol = nullptr;
+
+ // Create end symbols for each section, and remove empty sections
+ MCOS->getContext().finalizeDwarfSections(*MCOS);
+
+ // If there are no sections to generate debug info for, we don't need
+  // to do anything.
+ if (MCOS->getContext().getGenDwarfSectionSyms().empty())
+ return;
+
+ // We only need to use the .debug_ranges section if we have multiple
+ // code sections.
+ const bool UseRangesSection =
+ MCOS->getContext().getGenDwarfSectionSyms().size() > 1;
+ CreateDwarfSectionSymbols |= UseRangesSection;
+
MCOS->SwitchSection(context.getObjectFileInfo()->getDwarfInfoSection());
if (CreateDwarfSectionSymbols) {
InfoSectionSymbol = context.CreateTempSymbol();
@@ -791,20 +874,30 @@ void MCGenDwarfInfo::Emit(MCStreamer *MCOS) {
AbbrevSectionSymbol = context.CreateTempSymbol();
MCOS->EmitLabel(AbbrevSectionSymbol);
}
- MCOS->SwitchSection(context.getObjectFileInfo()->getDwarfARangesSection());
+ if (UseRangesSection) {
+ MCOS->SwitchSection(context.getObjectFileInfo()->getDwarfRangesSection());
+ if (CreateDwarfSectionSymbols) {
+ RangesSectionSymbol = context.CreateTempSymbol();
+ MCOS->EmitLabel(RangesSectionSymbol);
+ }
+ }
- // If there are no line table entries then do not emit any section contents.
- if (!context.hasMCLineSections())
- return;
+  assert((RangesSectionSymbol != nullptr) || !UseRangesSection);
+
+ MCOS->SwitchSection(context.getObjectFileInfo()->getDwarfARangesSection());
// Output the data for .debug_aranges section.
EmitGenDwarfAranges(MCOS, InfoSectionSymbol);
+ if (UseRangesSection)
+ EmitGenDwarfRanges(MCOS);
+
// Output the data for .debug_abbrev section.
EmitGenDwarfAbbrev(MCOS);
// Output the data for .debug_info section.
- EmitGenDwarfInfo(MCOS, AbbrevSectionSymbol, LineSectionSymbol);
+ EmitGenDwarfInfo(MCOS, AbbrevSectionSymbol, LineSectionSymbol,
+ RangesSectionSymbol);
}
//
@@ -815,12 +908,13 @@ void MCGenDwarfInfo::Emit(MCStreamer *MCOS) {
//
void MCGenDwarfLabelEntry::Make(MCSymbol *Symbol, MCStreamer *MCOS,
SourceMgr &SrcMgr, SMLoc &Loc) {
- // We won't create dwarf labels for temporary symbols or symbols not in
- // the default text.
+ // We won't create dwarf labels for temporary symbols.
if (Symbol->isTemporary())
return;
MCContext &context = MCOS->getContext();
- if (context.getGenDwarfSection() != MCOS->getCurrentSection().first)
+ // We won't create dwarf labels for symbols in sections that we are not
+ // generating debug info for.
+ if (!context.getGenDwarfSectionSyms().count(MCOS->getCurrentSection().first))
return;
// The dwarf label's name does not have the symbol name's leading
@@ -834,7 +928,7 @@ void MCGenDwarfLabelEntry::Make(MCSymbol *Symbol, MCStreamer *MCOS,
// Finding the line number is the expensive part which is why we just don't
// pass it in as for some symbols we won't create a dwarf label.
- int CurBuffer = SrcMgr.FindBufferContainingLoc(Loc);
+ unsigned CurBuffer = SrcMgr.FindBufferContainingLoc(Loc);
unsigned LineNumber = SrcMgr.FindLineNumber(Loc, CurBuffer);
// We create a temporary symbol for use for the AT_high_pc and AT_low_pc
@@ -1203,7 +1297,7 @@ void FrameEmitterImpl::EmitCompactUnwind(MCStreamer &Streamer,
unsigned FDEEncoding = MOFI->getFDEEncoding();
unsigned Size = getSizeForEncoding(Streamer, FDEEncoding);
if (VerboseAsm) Streamer.AddComment("Range Start");
- Streamer.EmitSymbolValue(Frame.Function, Size);
+ Streamer.EmitSymbolValue(Frame.Begin, Size);
// Range Length
const MCExpr *Range = MakeStartMinusEndExpr(Streamer, *Frame.Begin,
@@ -1246,12 +1340,7 @@ const MCSymbol &FrameEmitterImpl::EmitCIE(MCObjectStreamer &streamer,
const MCObjectFileInfo *MOFI = context.getObjectFileInfo();
bool verboseAsm = streamer.isVerboseAsm();
- MCSymbol *sectionStart;
- if (MOFI->isFunctionEHFrameSymbolPrivate() || !IsEH)
- sectionStart = context.CreateTempSymbol();
- else
- sectionStart = context.GetOrCreateSymbol(Twine("EH_frame") + Twine(CIENum));
-
+ MCSymbol *sectionStart = context.CreateTempSymbol();
streamer.EmitLabel(sectionStart);
CIENum++;
@@ -1270,7 +1359,10 @@ const MCSymbol &FrameEmitterImpl::EmitCIE(MCObjectStreamer &streamer,
// Version
if (verboseAsm) streamer.AddComment("DW_CIE_VERSION");
- streamer.EmitIntValue(dwarf::DW_CIE_VERSION, 1);
+  // For DWARF 2, we use CIE version 1.
+  // For DWARF 3+, we use CIE version 3.
+ uint8_t CIEVersion = context.getDwarfVersion() <= 2 ? 1 : 3;
+ streamer.EmitIntValue(CIEVersion, 1);
// Augmentation String
SmallString<8> Augmentation;
@@ -1298,7 +1390,14 @@ const MCSymbol &FrameEmitterImpl::EmitCIE(MCObjectStreamer &streamer,
// Return Address Register
if (verboseAsm) streamer.AddComment("CIE Return Address Column");
- streamer.EmitULEB128IntValue(MRI->getDwarfRegNum(MRI->getRARegister(), true));
+ if (CIEVersion == 1) {
+ assert(MRI->getRARegister() <= 255 &&
+ "DWARF 2 encodes return_address_register in one byte");
+ streamer.EmitIntValue(MRI->getDwarfRegNum(MRI->getRARegister(), true), 1);
+ } else {
+ streamer.EmitULEB128IntValue(
+ MRI->getDwarfRegNum(MRI->getRARegister(), true));
+ }
// Augmentation Data Length (optional)
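The version split above encodes the one wire-format difference that matters here: CIE version 1 (DWARF 2) stores return_address_register in a single byte, while version 3 (DWARF 3+) stores it as a ULEB128. A standalone sketch with a minimal local ULEB128 encoder (not the LLVM one):

    #include <cassert>
    #include <cstdint>
    #include <iostream>
    #include <vector>

    static void emitULEB128(uint64_t V, std::vector<uint8_t> &Out) {
      do {
        uint8_t Byte = V & 0x7f;
        V >>= 7;
        if (V)
          Byte |= 0x80; // more bytes follow
        Out.push_back(Byte);
      } while (V);
    }

    int main() {
      unsigned DwarfVersion = 2;
      unsigned RAReg = 16; // an invented return-address column

      uint8_t CIEVersion = DwarfVersion <= 2 ? 1 : 3;
      std::vector<uint8_t> Out;
      if (CIEVersion == 1) {
        assert(RAReg <= 255 && "DWARF 2 encodes the register in one byte");
        Out.push_back(static_cast<uint8_t>(RAReg));
      } else {
        emitULEB128(RAReg, Out);
      }
      std::cout << "CIE v" << unsigned(CIEVersion) << ", "
                << Out.size() << " byte(s)\n"; // CIE v1, 1 byte(s)
    }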
@@ -1360,13 +1459,6 @@ MCSymbol *FrameEmitterImpl::EmitFDE(MCObjectStreamer &streamer,
const MCObjectFileInfo *MOFI = context.getObjectFileInfo();
bool verboseAsm = streamer.isVerboseAsm();
- if (IsEH && frame.Function && !MOFI->isFunctionEHFrameSymbolPrivate()) {
- MCSymbol *EHSym =
- context.GetOrCreateSymbol(frame.Function->getName() + Twine(".eh"));
- streamer.EmitEHSymAttributes(frame.Function, EHSym);
- streamer.EmitLabel(EHSym);
- }
-
// Length
const MCExpr *Length = MakeStartMinusEndExpr(streamer, *fdeStart, *fdeEnd, 0);
if (verboseAsm) streamer.AddComment("FDE Length");
@@ -1435,13 +1527,12 @@ namespace {
return CIEKey(nullptr, -1, 0, false, false);
}
- CIEKey(const MCSymbol* Personality_, unsigned PersonalityEncoding_,
- unsigned LsdaEncoding_, bool IsSignalFrame_, bool IsSimple_) :
- Personality(Personality_), PersonalityEncoding(PersonalityEncoding_),
- LsdaEncoding(LsdaEncoding_), IsSignalFrame(IsSignalFrame_),
- IsSimple(IsSimple_) {
- }
- const MCSymbol* Personality;
+ CIEKey(const MCSymbol *Personality_, unsigned PersonalityEncoding_,
+ unsigned LsdaEncoding_, bool IsSignalFrame_, bool IsSimple_)
+ : Personality(Personality_), PersonalityEncoding(PersonalityEncoding_),
+ LsdaEncoding(LsdaEncoding_), IsSignalFrame(IsSignalFrame_),
+ IsSimple(IsSimple_) {}
+ const MCSymbol *Personality;
unsigned PersonalityEncoding;
unsigned LsdaEncoding;
bool IsSignalFrame;
@@ -1516,7 +1607,7 @@ void MCDwarfFrameEmitter::Emit(MCObjectStreamer &Streamer, MCAsmBackend *MAB,
Emitter.setSectionStart(SectionStart);
MCSymbol *FDEEnd = nullptr;
- DenseMap<CIEKey, const MCSymbol*> CIEStarts;
+ DenseMap<CIEKey, const MCSymbol *> CIEStarts;
const MCSymbol *DummyDebugKey = nullptr;
NeedsEHFrameSection = !MOFI->getSupportsCompactUnwindWithoutEHFrame();
diff --git a/lib/MC/MCELFStreamer.cpp b/lib/MC/MCELFStreamer.cpp
index 767348c..7c70540 100644
--- a/lib/MC/MCELFStreamer.cpp
+++ b/lib/MC/MCELFStreamer.cpp
@@ -65,10 +65,6 @@ void MCELFStreamer::EmitLabel(MCSymbol *Symbol) {
MCELF::SetType(SD, ELF::STT_TLS);
}
-void MCELFStreamer::EmitDebugLabel(MCSymbol *Symbol) {
- EmitLabel(Symbol);
-}
-
void MCELFStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) {
// Let the target do whatever target specific stuff it needs to do.
getAssembler().getBackend().handleAssemblerFlag(Flag);
diff --git a/lib/MC/MCMachOStreamer.cpp b/lib/MC/MCMachOStreamer.cpp
index 37d05e9..9e8bc94 100644
--- a/lib/MC/MCMachOStreamer.cpp
+++ b/lib/MC/MCMachOStreamer.cpp
@@ -60,7 +60,6 @@ public:
void ChangeSection(const MCSection *Sect, const MCExpr *Subsect) override;
void EmitLabel(MCSymbol *Symbol) override;
- void EmitDebugLabel(MCSymbol *Symbol) override;
void EmitEHSymAttributes(const MCSymbol *Symbol, MCSymbol *EHSymbol) override;
void EmitAssemblerFlag(MCAssemblerFlag Flag) override;
void EmitLinkerOptions(ArrayRef<std::string> Options) override;
@@ -162,9 +161,6 @@ void MCMachOStreamer::EmitLabel(MCSymbol *Symbol) {
SD.setFlags(SD.getFlags() & ~SF_ReferenceTypeMask);
}
-void MCMachOStreamer::EmitDebugLabel(MCSymbol *Symbol) {
- EmitLabel(Symbol);
-}
void MCMachOStreamer::EmitDataRegion(DataRegionData::KindTy Kind) {
if (!getAssembler().getBackend().hasDataInCodeSupport())
return;
diff --git a/lib/MC/MCNullStreamer.cpp b/lib/MC/MCNullStreamer.cpp
index 4f2740e..d543402 100644
--- a/lib/MC/MCNullStreamer.cpp
+++ b/lib/MC/MCNullStreamer.cpp
@@ -24,83 +24,17 @@ namespace {
/// @name MCStreamer Interface
/// @{
- void ChangeSection(const MCSection *Section,
- const MCExpr *Subsection) override {
- }
-
- void EmitLabel(MCSymbol *Symbol) override {
- assert(Symbol->isUndefined() && "Cannot define a symbol twice!");
- assert(getCurrentSection().first &&"Cannot emit before setting section!");
- AssignSection(Symbol, getCurrentSection().first);
- }
- void EmitDebugLabel(MCSymbol *Symbol) override {
- EmitLabel(Symbol);
- }
- void EmitAssemblerFlag(MCAssemblerFlag Flag) override {}
- void EmitThumbFunc(MCSymbol *Func) override {}
-
- void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) override {}
- void EmitWeakReference(MCSymbol *Alias, const MCSymbol *Symbol) override {}
bool EmitSymbolAttribute(MCSymbol *Symbol,
MCSymbolAttr Attribute) override {
return true;
}
- void EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) override {}
-
- void BeginCOFFSymbolDef(const MCSymbol *Symbol) override {}
- void EmitCOFFSymbolStorageClass(int StorageClass) override {}
- void EmitCOFFSymbolType(int Type) override {}
- void EndCOFFSymbolDef() override {}
void EmitCOFFSecRel32(MCSymbol const *Symbol) override {}
-
- void EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) override {}
void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
unsigned ByteAlignment) override {}
- void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size,
- unsigned ByteAlignment) override {}
void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = nullptr,
uint64_t Size = 0, unsigned ByteAlignment = 0) override {}
- void EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol,
- uint64_t Size, unsigned ByteAlignment) override {}
- void EmitBytes(StringRef Data) override {}
-
- void EmitValueImpl(const MCExpr *Value, unsigned Size,
- const SMLoc &Loc = SMLoc()) override {}
- void EmitULEB128Value(const MCExpr *Value) override {}
- void EmitSLEB128Value(const MCExpr *Value) override {}
void EmitGPRel32Value(const MCExpr *Value) override {}
- void EmitValueToAlignment(unsigned ByteAlignment, int64_t Value = 0,
- unsigned ValueSize = 1,
- unsigned MaxBytesToEmit = 0) override {}
-
- void EmitCodeAlignment(unsigned ByteAlignment,
- unsigned MaxBytesToEmit = 0) override {}
-
- bool EmitValueToOffset(const MCExpr *Offset,
- unsigned char Value = 0) override { return false; }
-
- void EmitFileDirective(StringRef Filename) override {}
- unsigned EmitDwarfFileDirective(unsigned FileNo, StringRef Directory,
- StringRef Filename,
- unsigned CUID = 0) override {
- return 0;
- }
- void EmitDwarfLocDirective(unsigned FileNo, unsigned Line,
- unsigned Column, unsigned Flags,
- unsigned Isa, unsigned Discriminator,
- StringRef FileName) override {}
- void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo&) override {}
-
- void EmitBundleAlignMode(unsigned AlignPow2) override {}
- void EmitBundleLock(bool AlignToEnd) override {}
- void EmitBundleUnlock() override {}
-
- void FinishImpl() override {}
-
- void EmitCFIEndProcImpl(MCDwarfFrameInfo &Frame) override {
- RecordProcEnd(Frame);
- }
};
}
diff --git a/lib/MC/MCObjectFileInfo.cpp b/lib/MC/MCObjectFileInfo.cpp
index 9d413af..d490ef3 100644
--- a/lib/MC/MCObjectFileInfo.cpp
+++ b/lib/MC/MCObjectFileInfo.cpp
@@ -18,9 +18,29 @@
#include "llvm/MC/MCSectionMachO.h"
using namespace llvm;
+static bool useCompactUnwind(const Triple &T) {
+ // Only on darwin.
+ if (!T.isOSDarwin())
+ return false;
+
+ // aarch64 always has it.
+ if (T.getArch() == Triple::arm64 || T.getArch() == Triple::aarch64)
+ return true;
+
+ // Use it on newer version of OS X.
+ if (T.isMacOSX() && !T.isMacOSXVersionLT(10, 6))
+ return true;
+
+ // And the iOS simulator.
+ if (T.isiOS() &&
+ (T.getArch() == Triple::x86_64 || T.getArch() == Triple::x86))
+ return true;
+
+ return false;
+}
+
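The new useCompactUnwind() predicate above folds several scattered triple checks into one named function, so the call site below reads as a single question. The same shape with a simplified stand-in for Triple (fields are invented):

    #include <iostream>
    #include <string>

    struct TripleSketch {
      std::string Arch;     // "x86_64", "aarch64", ...
      bool IsOSDarwin;
      bool IsModernMacOSX;  // Mac OS X >= 10.6 in the real check
      bool IsIOSSimulator;  // iOS on x86/x86_64
    };

    static bool useCompactUnwind(const TripleSketch &T) {
      if (!T.IsOSDarwin)
        return false;                              // only on Darwin
      if (T.Arch == "arm64" || T.Arch == "aarch64")
        return true;                               // aarch64 always has it
      if (T.IsModernMacOSX)
        return true;                               // newer OS X
      return T.IsIOSSimulator;                     // and the iOS simulator
    }

    int main() {
      std::cout << useCompactUnwind({"x86_64", true, true, false}) << "\n"; // 1
      std::cout << useCompactUnwind({"ppc", false, false, false}) << "\n";  // 0
    }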
void MCObjectFileInfo::InitMachOMCObjectFileInfo(Triple T) {
// MachO
- IsFunctionEHFrameSymbolPrivate = false;
SupportsWeakOmittedEHFrame = false;
if (T.isOSDarwin() &&
@@ -151,13 +171,10 @@ void MCObjectFileInfo::InitMachOMCObjectFileInfo(Triple T) {
COFFDebugSymbolsSection = nullptr;
- if ((T.isMacOSX() && !T.isMacOSXVersionLT(10, 6)) ||
- (T.isOSDarwin() &&
- (T.getArch() == Triple::arm64 || T.getArch() == Triple::aarch64))) {
+ if (useCompactUnwind(T)) {
CompactUnwindSection =
- Ctx->getMachOSection("__LD", "__compact_unwind",
- MachO::S_ATTR_DEBUG,
- SectionKind::getReadOnly());
+ Ctx->getMachOSection("__LD", "__compact_unwind", MachO::S_ATTR_DEBUG,
+ SectionKind::getReadOnly());
if (T.getArch() == Triple::x86_64 || T.getArch() == Triple::x86)
CompactUnwindDwarfEHFrameOnly = 0x04000000;
@@ -321,6 +338,13 @@ void MCObjectFileInfo::InitELFMCObjectFileInfo(Triple T) {
TTypeEncoding = dwarf::DW_EH_PE_absptr;
}
break;
+ case Triple::mips:
+ case Triple::mipsel:
+ // MIPS uses indirect pointer to refer personality functions, so that the
+ // eh_frame section can be read-only. DW.ref.personality will be generated
+ // for relocation.
+ PersonalityEncoding = dwarf::DW_EH_PE_indirect;
+ break;
case Triple::ppc64:
case Triple::ppc64le:
PersonalityEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
@@ -562,6 +586,8 @@ void MCObjectFileInfo::InitELFMCObjectFileInfo(Triple T) {
void MCObjectFileInfo::InitCOFFMCObjectFileInfo(Triple T) {
+ bool IsWoA = T.getArch() == Triple::arm || T.getArch() == Triple::thumb;
+
// The object file format cannot represent common symbols with explicit
// alignments.
CommDirectiveSupportsAlignment = false;
@@ -575,6 +601,8 @@ void MCObjectFileInfo::InitCOFFMCObjectFileInfo(Triple T) {
SectionKind::getBSS());
TextSection =
Ctx->getCOFFSection(".text",
+ (IsWoA ? COFF::IMAGE_SCN_MEM_16BIT
+ : (COFF::SectionCharacteristics)0) |
COFF::IMAGE_SCN_CNT_CODE |
COFF::IMAGE_SCN_MEM_EXECUTE |
COFF::IMAGE_SCN_MEM_READ,
@@ -590,12 +618,18 @@ void MCObjectFileInfo::InitCOFFMCObjectFileInfo(Triple T) {
COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
COFF::IMAGE_SCN_MEM_READ,
SectionKind::getReadOnly());
- if (T.isKnownWindowsMSVCEnvironment()) {
+
+ if (T.isKnownWindowsMSVCEnvironment() || T.isWindowsItaniumEnvironment()) {
StaticCtorSection =
Ctx->getCOFFSection(".CRT$XCU",
COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
COFF::IMAGE_SCN_MEM_READ,
SectionKind::getReadOnly());
+ StaticDtorSection =
+ Ctx->getCOFFSection(".CRT$XTX",
+ COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getReadOnly());
} else {
StaticCtorSection =
Ctx->getCOFFSection(".ctors",
@@ -603,16 +637,6 @@ void MCObjectFileInfo::InitCOFFMCObjectFileInfo(Triple T) {
COFF::IMAGE_SCN_MEM_READ |
COFF::IMAGE_SCN_MEM_WRITE,
SectionKind::getDataRel());
- }
-
-
- if (T.isKnownWindowsMSVCEnvironment()) {
- StaticDtorSection =
- Ctx->getCOFFSection(".CRT$XTX",
- COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
- COFF::IMAGE_SCN_MEM_READ,
- SectionKind::getReadOnly());
- } else {
StaticDtorSection =
Ctx->getCOFFSection(".dtors",
COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
@@ -625,11 +649,16 @@ void MCObjectFileInfo::InitCOFFMCObjectFileInfo(Triple T) {
// though it contains relocatable pointers. In PIC mode, this is probably a
// big runtime hit for C++ apps. Either the contents of the LSDA need to be
// adjusted or this should be a data section.
- LSDASection =
- Ctx->getCOFFSection(".gcc_except_table",
- COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
- COFF::IMAGE_SCN_MEM_READ,
- SectionKind::getReadOnly());
+ assert(T.isOSWindows() && "Windows is the only supported COFF target");
+ if (T.getArch() == Triple::x86_64) {
+ // On Windows 64 with SEH, the LSDA is emitted into the .xdata section
+    LSDASection = nullptr;
+ } else {
+ LSDASection = Ctx->getCOFFSection(".gcc_except_table",
+ COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getReadOnly());
+ }
// Debug info.
COFFDebugSymbolsSection =
@@ -705,36 +734,46 @@ void MCObjectFileInfo::InitCOFFMCObjectFileInfo(Triple T) {
COFF::IMAGE_SCN_MEM_READ,
SectionKind::getMetadata());
DwarfInfoDWOSection =
- Ctx->getCOFFSection(".debug_info.dwo", COFF::IMAGE_SCN_MEM_DISCARDABLE |
- COFF::IMAGE_SCN_MEM_READ,
+ Ctx->getCOFFSection(".debug_info.dwo",
+ COFF::IMAGE_SCN_MEM_DISCARDABLE |
+ COFF::IMAGE_SCN_MEM_READ,
SectionKind::getMetadata());
DwarfAbbrevDWOSection =
- Ctx->getCOFFSection(".debug_abbrev.dwo", COFF::IMAGE_SCN_MEM_DISCARDABLE |
- COFF::IMAGE_SCN_MEM_READ,
+ Ctx->getCOFFSection(".debug_abbrev.dwo",
+ COFF::IMAGE_SCN_MEM_DISCARDABLE |
+ COFF::IMAGE_SCN_MEM_READ,
SectionKind::getMetadata());
DwarfStrDWOSection =
- Ctx->getCOFFSection(".debug_str.dwo", COFF::IMAGE_SCN_MEM_DISCARDABLE |
- COFF::IMAGE_SCN_MEM_READ,
+ Ctx->getCOFFSection(".debug_str.dwo",
+ COFF::IMAGE_SCN_MEM_DISCARDABLE |
+ COFF::IMAGE_SCN_MEM_READ,
SectionKind::getMetadata());
DwarfLineDWOSection =
- Ctx->getCOFFSection(".debug_line.dwo", COFF::IMAGE_SCN_MEM_DISCARDABLE |
- COFF::IMAGE_SCN_MEM_READ,
+ Ctx->getCOFFSection(".debug_line.dwo",
+ COFF::IMAGE_SCN_MEM_DISCARDABLE |
+ COFF::IMAGE_SCN_MEM_READ,
SectionKind::getMetadata());
DwarfLocDWOSection =
- Ctx->getCOFFSection(".debug_loc.dwo", COFF::IMAGE_SCN_MEM_DISCARDABLE |
- COFF::IMAGE_SCN_MEM_READ,
+ Ctx->getCOFFSection(".debug_loc.dwo",
+ COFF::IMAGE_SCN_MEM_DISCARDABLE |
+ COFF::IMAGE_SCN_MEM_READ,
SectionKind::getMetadata());
DwarfStrOffDWOSection =
- Ctx->getCOFFSection(".debug_str_offsets.dwo", COFF::IMAGE_SCN_MEM_DISCARDABLE |
- COFF::IMAGE_SCN_MEM_READ,
+ Ctx->getCOFFSection(".debug_str_offsets.dwo",
+ COFF::IMAGE_SCN_MEM_DISCARDABLE |
+ COFF::IMAGE_SCN_MEM_READ,
SectionKind::getMetadata());
- DwarfAddrSection = Ctx->getCOFFSection(
- ".debug_addr", COFF::IMAGE_SCN_MEM_DISCARDABLE | COFF::IMAGE_SCN_MEM_READ,
- SectionKind::getMetadata());
+
+ DwarfAddrSection =
+ Ctx->getCOFFSection(".debug_addr",
+ COFF::IMAGE_SCN_MEM_DISCARDABLE |
+ COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getMetadata());
DrectveSection =
Ctx->getCOFFSection(".drectve",
- COFF::IMAGE_SCN_LNK_INFO | COFF::IMAGE_SCN_LNK_REMOVE,
+ COFF::IMAGE_SCN_LNK_INFO |
+ COFF::IMAGE_SCN_LNK_REMOVE,
SectionKind::getMetadata());
PDataSection =
@@ -748,6 +787,7 @@ void MCObjectFileInfo::InitCOFFMCObjectFileInfo(Triple T) {
COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
COFF::IMAGE_SCN_MEM_READ,
SectionKind::getDataRel());
+
TLSDataSection =
Ctx->getCOFFSection(".tls$",
COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
@@ -756,7 +796,7 @@ void MCObjectFileInfo::InitCOFFMCObjectFileInfo(Triple T) {
SectionKind::getDataRel());
}
-void MCObjectFileInfo::InitMCObjectFileInfo(StringRef TT, Reloc::Model relocm,
+void MCObjectFileInfo::InitMCObjectFileInfo(StringRef T, Reloc::Model relocm,
CodeModel::Model cm,
MCContext &ctx) {
RelocM = relocm;
@@ -766,7 +806,6 @@ void MCObjectFileInfo::InitMCObjectFileInfo(StringRef TT, Reloc::Model relocm,
// Common.
CommDirectiveSupportsAlignment = true;
SupportsWeakOmittedEHFrame = true;
- IsFunctionEHFrameSymbolPrivate = true;
SupportsCompactUnwindWithoutEHFrame = false;
PersonalityEncoding = LSDAEncoding = FDECFIEncoding = TTypeEncoding =
@@ -781,8 +820,9 @@ void MCObjectFileInfo::InitMCObjectFileInfo(StringRef TT, Reloc::Model relocm,
DwarfAccelNamespaceSection = nullptr; // Used only by selected targets.
DwarfAccelTypesSection = nullptr; // Used only by selected targets.
- Triple T(TT);
- Triple::ArchType Arch = T.getArch();
+ TT = Triple(T);
+
+ Triple::ArchType Arch = TT.getArch();
// FIXME: Checking for Arch here to filter out bogus triples such as
// cellspu-apple-darwin. Perhaps we should fix in Triple?
if ((Arch == Triple::x86 || Arch == Triple::x86_64 ||
@@ -790,17 +830,17 @@ void MCObjectFileInfo::InitMCObjectFileInfo(StringRef TT, Reloc::Model relocm,
Arch == Triple::arm64 || Arch == Triple::aarch64 ||
Arch == Triple::ppc || Arch == Triple::ppc64 ||
Arch == Triple::UnknownArch) &&
- (T.isOSDarwin() || T.isOSBinFormatMachO())) {
+ (TT.isOSDarwin() || TT.isOSBinFormatMachO())) {
Env = IsMachO;
- InitMachOMCObjectFileInfo(T);
+ InitMachOMCObjectFileInfo(TT);
} else if ((Arch == Triple::x86 || Arch == Triple::x86_64 ||
Arch == Triple::arm || Arch == Triple::thumb) &&
- (T.isOSWindows() && T.getObjectFormat() == Triple::COFF)) {
+ (TT.isOSWindows() && TT.getObjectFormat() == Triple::COFF)) {
Env = IsCOFF;
- InitCOFFMCObjectFileInfo(T);
+ InitCOFFMCObjectFileInfo(TT);
} else {
Env = IsELF;
- InitELFMCObjectFileInfo(T);
+ InitELFMCObjectFileInfo(TT);
}
}
diff --git a/lib/MC/MCObjectStreamer.cpp b/lib/MC/MCObjectStreamer.cpp
index a1aa602..a721b59 100644
--- a/lib/MC/MCObjectStreamer.cpp
+++ b/lib/MC/MCObjectStreamer.cpp
@@ -83,32 +83,8 @@ MCDataFragment *MCObjectStreamer::getOrCreateDataFragment() const {
return F;
}
-const MCExpr *MCObjectStreamer::AddValueSymbols(const MCExpr *Value) {
- switch (Value->getKind()) {
- case MCExpr::Target:
- cast<MCTargetExpr>(Value)->AddValueSymbols(Assembler);
- break;
-
- case MCExpr::Constant:
- break;
-
- case MCExpr::Binary: {
- const MCBinaryExpr *BE = cast<MCBinaryExpr>(Value);
- AddValueSymbols(BE->getLHS());
- AddValueSymbols(BE->getRHS());
- break;
- }
-
- case MCExpr::SymbolRef:
- Assembler->getOrCreateSymbolData(cast<MCSymbolRefExpr>(Value)->getSymbol());
- break;
-
- case MCExpr::Unary:
- AddValueSymbols(cast<MCUnaryExpr>(Value)->getSubExpr());
- break;
- }
-
- return Value;
+void MCObjectStreamer::visitUsedSymbol(const MCSymbol &Sym) {
+ Assembler->getOrCreateSymbolData(Sym);
}
void MCObjectStreamer::EmitCFISections(bool EH, bool Debug) {
@@ -119,13 +95,14 @@ void MCObjectStreamer::EmitCFISections(bool EH, bool Debug) {
void MCObjectStreamer::EmitValueImpl(const MCExpr *Value, unsigned Size,
const SMLoc &Loc) {
+ MCStreamer::EmitValueImpl(Value, Size, Loc);
MCDataFragment *DF = getOrCreateDataFragment();
MCLineEntry::Make(this, getCurrentSection().first);
// Avoid fixups when possible.
int64_t AbsValue;
- if (AddValueSymbols(Value)->EvaluateAsAbsolute(AbsValue, getAssembler())) {
+ if (Value->EvaluateAsAbsolute(AbsValue, getAssembler())) {
EmitIntValue(AbsValue, Size);
return;
}
@@ -136,11 +113,14 @@ void MCObjectStreamer::EmitValueImpl(const MCExpr *Value, unsigned Size,
}
void MCObjectStreamer::EmitCFIStartProcImpl(MCDwarfFrameInfo &Frame) {
- RecordProcStart(Frame);
+ // We need to create a local symbol to avoid relocations.
+ Frame.Begin = getContext().CreateTempSymbol();
+ EmitLabel(Frame.Begin);
}
void MCObjectStreamer::EmitCFIEndProcImpl(MCDwarfFrameInfo &Frame) {
- RecordProcEnd(Frame);
+ Frame.End = getContext().CreateTempSymbol();
+ EmitLabel(Frame.End);
}
void MCObjectStreamer::EmitLabel(MCSymbol *Symbol) {
@@ -158,10 +138,6 @@ void MCObjectStreamer::EmitLabel(MCSymbol *Symbol) {
SD.setOffset(F->getContents().size());
}
-void MCObjectStreamer::EmitDebugLabel(MCSymbol *Symbol) {
- EmitLabel(Symbol);
-}
-
void MCObjectStreamer::EmitULEB128Value(const MCExpr *Value) {
int64_t IntValue;
if (Value->EvaluateAsAbsolute(IntValue, getAssembler())) {
@@ -205,15 +181,12 @@ void MCObjectStreamer::ChangeSection(const MCSection *Section,
void MCObjectStreamer::EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) {
getAssembler().getOrCreateSymbolData(*Symbol);
- AddValueSymbols(Value);
MCStreamer::EmitAssignment(Symbol, Value);
}
-void MCObjectStreamer::EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI) {
- // Scan for values.
- for (unsigned i = Inst.getNumOperands(); i--; )
- if (Inst.getOperand(i).isExpr())
- AddValueSymbols(Inst.getOperand(i).getExpr());
+void MCObjectStreamer::EmitInstruction(const MCInst &Inst,
+ const MCSubtargetInfo &STI) {
+ MCStreamer::EmitInstruction(Inst, STI);
MCSectionData *SD = getCurrentSectionData();
SD->setHasInstructions(true);
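The deleted AddValueSymbols() walked expressions by hand inside MCObjectStreamer; after this change the base MCStreamer drives a generic walk and calls the single visitUsedSymbol() hook for every symbol it meets. A toy version of that visitor split (all names are illustrative):

    #include <iostream>
    #include <string>
    #include <vector>

    struct ExprSketch {
      std::string Symbol;               // non-empty for a symbol reference
      std::vector<ExprSketch> Children; // sub-expressions (unary/binary)
    };

    class StreamerSketch {
    public:
      virtual ~StreamerSketch() = default;
      // Shared walk, owned by the base class.
      void visitUsedExpr(const ExprSketch &E) {
        if (!E.Symbol.empty())
          visitUsedSymbol(E.Symbol);
        for (const auto &C : E.Children)
          visitUsedExpr(C);
      }
      virtual void visitUsedSymbol(const std::string &) {} // default: no-op
    };

    class ObjectStreamerSketch : public StreamerSketch {
      void visitUsedSymbol(const std::string &S) override {
        std::cout << "getOrCreateSymbolData(" << S << ")\n"; // bookkeeping
      }
    };

    int main() {
      ExprSketch Bar{"bar", {}};
      ExprSketch Neg{"", {Bar}};    // unary-style wrapper
      ExprSketch Foo{"foo", {}};
      ExprSketch E{"", {Foo, Neg}}; // binary-style node
      ObjectStreamerSketch OS;
      OS.visitUsedExpr(E); // visits foo, then bar
    }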
diff --git a/lib/MC/MCParser/AsmLexer.cpp b/lib/MC/MCParser/AsmLexer.cpp
index bca516e..145ad4a 100644
--- a/lib/MC/MCParser/AsmLexer.cpp
+++ b/lib/MC/MCParser/AsmLexer.cpp
@@ -22,7 +22,6 @@
using namespace llvm;
AsmLexer::AsmLexer(const MCAsmInfo &_MAI) : MAI(_MAI) {
- CurBuf = nullptr;
CurPtr = nullptr;
isAtStartOfLine = true;
AllowAtInIdentifier = !StringRef(MAI.getCommentString()).startswith("@");
@@ -31,13 +30,13 @@ AsmLexer::AsmLexer(const MCAsmInfo &_MAI) : MAI(_MAI) {
AsmLexer::~AsmLexer() {
}
-void AsmLexer::setBuffer(const MemoryBuffer *buf, const char *ptr) {
- CurBuf = buf;
+void AsmLexer::setBuffer(StringRef Buf, const char *ptr) {
+ CurBuf = Buf;
if (ptr)
CurPtr = ptr;
else
- CurPtr = CurBuf->getBufferStart();
+ CurPtr = CurBuf.begin();
TokStart = nullptr;
}
@@ -58,7 +57,7 @@ int AsmLexer::getNextChar() {
case 0:
// A nul character in the stream is either the end of the current buffer or
// a random nul in the file. Disambiguate that here.
- if (CurPtr-1 != CurBuf->getBufferEnd())
+ if (CurPtr - 1 != CurBuf.end())
return 0; // Just whitespace.
// Otherwise, return end of file.
@@ -201,8 +200,8 @@ AsmToken AsmLexer::LexLineComment() {
CurChar = getNextChar();
if (CurChar == EOF)
- return AsmToken(AsmToken::Eof, StringRef(CurPtr, 0));
- return AsmToken(AsmToken::EndOfStatement, StringRef(CurPtr, 0));
+ return AsmToken(AsmToken::Eof, StringRef(TokStart, 0));
+ return AsmToken(AsmToken::EndOfStatement, StringRef(TokStart, 0));
}
static void SkipIgnoredIntegerSuffix(const char *&CurPtr) {
@@ -420,9 +419,8 @@ StringRef AsmLexer::LexUntilEndOfStatement() {
while (!isAtStartOfComment(*CurPtr) && // Start of line comment.
!isAtStatementSeparator(CurPtr) && // End of statement marker.
- *CurPtr != '\n' &&
- *CurPtr != '\r' &&
- (*CurPtr != 0 || CurPtr != CurBuf->getBufferEnd())) {
+ *CurPtr != '\n' && *CurPtr != '\r' &&
+ (*CurPtr != 0 || CurPtr != CurBuf.end())) {
++CurPtr;
}
return StringRef(TokStart, CurPtr-TokStart);
@@ -431,9 +429,8 @@ StringRef AsmLexer::LexUntilEndOfStatement() {
StringRef AsmLexer::LexUntilEndOfLine() {
TokStart = CurPtr;
- while (*CurPtr != '\n' &&
- *CurPtr != '\r' &&
- (*CurPtr != 0 || CurPtr != CurBuf->getBufferEnd())) {
+ while (*CurPtr != '\n' && *CurPtr != '\r' &&
+ (*CurPtr != 0 || CurPtr != CurBuf.end())) {
++CurPtr;
}
return StringRef(TokStart, CurPtr-TokStart);
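With the buffer now held as a StringRef, the lexer above distinguishes "nul byte that terminates the buffer" from "stray nul inside the file" by comparing the cursor against the end pointer, exactly the CurPtr - 1 != CurBuf.end() test. A standalone sketch of that sentinel check over a std::string (which guarantees a terminating nul at data()[size()]):

    #include <iostream>
    #include <string>

    int main() {
      std::string Buf("ab\0cd", 5);               // stray nul in the middle
      const char *CurPtr = Buf.data();
      const char *End = Buf.data() + Buf.size();  // one past the last byte

      while (true) {
        char C = *CurPtr++;        // may read the trailing '\0' terminator
        if (C == 0) {
          if (CurPtr - 1 != End) { // nul inside the buffer: treat as space
            std::cout << "(stray nul)\n";
            continue;
          }
          break;                   // nul at the end: end of input
        }
        std::cout << C << "\n";
      }
    }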
diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp
index 168597f..62ab4a5 100644
--- a/lib/MC/MCParser/AsmParser.cpp
+++ b/lib/MC/MCParser/AsmParser.cpp
@@ -102,7 +102,7 @@ public:
struct ParseStatementInfo {
/// \brief The parsed operands from the last parsed statement.
- SmallVector<MCParsedAsmOperand*, 8> ParsedOperands;
+ SmallVector<std::unique_ptr<MCParsedAsmOperand>, 8> ParsedOperands;
/// \brief The opcode from the last parsed instruction.
unsigned Opcode;
@@ -115,13 +115,6 @@ struct ParseStatementInfo {
ParseStatementInfo() : Opcode(~0U), ParseError(false), AsmRewrites(nullptr) {}
ParseStatementInfo(SmallVectorImpl<AsmRewrite> *rewrites)
: Opcode(~0), ParseError(false), AsmRewrites(rewrites) {}
-
- ~ParseStatementInfo() {
- // Free any parsed operands.
- for (unsigned i = 0, e = ParsedOperands.size(); i != e; ++i)
- delete ParsedOperands[i];
- ParsedOperands.clear();
- }
};
/// \brief The concrete assembly parser instance.
@@ -140,7 +133,7 @@ private:
/// This is the current buffer index we're lexing from as managed by the
/// SourceMgr object.
- int CurBuffer;
+ unsigned CurBuffer;
AsmCond TheCondState;
std::vector<AsmCond> TheCondStack;
@@ -169,13 +162,13 @@ private:
StringRef CppHashFilename;
int64_t CppHashLineNumber;
SMLoc CppHashLoc;
- int CppHashBuf;
+ unsigned CppHashBuf;
/// When generating dwarf for assembly source files we need to calculate the
/// logical line number based on the last parsed cpp hash file line comment
/// and current line. Since this is slow and messes up the SourceMgr's
/// cache we save the last info we queried with SrcMgr.FindLineNumber().
SMLoc LastQueryIDLoc;
- int LastQueryBuffer;
+ unsigned LastQueryBuffer;
unsigned LastQueryLine;
/// AssemblerDialect. ~0U means an unset value; use the value provided by MAI.
@@ -317,9 +310,9 @@ private:
/// current token is not set; clients should ensure Lex() is called
/// subsequently.
///
- /// \param InBuffer If not -1, should be the known buffer id that contains the
+ /// \param InBuffer If not 0, should be the known buffer id that contains the
/// location.
- void jumpToLoc(SMLoc Loc, int InBuffer=-1);
+ void jumpToLoc(SMLoc Loc, unsigned InBuffer = 0);
/// \brief Parse up to the end of statement and a return the contents from the
/// current token until the end of the statement; the current token on exit
@@ -352,8 +345,9 @@ private:
DK_REFERENCE, DK_WEAK_DEFINITION, DK_WEAK_REFERENCE,
DK_WEAK_DEF_CAN_BE_HIDDEN, DK_COMM, DK_COMMON, DK_LCOMM, DK_ABORT,
DK_INCLUDE, DK_INCBIN, DK_CODE16, DK_CODE16GCC, DK_REPT, DK_IRP, DK_IRPC,
- DK_IF, DK_IFNE, DK_IFB, DK_IFNB, DK_IFC, DK_IFEQS, DK_IFNC, DK_IFDEF,
- DK_IFNDEF, DK_IFNOTDEF, DK_ELSEIF, DK_ELSE, DK_ENDIF,
+ DK_IF, DK_IFEQ, DK_IFGE, DK_IFGT, DK_IFLE, DK_IFLT, DK_IFNE, DK_IFB,
+ DK_IFNB, DK_IFC, DK_IFEQS, DK_IFNC, DK_IFDEF, DK_IFNDEF, DK_IFNOTDEF,
+ DK_ELSEIF, DK_ELSE, DK_ENDIF,
DK_SPACE, DK_SKIP, DK_FILE, DK_LINE, DK_LOC, DK_STABS,
DK_CFI_SECTIONS, DK_CFI_STARTPROC, DK_CFI_ENDPROC, DK_CFI_DEF_CFA,
DK_CFI_DEF_CFA_OFFSET, DK_CFI_ADJUST_CFA_OFFSET, DK_CFI_DEF_CFA_REGISTER,
@@ -440,8 +434,8 @@ private:
bool parseDirectiveInclude(); // ".include"
bool parseDirectiveIncbin(); // ".incbin"
- // ".if" or ".ifne"
- bool parseDirectiveIf(SMLoc DirectiveLoc);
+ // ".if", ".ifeq", ".ifge", ".ifgt" , ".ifle", ".iflt" or ".ifne"
+ bool parseDirectiveIf(SMLoc DirectiveLoc, DirectiveKind DirKind);
// ".ifb" or ".ifnb", depending on ExpectBlank.
bool parseDirectiveIfb(SMLoc DirectiveLoc, bool ExpectBlank);
// ".ifc" or ".ifnc", depending on ExpectEqual.
@@ -497,15 +491,15 @@ enum { DEFAULT_ADDRSPACE = 0 };
AsmParser::AsmParser(SourceMgr &_SM, MCContext &_Ctx, MCStreamer &_Out,
const MCAsmInfo &_MAI)
: Lexer(_MAI), Ctx(_Ctx), Out(_Out), MAI(_MAI), SrcMgr(_SM),
- PlatformParser(nullptr), CurBuffer(0), MacrosEnabledFlag(true),
- HadError(false), CppHashLineNumber(0), AssemblerDialect(~0U),
- IsDarwin(false), ParsingInlineAsm(false) {
+ PlatformParser(nullptr), CurBuffer(_SM.getMainFileID()),
+ MacrosEnabledFlag(true), HadError(false), CppHashLineNumber(0),
+ AssemblerDialect(~0U), IsDarwin(false), ParsingInlineAsm(false) {
// Save the old handler.
SavedDiagHandler = SrcMgr.getDiagHandler();
SavedDiagContext = SrcMgr.getDiagContext();
// Set our own handler which calls the saved handler.
SrcMgr.setDiagHandler(DiagHandler, this);
- Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer));
+ Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer());
// Initialize the platform / file format parser.
switch (_Ctx.getObjectFileInfo()->getObjectFileType()) {
@@ -572,14 +566,13 @@ bool AsmParser::Error(SMLoc L, const Twine &Msg, ArrayRef<SMRange> Ranges) {
bool AsmParser::enterIncludeFile(const std::string &Filename) {
std::string IncludedFile;
- int NewBuf = SrcMgr.AddIncludeFile(Filename, Lexer.getLoc(), IncludedFile);
- if (NewBuf == -1)
+ unsigned NewBuf =
+ SrcMgr.AddIncludeFile(Filename, Lexer.getLoc(), IncludedFile);
+ if (!NewBuf)
return true;
CurBuffer = NewBuf;
-
- Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer));
-
+ Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer());
return false;
}
@@ -588,8 +581,9 @@ bool AsmParser::enterIncludeFile(const std::string &Filename) {
/// returns true on failure.
bool AsmParser::processIncbinFile(const std::string &Filename) {
std::string IncludedFile;
- int NewBuf = SrcMgr.AddIncludeFile(Filename, Lexer.getLoc(), IncludedFile);
- if (NewBuf == -1)
+ unsigned NewBuf =
+ SrcMgr.AddIncludeFile(Filename, Lexer.getLoc(), IncludedFile);
+ if (!NewBuf)
return true;
// Pick up the bytes from the file and emit them.
@@ -597,13 +591,10 @@ bool AsmParser::processIncbinFile(const std::string &Filename) {
return false;
}
-void AsmParser::jumpToLoc(SMLoc Loc, int InBuffer) {
- if (InBuffer != -1) {
- CurBuffer = InBuffer;
- } else {
- CurBuffer = SrcMgr.FindBufferContainingLoc(Loc);
- }
- Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer), Loc.getPointer());
+void AsmParser::jumpToLoc(SMLoc Loc, unsigned InBuffer) {
+ CurBuffer = InBuffer ? InBuffer : SrcMgr.FindBufferContainingLoc(Loc);
+ Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer(),
+ Loc.getPointer());
}
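The buffer-id plumbing above moves from int, with -1 as the "no buffer" sentinel, to SourceMgr's unsigned ids, where 0 is the sentinel and valid ids start at the main file. A minimal caller-side sketch of the new convention (names are taken from this hunk; the snippet itself is illustrative, not part of the patch):

    unsigned Buf = SrcMgr.FindBufferContainingLoc(Loc); // 0 => not found
    if (!Buf)
      return true; // location is not in any known buffer
    Lexer.setBuffer(SrcMgr.getMemoryBuffer(Buf)->getBuffer(),
                    Loc.getPointer());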
const AsmToken &AsmParser::Lex() {
@@ -639,10 +630,12 @@ bool AsmParser::Run(bool NoInitialTextSection, bool NoFinalize) {
// If we are generating dwarf for assembly source files, save the initial text
// section and generate a .file directive.
if (getContext().getGenDwarfForAssembly()) {
- getContext().setGenDwarfSection(getStreamer().getCurrentSection().first);
MCSymbol *SectionStartSym = getContext().CreateTempSymbol();
getStreamer().EmitLabel(SectionStartSym);
- getContext().setGenDwarfSectionStartSym(SectionStartSym);
+ auto InsertResult = getContext().addGenDwarfSection(
+ getStreamer().getCurrentSection().first);
+ assert(InsertResult.second && ".text section should not have debug info yet");
+ InsertResult.first->second.first = SectionStartSym;
getContext().setGenDwarfFileNumber(getStreamer().EmitDwarfFileDirective(
0, StringRef(), getContext().getMainFileName()));
}
@@ -818,7 +811,19 @@ bool AsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
// Parse symbol variant
std::pair<StringRef, StringRef> Split;
if (!MAI.useParensForSymbolVariant()) {
- Split = Identifier.split('@');
+ if (FirstTokenKind == AsmToken::String) {
+ if (Lexer.is(AsmToken::At)) {
+ Lexer.Lex(); // eat @
+ SMLoc AtLoc = getLexer().getLoc();
+ StringRef VName;
+ if (parseIdentifier(VName))
+ return Error(AtLoc, "expected symbol variant after '@'");
+
+ Split = std::make_pair(Identifier, VName);
+ }
+ } else {
+ Split = Identifier.split('@');
+ }
} else if (Lexer.is(AsmToken::LParen)) {
Lexer.Lex(); // eat (
StringRef VName;
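For quoted symbols the variant can no longer be recovered by splitting the identifier on '@', since a quoted name may legitimately contain one; instead, the '@' after the closing quote arrives as its own AsmToken::At and the variant is parsed explicitly. A small StringRef illustration of the difference (a sketch, not part of the patch):

    #include "llvm/ADT/StringRef.h"
    using llvm::StringRef;

    // Unquoted: the variant is embedded in the identifier token itself.
    std::pair<StringRef, StringRef> S = StringRef("sym@PLT").split('@');
    // S.first == "sym", S.second == "PLT"

    // Quoted: "sym@bol"@PLT lexes as the String token "sym@bol" followed by
    // a separate At token, so split('@') would cut the name in the wrong
    // place; hence the explicit parse in the hunk above.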
@@ -1236,8 +1241,13 @@ bool AsmParser::parseStatement(ParseStatementInfo &Info) {
default:
break;
case DK_IF:
+ case DK_IFEQ:
+ case DK_IFGE:
+ case DK_IFGT:
+ case DK_IFLE:
+ case DK_IFLT:
case DK_IFNE:
- return parseDirectiveIf(IDLoc);
+ return parseDirectiveIf(IDLoc, DirKind);
case DK_IFB:
return parseDirectiveIfb(IDLoc, true);
case DK_IFNB:
@@ -1581,12 +1591,11 @@ bool AsmParser::parseStatement(ParseStatementInfo &Info) {
printMessage(IDLoc, SourceMgr::DK_Note, OS.str());
}
- // If we are generating dwarf for assembly source files and the current
- // section is the initial text section then generate a .loc directive for
- // the instruction.
+ // If we are generating dwarf for the current section then generate a .loc
+ // directive for the instruction.
if (!HadError && getContext().getGenDwarfForAssembly() &&
- getContext().getGenDwarfSection() ==
- getStreamer().getCurrentSection().first) {
+ getContext().getGenDwarfSectionSyms().count(
+ getStreamer().getCurrentSection().first)) {
unsigned Line = SrcMgr.FindLineNumber(IDLoc, CurBuffer);
@@ -1685,13 +1694,15 @@ void AsmParser::DiagHandler(const SMDiagnostic &Diag, void *Context) {
const SourceMgr &DiagSrcMgr = *Diag.getSourceMgr();
const SMLoc &DiagLoc = Diag.getLoc();
- int DiagBuf = DiagSrcMgr.FindBufferContainingLoc(DiagLoc);
- int CppHashBuf = Parser->SrcMgr.FindBufferContainingLoc(Parser->CppHashLoc);
+ unsigned DiagBuf = DiagSrcMgr.FindBufferContainingLoc(DiagLoc);
+ unsigned CppHashBuf =
+ Parser->SrcMgr.FindBufferContainingLoc(Parser->CppHashLoc);
// Like SourceMgr::printMessage() we need to print the include stack if any
// before printing the message.
- int DiagCurBuffer = DiagSrcMgr.FindBufferContainingLoc(DiagLoc);
- if (!Parser->SavedDiagHandler && DiagCurBuffer > 0) {
+ unsigned DiagCurBuffer = DiagSrcMgr.FindBufferContainingLoc(DiagLoc);
+ if (!Parser->SavedDiagHandler && DiagCurBuffer &&
+ DiagCurBuffer != DiagSrcMgr.getMainFileID()) {
SMLoc ParentIncludeLoc = DiagSrcMgr.getParentIncludeLoc(DiagCurBuffer);
DiagSrcMgr.PrintIncludeStack(ParentIncludeLoc, OS);
}
@@ -2018,7 +2029,7 @@ bool AsmParser::parseMacroArguments(const MCAsmMacro *M,
break;
if (FAI >= NParameters) {
- assert(M && "expected macro to be defined");
+ assert(M && "expected macro to be defined");
Error(IDLoc,
"parameter named '" + FA.Name + "' does not exist for macro '" +
M->Name + "'");
@@ -2117,7 +2128,7 @@ bool AsmParser::handleMacroEntry(const MCAsmMacro *M, SMLoc NameLoc) {
// Jump to the macro instantiation and prime the lexer.
CurBuffer = SrcMgr.AddNewSourceBuffer(MI->Instantiation, SMLoc());
- Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer));
+ Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer());
Lex();
return false;
@@ -3799,9 +3810,8 @@ bool AsmParser::parseDirectiveIncbin() {
}
/// parseDirectiveIf
-/// ::= .if expression
-/// ::= .ifne expression
-bool AsmParser::parseDirectiveIf(SMLoc DirectiveLoc) {
+/// ::= .if{,eq,ge,gt,le,lt,ne} expression
+bool AsmParser::parseDirectiveIf(SMLoc DirectiveLoc, DirectiveKind DirKind) {
TheCondStack.push_back(TheCondState);
TheCondState.TheCond = AsmCond::IfCond;
if (TheCondState.Ignore) {
@@ -3816,6 +3826,29 @@ bool AsmParser::parseDirectiveIf(SMLoc DirectiveLoc) {
Lex();
+ switch (DirKind) {
+ default:
+ llvm_unreachable("unsupported directive");
+ case DK_IF:
+ case DK_IFNE:
+ break;
+ case DK_IFEQ:
+ ExprValue = ExprValue == 0;
+ break;
+ case DK_IFGE:
+ ExprValue = ExprValue >= 0;
+ break;
+ case DK_IFGT:
+ ExprValue = ExprValue > 0;
+ break;
+ case DK_IFLE:
+ ExprValue = ExprValue <= 0;
+ break;
+ case DK_IFLT:
+ ExprValue = ExprValue < 0;
+ break;
+ }
+
TheCondState.CondMet = ExprValue;
TheCondState.Ignore = !TheCondState.CondMet;
}
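Each of the new comparison directives simply folds the parsed expression into a boolean before it reaches TheCondState. A self-contained restatement of the mapping (the enum is a stand-in for AsmParser's DirectiveKind; the helper is ours, not part of the patch):

    #include <cassert>
    #include <cstdint>

    enum DirKind { IF, IFEQ, IFGE, IFGT, IFLE, IFLT, IFNE };

    // Mirrors the switch in parseDirectiveIf above.
    static bool condTaken(DirKind K, int64_t V) {
      switch (K) {
      case IF:
      case IFNE: return V != 0; // .if/.ifne: taken when non-zero
      case IFEQ: return V == 0;
      case IFGE: return V >= 0;
      case IFGT: return V > 0;
      case IFLE: return V <= 0;
      case IFLT: return V < 0;
      }
      return false;
    }

    int main() {
      assert(condTaken(IFLE, 0) && !condTaken(IFLT, 0));
    }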
@@ -4118,6 +4151,11 @@ void AsmParser::initializeDirectiveKindMap() {
DirectiveKindMap[".bundle_lock"] = DK_BUNDLE_LOCK;
DirectiveKindMap[".bundle_unlock"] = DK_BUNDLE_UNLOCK;
DirectiveKindMap[".if"] = DK_IF;
+ DirectiveKindMap[".ifeq"] = DK_IFEQ;
+ DirectiveKindMap[".ifge"] = DK_IFGE;
+ DirectiveKindMap[".ifgt"] = DK_IFGT;
+ DirectiveKindMap[".ifle"] = DK_IFLE;
+ DirectiveKindMap[".iflt"] = DK_IFLT;
DirectiveKindMap[".ifne"] = DK_IFNE;
DirectiveKindMap[".ifb"] = DK_IFB;
DirectiveKindMap[".ifnb"] = DK_IFNB;
@@ -4227,7 +4265,7 @@ void AsmParser::instantiateMacroLikeBody(MCAsmMacro *M, SMLoc DirectiveLoc,
// Jump to the macro instantiation and prime the lexer.
CurBuffer = SrcMgr.AddNewSourceBuffer(MI->Instantiation, SMLoc());
- Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer));
+ Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer());
Lex();
}
@@ -4465,27 +4503,27 @@ bool AsmParser::parseMSInlineAsm(
// Build the list of clobbers, outputs and inputs.
for (unsigned i = 1, e = Info.ParsedOperands.size(); i != e; ++i) {
- MCParsedAsmOperand *Operand = Info.ParsedOperands[i];
+ MCParsedAsmOperand &Operand = *Info.ParsedOperands[i];
// Immediate.
- if (Operand->isImm())
+ if (Operand.isImm())
continue;
// Register operand.
- if (Operand->isReg() && !Operand->needAddressOf()) {
+ if (Operand.isReg() && !Operand.needAddressOf()) {
unsigned NumDefs = Desc.getNumDefs();
// Clobber.
- if (NumDefs && Operand->getMCOperandNum() < NumDefs)
- ClobberRegs.push_back(Operand->getReg());
+ if (NumDefs && Operand.getMCOperandNum() < NumDefs)
+ ClobberRegs.push_back(Operand.getReg());
continue;
}
// Expr/Input or Output.
- StringRef SymName = Operand->getSymName();
+ StringRef SymName = Operand.getSymName();
if (SymName.empty())
continue;
- void *OpDecl = Operand->getOpDecl();
+ void *OpDecl = Operand.getOpDecl();
if (!OpDecl)
continue;
@@ -4494,21 +4532,21 @@ bool AsmParser::parseMSInlineAsm(
if (isOutput) {
++InputIdx;
OutputDecls.push_back(OpDecl);
- OutputDeclsAddressOf.push_back(Operand->needAddressOf());
- OutputConstraints.push_back('=' + Operand->getConstraint().str());
+ OutputDeclsAddressOf.push_back(Operand.needAddressOf());
+ OutputConstraints.push_back('=' + Operand.getConstraint().str());
AsmStrRewrites.push_back(AsmRewrite(AOK_Output, Start, SymName.size()));
} else {
InputDecls.push_back(OpDecl);
- InputDeclsAddressOf.push_back(Operand->needAddressOf());
- InputConstraints.push_back(Operand->getConstraint().str());
+ InputDeclsAddressOf.push_back(Operand.needAddressOf());
+ InputConstraints.push_back(Operand.getConstraint().str());
AsmStrRewrites.push_back(AsmRewrite(AOK_Input, Start, SymName.size()));
}
}
// Consider implicit defs to be clobbers. Think of cpuid and push.
- const uint16_t *ImpDefs = Desc.getImplicitDefs();
- for (unsigned I = 0, E = Desc.getNumImplicitDefs(); I != E; ++I)
- ClobberRegs.push_back(ImpDefs[I]);
+ ArrayRef<uint16_t> ImpDefs(Desc.getImplicitDefs(),
+ Desc.getNumImplicitDefs());
+ ClobberRegs.insert(ClobberRegs.end(), ImpDefs.begin(), ImpDefs.end());
}
// Set the number of Outputs and Inputs.
@@ -4543,27 +4581,26 @@ bool AsmParser::parseMSInlineAsm(
// Build the IR assembly string.
std::string AsmStringIR;
raw_string_ostream OS(AsmStringIR);
- const char *AsmStart = SrcMgr.getMemoryBuffer(0)->getBufferStart();
- const char *AsmEnd = SrcMgr.getMemoryBuffer(0)->getBufferEnd();
+ StringRef ASMString =
+ SrcMgr.getMemoryBuffer(SrcMgr.getMainFileID())->getBuffer();
+ const char *AsmStart = ASMString.begin();
+ const char *AsmEnd = ASMString.end();
array_pod_sort(AsmStrRewrites.begin(), AsmStrRewrites.end(), rewritesSort);
- for (SmallVectorImpl<AsmRewrite>::iterator I = AsmStrRewrites.begin(),
- E = AsmStrRewrites.end();
- I != E; ++I) {
- AsmRewriteKind Kind = (*I).Kind;
+ for (const AsmRewrite &AR : AsmStrRewrites) {
+ AsmRewriteKind Kind = AR.Kind;
if (Kind == AOK_Delete)
continue;
- const char *Loc = (*I).Loc.getPointer();
+ const char *Loc = AR.Loc.getPointer();
assert(Loc >= AsmStart && "Expected Loc to be at or after Start!");
// Emit everything up to the immediate/expression.
- unsigned Len = Loc - AsmStart;
- if (Len)
+ if (unsigned Len = Loc - AsmStart)
OS << StringRef(AsmStart, Len);
// Skip the original expression.
if (Kind == AOK_Skip) {
- AsmStart = Loc + (*I).Len;
+ AsmStart = Loc + AR.Len;
continue;
}
@@ -4573,7 +4610,7 @@ bool AsmParser::parseMSInlineAsm(
default:
break;
case AOK_Imm:
- OS << "$$" << (*I).Val;
+ OS << "$$" << AR.Val;
break;
case AOK_ImmPrefix:
OS << "$$";
@@ -4585,7 +4622,7 @@ bool AsmParser::parseMSInlineAsm(
OS << '$' << OutputIdx++;
break;
case AOK_SizeDirective:
- switch ((*I).Val) {
+ switch (AR.Val) {
default: break;
case 8: OS << "byte ptr "; break;
case 16: OS << "word ptr "; break;
@@ -4600,7 +4637,7 @@ bool AsmParser::parseMSInlineAsm(
OS << ".byte";
break;
case AOK_Align: {
- unsigned Val = (*I).Val;
+ unsigned Val = AR.Val;
OS << ".align " << Val;
// Skip the original immediate.
@@ -4613,12 +4650,12 @@ bool AsmParser::parseMSInlineAsm(
OS.flush();
if (AsmStringIR.back() != '.')
OS << '.';
- OS << (*I).Val;
+ OS << AR.Val;
break;
}
// Skip the original expression.
- AsmStart = Loc + (*I).Len + AdditionalSkip;
+ AsmStart = Loc + AR.Len + AdditionalSkip;
}
// Emit the remainder of the asm string.
diff --git a/lib/MC/MCParser/COFFAsmParser.cpp b/lib/MC/MCParser/COFFAsmParser.cpp
index decf01c..5ecf9e5 100644
--- a/lib/MC/MCParser/COFFAsmParser.cpp
+++ b/lib/MC/MCParser/COFFAsmParser.cpp
@@ -13,6 +13,7 @@
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSectionCOFF.h"
@@ -37,7 +38,7 @@ class COFFAsmParser : public MCAsmParserExtension {
bool ParseSectionSwitch(StringRef Section, unsigned Characteristics,
SectionKind Kind, StringRef COMDATSymName,
- COFF::COMDATType Type, const MCSectionCOFF *Assoc);
+ COFF::COMDATType Type);
bool ParseSectionName(StringRef &SectionName);
bool ParseSectionFlags(StringRef FlagsString, unsigned* Flags);
@@ -117,8 +118,7 @@ class COFFAsmParser : public MCAsmParserExtension {
bool ParseDirectiveEndef(StringRef, SMLoc);
bool ParseDirectiveSecRel32(StringRef, SMLoc);
bool ParseDirectiveSecIdx(StringRef, SMLoc);
- bool parseCOMDATTypeAndAssoc(COFF::COMDATType &Type,
- const MCSectionCOFF *&Assoc);
+ bool parseCOMDATType(COFF::COMDATType &Type);
bool ParseDirectiveLinkOnce(StringRef, SMLoc);
// Win64 EH directives.
@@ -170,8 +170,8 @@ bool COFFAsmParser::ParseSectionFlags(StringRef FlagsString, unsigned* Flags) {
bool ReadOnlyRemoved = false;
unsigned SecFlags = None;
- for (unsigned i = 0; i < FlagsString.size(); ++i) {
- switch (FlagsString[i]) {
+ for (char FlagChar : FlagsString) {
+ switch (FlagChar) {
case 'a':
// Ignored.
break;
@@ -292,22 +292,20 @@ bool COFFAsmParser::ParseDirectiveSymbolAttribute(StringRef Directive, SMLoc) {
bool COFFAsmParser::ParseSectionSwitch(StringRef Section,
unsigned Characteristics,
SectionKind Kind) {
- return ParseSectionSwitch(Section, Characteristics, Kind, "",
- COFF::IMAGE_COMDAT_SELECT_ANY, nullptr);
+ return ParseSectionSwitch(Section, Characteristics, Kind, "",
+                           (COFF::COMDATType)0);
}
bool COFFAsmParser::ParseSectionSwitch(StringRef Section,
unsigned Characteristics,
SectionKind Kind,
StringRef COMDATSymName,
- COFF::COMDATType Type,
- const MCSectionCOFF *Assoc) {
+ COFF::COMDATType Type) {
if (getLexer().isNot(AsmToken::EndOfStatement))
return TokError("unexpected token in section switching directive");
Lex();
getStreamer().SwitchSection(getContext().getCOFFSection(
- Section, Characteristics, Kind, COMDATSymName, Type, Assoc));
+ Section, Characteristics, Kind, COMDATSymName, Type));
return false;
}
@@ -358,15 +356,15 @@ bool COFFAsmParser::ParseDirectiveSection(StringRef, SMLoc) {
return true;
}
- COFF::COMDATType Type = COFF::IMAGE_COMDAT_SELECT_ANY;
- const MCSectionCOFF *Assoc = nullptr;
+ COFF::COMDATType Type = (COFF::COMDATType)0;
StringRef COMDATSymName;
if (getLexer().is(AsmToken::Comma)) {
+ Type = COFF::IMAGE_COMDAT_SELECT_ANY;
Lex();
Flags |= COFF::IMAGE_SCN_LNK_COMDAT;
- if (parseCOMDATTypeAndAssoc(Type, Assoc))
+ if (parseCOMDATType(Type))
return true;
if (getLexer().isNot(AsmToken::Comma))
@@ -381,7 +379,12 @@ bool COFFAsmParser::ParseDirectiveSection(StringRef, SMLoc) {
return TokError("unexpected token in directive");
SectionKind Kind = computeSectionKind(Flags);
- ParseSectionSwitch(SectionName, Flags, Kind, COMDATSymName, Type, Assoc);
+ if (Kind.isText()) {
+ const Triple &T = getContext().getObjectFileInfo()->getTargetTriple();
+ if (T.getArch() == Triple::arm || T.getArch() == Triple::thumb)
+ Flags |= COFF::IMAGE_SCN_MEM_16BIT;
+ }
+ ParseSectionSwitch(SectionName, Flags, Kind, COMDATSymName, Type);
return false;
}
@@ -461,9 +464,8 @@ bool COFFAsmParser::ParseDirectiveSecIdx(StringRef, SMLoc) {
return false;
}
-/// ::= [ identifier [ identifier ] ]
-bool COFFAsmParser::parseCOMDATTypeAndAssoc(COFF::COMDATType &Type,
- const MCSectionCOFF *&Assoc) {
+/// ::= [ identifier ]
+bool COFFAsmParser::parseCOMDATType(COFF::COMDATType &Type) {
StringRef TypeId = getTok().getIdentifier();
Type = StringSwitch<COFF::COMDATType>(TypeId)
@@ -481,48 +483,28 @@ bool COFFAsmParser::parseCOMDATTypeAndAssoc(COFF::COMDATType &Type,
Lex();
- if (Type == COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE) {
- SMLoc Loc = getTok().getLoc();
- StringRef AssocName;
- if (ParseSectionName(AssocName))
- return TokError("expected associated section name");
-
- Assoc = static_cast<const MCSectionCOFF*>(
- getContext().getCOFFSection(AssocName));
- if (!Assoc)
- return Error(Loc, "cannot associate unknown section '" + AssocName + "'");
- if (!(Assoc->getCharacteristics() & COFF::IMAGE_SCN_LNK_COMDAT))
- return Error(Loc, "associated section must be a COMDAT section");
- if (Assoc->getSelection() == COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE)
- return Error(Loc, "associated section cannot be itself associative");
- }
-
return false;
}
/// ParseDirectiveLinkOnce
-/// ::= .linkonce [ identifier [ identifier ] ]
+/// ::= .linkonce [ identifier ]
bool COFFAsmParser::ParseDirectiveLinkOnce(StringRef, SMLoc Loc) {
COFF::COMDATType Type = COFF::IMAGE_COMDAT_SELECT_ANY;
- const MCSectionCOFF *Assoc = nullptr;
if (getLexer().is(AsmToken::Identifier))
- if (parseCOMDATTypeAndAssoc(Type, Assoc))
+ if (parseCOMDATType(Type))
return true;
const MCSectionCOFF *Current = static_cast<const MCSectionCOFF*>(
getStreamer().getCurrentSection().first);
-
- if (Type == COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE) {
- if (Assoc == Current)
- return Error(Loc, "cannot associate a section with itself");
- }
+ if (Type == COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE)
+ return Error(Loc, "cannot make section associative with .linkonce");
if (Current->getCharacteristics() & COFF::IMAGE_SCN_LNK_COMDAT)
return Error(Loc, Twine("section '") + Current->getSectionName() +
"' is already linkonce");
- Current->setSelection(Type, Assoc);
+ Current->setSelection(Type);
if (getLexer().isNot(AsmToken::EndOfStatement))
return TokError("unexpected token in directive");
@@ -541,25 +523,25 @@ bool COFFAsmParser::ParseSEHDirectiveStartProc(StringRef, SMLoc) {
MCSymbol *Symbol = getContext().GetOrCreateSymbol(SymbolID);
Lex();
- getStreamer().EmitWin64EHStartProc(Symbol);
+ getStreamer().EmitWinCFIStartProc(Symbol);
return false;
}
bool COFFAsmParser::ParseSEHDirectiveEndProc(StringRef, SMLoc) {
Lex();
- getStreamer().EmitWin64EHEndProc();
+ getStreamer().EmitWinCFIEndProc();
return false;
}
bool COFFAsmParser::ParseSEHDirectiveStartChained(StringRef, SMLoc) {
Lex();
- getStreamer().EmitWin64EHStartChained();
+ getStreamer().EmitWinCFIStartChained();
return false;
}
bool COFFAsmParser::ParseSEHDirectiveEndChained(StringRef, SMLoc) {
Lex();
- getStreamer().EmitWin64EHEndChained();
+ getStreamer().EmitWinCFIEndChained();
return false;
}
@@ -585,13 +567,13 @@ bool COFFAsmParser::ParseSEHDirectiveHandler(StringRef, SMLoc) {
MCSymbol *handler = getContext().GetOrCreateSymbol(SymbolID);
Lex();
- getStreamer().EmitWin64EHHandler(handler, unwind, except);
+ getStreamer().EmitWinEHHandler(handler, unwind, except);
return false;
}
bool COFFAsmParser::ParseSEHDirectiveHandlerData(StringRef, SMLoc) {
Lex();
- getStreamer().EmitWin64EHHandlerData();
+ getStreamer().EmitWinEHHandlerData();
return false;
}
@@ -604,7 +586,7 @@ bool COFFAsmParser::ParseSEHDirectivePushReg(StringRef, SMLoc L) {
return TokError("unexpected token in directive");
Lex();
- getStreamer().EmitWin64EHPushReg(Reg);
+ getStreamer().EmitWinCFIPushReg(Reg);
return false;
}
@@ -628,7 +610,7 @@ bool COFFAsmParser::ParseSEHDirectiveSetFrame(StringRef, SMLoc L) {
return TokError("unexpected token in directive");
Lex();
- getStreamer().EmitWin64EHSetFrame(Reg, Off);
+ getStreamer().EmitWinCFISetFrame(Reg, Off);
return false;
}
@@ -645,7 +627,7 @@ bool COFFAsmParser::ParseSEHDirectiveAllocStack(StringRef, SMLoc) {
return TokError("unexpected token in directive");
Lex();
- getStreamer().EmitWin64EHAllocStack(Size);
+ getStreamer().EmitWinCFIAllocStack(Size);
return false;
}
@@ -670,7 +652,7 @@ bool COFFAsmParser::ParseSEHDirectiveSaveReg(StringRef, SMLoc L) {
Lex();
// FIXME: Err on %xmm* registers
- getStreamer().EmitWin64EHSaveReg(Reg, Off);
+ getStreamer().EmitWinCFISaveReg(Reg, Off);
return false;
}
@@ -697,7 +679,7 @@ bool COFFAsmParser::ParseSEHDirectiveSaveXMM(StringRef, SMLoc L) {
Lex();
// FIXME: Err on non-%xmm* registers
- getStreamer().EmitWin64EHSaveXMM(Reg, Off);
+ getStreamer().EmitWinCFISaveXMM(Reg, Off);
return false;
}
@@ -718,13 +700,13 @@ bool COFFAsmParser::ParseSEHDirectivePushFrame(StringRef, SMLoc) {
return TokError("unexpected token in directive");
Lex();
- getStreamer().EmitWin64EHPushFrame(Code);
+ getStreamer().EmitWinCFIPushFrame(Code);
return false;
}
bool COFFAsmParser::ParseSEHDirectiveEndProlog(StringRef, SMLoc) {
Lex();
- getStreamer().EmitWin64EHEndProlog();
+ getStreamer().EmitWinCFIEndProlog();
return false;
}
diff --git a/lib/MC/MCParser/DarwinAsmParser.cpp b/lib/MC/MCParser/DarwinAsmParser.cpp
index f74b30a..b2a6785 100644
--- a/lib/MC/MCParser/DarwinAsmParser.cpp
+++ b/lib/MC/MCParser/DarwinAsmParser.cpp
@@ -650,7 +650,7 @@ bool DarwinAsmParser::parseDirectiveSecureLogUnique(StringRef, SMLoc IDLoc) {
}
// Write the message.
- int CurBuf = getSourceManager().FindBufferContainingLoc(IDLoc);
+ unsigned CurBuf = getSourceManager().FindBufferContainingLoc(IDLoc);
*OS << getSourceManager().getBufferInfo(CurBuf).Buffer->getBufferIdentifier()
<< ":" << getSourceManager().FindLineNumber(IDLoc, CurBuf) << ":"
<< LogMessage + "\n";
diff --git a/lib/MC/MCParser/ELFAsmParser.cpp b/lib/MC/MCParser/ELFAsmParser.cpp
index 95c4971..98b2b3b 100644
--- a/lib/MC/MCParser/ELFAsmParser.cpp
+++ b/lib/MC/MCParser/ELFAsmParser.cpp
@@ -150,7 +150,7 @@ public:
private:
bool ParseSectionName(StringRef &SectionName);
- bool ParseSectionArguments(bool IsPush);
+ bool ParseSectionArguments(bool IsPush, SMLoc loc);
unsigned parseSunStyleSectionFlags();
};
@@ -382,7 +382,7 @@ unsigned ELFAsmParser::parseSunStyleSectionFlags() {
bool ELFAsmParser::ParseDirectivePushSection(StringRef s, SMLoc loc) {
getStreamer().PushSection();
- if (ParseSectionArguments(/*IsPush=*/true)) {
+ if (ParseSectionArguments(/*IsPush=*/true, loc)) {
getStreamer().PopSection();
return true;
}
@@ -397,11 +397,11 @@ bool ELFAsmParser::ParseDirectivePopSection(StringRef, SMLoc) {
}
// FIXME: This is a work in progress.
-bool ELFAsmParser::ParseDirectiveSection(StringRef, SMLoc) {
- return ParseSectionArguments(/*IsPush=*/false);
+bool ELFAsmParser::ParseDirectiveSection(StringRef, SMLoc loc) {
+ return ParseSectionArguments(/*IsPush=*/false, loc);
}
-bool ELFAsmParser::ParseSectionArguments(bool IsPush) {
+bool ELFAsmParser::ParseSectionArguments(bool IsPush, SMLoc loc) {
StringRef SectionName;
if (ParseSectionName(SectionName))
@@ -545,10 +545,24 @@ EndStmt:
}
SectionKind Kind = computeSectionKind(Flags, Size);
- getStreamer().SwitchSection(getContext().getELFSection(SectionName, Type,
- Flags, Kind, Size,
- GroupName),
- Subsection);
+ const MCSection *ELFSection = getContext().getELFSection(
+ SectionName, Type, Flags, Kind, Size, GroupName);
+ getStreamer().SwitchSection(ELFSection, Subsection);
+
+ if (getContext().getGenDwarfForAssembly()) {
+ auto &Sections = getContext().getGenDwarfSectionSyms();
+ auto InsertResult = Sections.insert(
+ std::make_pair(ELFSection, std::make_pair(nullptr, nullptr)));
+ if (InsertResult.second) {
+ if (getContext().getDwarfVersion() <= 2)
+ Error(loc, "DWARF2 only supports one section per compilation unit");
+
+ MCSymbol *SectionStartSymbol = getContext().CreateTempSymbol();
+ getStreamer().EmitLabel(SectionStartSymbol);
+ InsertResult.first->second.first = SectionStartSymbol;
+ }
+ }
+
return false;
}
@@ -561,6 +575,19 @@ bool ELFAsmParser::ParseDirectivePrevious(StringRef DirName, SMLoc) {
return false;
}
+static MCSymbolAttr MCAttrForString(StringRef Type) {
+ return StringSwitch<MCSymbolAttr>(Type)
+ .Cases("STT_FUNC", "function", MCSA_ELF_TypeFunction)
+ .Cases("STT_OBJECT", "object", MCSA_ELF_TypeObject)
+ .Cases("STT_TLS", "tls_object", MCSA_ELF_TypeTLS)
+ .Cases("STT_COMMON", "common", MCSA_ELF_TypeCommon)
+ .Cases("STT_NOTYPE", "notype", MCSA_ELF_TypeNoType)
+ .Cases("STT_GNU_IFUNC", "gnu_indirect_function",
+ MCSA_ELF_TypeIndFunction)
+ .Case("gnu_unique_object", MCSA_ELF_TypeGnuUniqueObject)
+ .Default(MCSA_Invalid);
+}
+
/// ParseDirectiveELFType
/// ::= .type identifier , STT_<TYPE_IN_UPPER_CASE>
/// ::= .type identifier , #attribute
@@ -575,53 +602,36 @@ bool ELFAsmParser::ParseDirectiveType(StringRef, SMLoc) {
// Handle the identifier as the key symbol.
MCSymbol *Sym = getContext().GetOrCreateSymbol(Name);
- if (getLexer().isNot(AsmToken::Comma))
- return TokError("unexpected token in '.type' directive");
- Lex();
-
- StringRef Type;
- SMLoc TypeLoc;
- MCSymbolAttr Attr;
- if (getLexer().is(AsmToken::Identifier)) {
- TypeLoc = getLexer().getLoc();
- if (getParser().parseIdentifier(Type))
- return TokError("expected symbol type in directive");
- Attr = StringSwitch<MCSymbolAttr>(Type)
- .Case("STT_FUNC", MCSA_ELF_TypeFunction)
- .Case("STT_OBJECT", MCSA_ELF_TypeObject)
- .Case("STT_TLS", MCSA_ELF_TypeTLS)
- .Case("STT_COMMON", MCSA_ELF_TypeCommon)
- .Case("STT_NOTYPE", MCSA_ELF_TypeNoType)
- .Case("STT_GNU_IFUNC", MCSA_ELF_TypeIndFunction)
- .Default(MCSA_Invalid);
- } else if (getLexer().is(AsmToken::Hash) || getLexer().is(AsmToken::At) ||
- getLexer().is(AsmToken::Percent) ||
- getLexer().is(AsmToken::String)) {
- if (!getLexer().is(AsmToken::String))
- Lex();
+ // NOTE: the comma is optional in all cases. It is only documented as being
+ // optional in the first case; however, GAS silently treats the comma as
+ // optional in all cases. Furthermore, although the documentation states that
+ // the first form only accepts STT_<TYPE_IN_UPPER_CASE>, in reality GAS
+ // accepts both the upper-case names and the lower-case aliases.
+ if (getLexer().is(AsmToken::Comma))
+ Lex();
- TypeLoc = getLexer().getLoc();
- if (getParser().parseIdentifier(Type))
- return TokError("expected symbol type in directive");
- Attr = StringSwitch<MCSymbolAttr>(Type)
- .Case("function", MCSA_ELF_TypeFunction)
- .Case("object", MCSA_ELF_TypeObject)
- .Case("tls_object", MCSA_ELF_TypeTLS)
- .Case("common", MCSA_ELF_TypeCommon)
- .Case("notype", MCSA_ELF_TypeNoType)
- .Case("gnu_unique_object", MCSA_ELF_TypeGnuUniqueObject)
- .Case("gnu_indirect_function", MCSA_ELF_TypeIndFunction)
- .Default(MCSA_Invalid);
- } else
+ if (getLexer().isNot(AsmToken::Identifier) &&
+ getLexer().isNot(AsmToken::Hash) && getLexer().isNot(AsmToken::At) &&
+ getLexer().isNot(AsmToken::Percent) && getLexer().isNot(AsmToken::String))
return TokError("expected STT_<TYPE_IN_UPPER_CASE>, '#<type>', '@<type>', "
"'%<type>' or \"<type>\"");
+ if (getLexer().isNot(AsmToken::String) &&
+ getLexer().isNot(AsmToken::Identifier))
+ Lex();
+
+ SMLoc TypeLoc = getLexer().getLoc();
+
+ StringRef Type;
+ if (getParser().parseIdentifier(Type))
+ return TokError("expected symbol type in directive");
+
+ MCSymbolAttr Attr = MCAttrForString(Type);
if (Attr == MCSA_Invalid)
return Error(TypeLoc, "unsupported attribute in '.type' directive");
if (getLexer().isNot(AsmToken::EndOfStatement))
return TokError("unexpected token in '.type' directive");
-
Lex();
getStreamer().EmitSymbolAttribute(Sym, Attr);
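With the comma made optional and the two StringSwitch tables merged into MCAttrForString, every GAS-compatible spelling funnels through one lookup. Illustrative expectations against the helper above (the assertions are ours, not from the patch):

    // Upper-case ELF names and lower-case GAS aliases map to the same attr.
    assert(MCAttrForString("STT_FUNC") == MCSA_ELF_TypeFunction);
    assert(MCAttrForString("function") == MCSA_ELF_TypeFunction);
    assert(MCAttrForString("gnu_unique_object") ==
           MCSA_ELF_TypeGnuUniqueObject);
    assert(MCAttrForString("bogus") == MCSA_Invalid); // reported as an error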
diff --git a/lib/MC/MCSectionCOFF.cpp b/lib/MC/MCSectionCOFF.cpp
index 335b8cd..fc2bd36 100644
--- a/lib/MC/MCSectionCOFF.cpp
+++ b/lib/MC/MCSectionCOFF.cpp
@@ -30,14 +30,9 @@ bool MCSectionCOFF::ShouldOmitSectionDirective(StringRef Name,
return false;
}
-void MCSectionCOFF::setSelection(int Selection,
- const MCSectionCOFF *Assoc) const {
+void MCSectionCOFF::setSelection(int Selection) const {
assert(Selection != 0 && "invalid COMDAT selection type");
- assert((Selection == COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE) ==
- (Assoc != nullptr) &&
- "associative COMDAT section must have an associated section");
this->Selection = Selection;
- this->Assoc = Assoc;
Characteristics |= COFF::IMAGE_SCN_LNK_COMDAT;
}
@@ -82,7 +77,7 @@ void MCSectionCOFF::PrintSwitchToSection(const MCAsmInfo &MAI,
OS << "same_contents,";
break;
case COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE:
- OS << "associative " << Assoc->getSectionName() << ",";
+ OS << "associative,";
break;
case COFF::IMAGE_COMDAT_SELECT_LARGEST:
OS << "largest,";
diff --git a/lib/MC/MCStreamer.cpp b/lib/MC/MCStreamer.cpp
index 7dccf0d..bdcdb97 100644
--- a/lib/MC/MCStreamer.cpp
+++ b/lib/MC/MCStreamer.cpp
@@ -37,7 +37,7 @@ void MCTargetStreamer::finish() {}
void MCTargetStreamer::emitAssignment(MCSymbol *Symbol, const MCExpr *Value) {}
MCStreamer::MCStreamer(MCContext &Ctx)
- : Context(Ctx), CurrentW64UnwindInfo(nullptr), LastSymbol(nullptr) {
+ : Context(Ctx), CurrentW64UnwindInfo(nullptr) {
SectionStack.push_back(std::pair<MCSectionSubPair, MCSectionSubPair>());
}
@@ -51,7 +51,6 @@ void MCStreamer::reset() {
delete W64UnwindInfos[i];
W64UnwindInfos.clear();
CurrentW64UnwindInfo = nullptr;
- LastSymbol = nullptr;
SectionStack.clear();
SectionStack.push_back(std::pair<MCSectionSubPair, MCSectionSubPair>());
}
@@ -234,20 +233,12 @@ void MCStreamer::EmitLabel(MCSymbol *Symbol) {
assert(!Symbol->isVariable() && "Cannot emit a variable symbol!");
assert(getCurrentSection().first && "Cannot emit before setting section!");
AssignSection(Symbol, getCurrentSection().first);
- LastSymbol = Symbol;
MCTargetStreamer *TS = getTargetStreamer();
if (TS)
TS->emitLabel(Symbol);
}
-void MCStreamer::EmitDebugLabel(MCSymbol *Symbol) {
- assert(!Symbol->isVariable() && "Cannot emit a variable symbol!");
- assert(getCurrentSection().first && "Cannot emit before setting section!");
- AssignSection(Symbol, getCurrentSection().first);
- LastSymbol = Symbol;
-}
-
void MCStreamer::EmitCompactUnwindEncoding(uint32_t CompactUnwindEncoding) {
EnsureValidFrame();
MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo();
@@ -273,17 +264,6 @@ void MCStreamer::EmitCFIStartProc(bool IsSimple) {
void MCStreamer::EmitCFIStartProcImpl(MCDwarfFrameInfo &Frame) {
}
-void MCStreamer::RecordProcStart(MCDwarfFrameInfo &Frame) {
- // Report an error if we haven't seen a symbol yet where we'd bind
- // .cfi_startproc.
- if (!LastSymbol)
- report_fatal_error("No symbol to start a frame");
- Frame.Function = LastSymbol;
- // We need to create a local symbol to avoid relocations.
- Frame.Begin = getContext().CreateTempSymbol();
- EmitLabel(Frame.Begin);
-}
-
void MCStreamer::EmitCFIEndProc() {
EnsureValidFrame();
MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo();
@@ -291,11 +271,9 @@ void MCStreamer::EmitCFIEndProc() {
}
void MCStreamer::EmitCFIEndProcImpl(MCDwarfFrameInfo &Frame) {
-}
-
-void MCStreamer::RecordProcEnd(MCDwarfFrameInfo &Frame) {
- Frame.End = getContext().CreateTempSymbol();
- EmitLabel(Frame.End);
+ // Put a dummy non-null value in Frame.End to mark that this frame has been
+ // closed.
+ Frame.End = (MCSymbol *) 1;
}
MCSymbol *MCStreamer::EmitCFICommon() {
@@ -447,7 +425,7 @@ void MCStreamer::EnsureValidW64UnwindInfo() {
report_fatal_error("No open Win64 EH frame function!");
}
-void MCStreamer::EmitWin64EHStartProc(const MCSymbol *Symbol) {
+void MCStreamer::EmitWinCFIStartProc(const MCSymbol *Symbol) {
MCWin64EHUnwindInfo *CurFrame = CurrentW64UnwindInfo;
if (CurFrame && !CurFrame->End)
report_fatal_error("Starting a function before ending the previous one!");
@@ -458,7 +436,7 @@ void MCStreamer::EmitWin64EHStartProc(const MCSymbol *Symbol) {
setCurrentW64UnwindInfo(Frame);
}
-void MCStreamer::EmitWin64EHEndProc() {
+void MCStreamer::EmitWinCFIEndProc() {
EnsureValidW64UnwindInfo();
MCWin64EHUnwindInfo *CurFrame = CurrentW64UnwindInfo;
if (CurFrame->ChainedParent)
@@ -467,7 +445,7 @@ void MCStreamer::EmitWin64EHEndProc() {
EmitLabel(CurFrame->End);
}
-void MCStreamer::EmitWin64EHStartChained() {
+void MCStreamer::EmitWinCFIStartChained() {
EnsureValidW64UnwindInfo();
MCWin64EHUnwindInfo *Frame = new MCWin64EHUnwindInfo;
MCWin64EHUnwindInfo *CurFrame = CurrentW64UnwindInfo;
@@ -478,7 +456,7 @@ void MCStreamer::EmitWin64EHStartChained() {
setCurrentW64UnwindInfo(Frame);
}
-void MCStreamer::EmitWin64EHEndChained() {
+void MCStreamer::EmitWinCFIEndChained() {
EnsureValidW64UnwindInfo();
MCWin64EHUnwindInfo *CurFrame = CurrentW64UnwindInfo;
if (!CurFrame->ChainedParent)
@@ -488,8 +466,8 @@ void MCStreamer::EmitWin64EHEndChained() {
CurrentW64UnwindInfo = CurFrame->ChainedParent;
}
-void MCStreamer::EmitWin64EHHandler(const MCSymbol *Sym, bool Unwind,
- bool Except) {
+void MCStreamer::EmitWinEHHandler(const MCSymbol *Sym, bool Unwind,
+ bool Except) {
EnsureValidW64UnwindInfo();
MCWin64EHUnwindInfo *CurFrame = CurrentW64UnwindInfo;
if (CurFrame->ChainedParent)
@@ -503,14 +481,14 @@ void MCStreamer::EmitWin64EHHandler(const MCSymbol *Sym, bool Unwind,
CurFrame->HandlesExceptions = true;
}
-void MCStreamer::EmitWin64EHHandlerData() {
+void MCStreamer::EmitWinEHHandlerData() {
EnsureValidW64UnwindInfo();
MCWin64EHUnwindInfo *CurFrame = CurrentW64UnwindInfo;
if (CurFrame->ChainedParent)
report_fatal_error("Chained unwind areas can't have handlers!");
}
-void MCStreamer::EmitWin64EHPushReg(unsigned Register) {
+void MCStreamer::EmitWinCFIPushReg(unsigned Register) {
EnsureValidW64UnwindInfo();
MCWin64EHUnwindInfo *CurFrame = CurrentW64UnwindInfo;
MCSymbol *Label = getContext().CreateTempSymbol();
@@ -519,13 +497,15 @@ void MCStreamer::EmitWin64EHPushReg(unsigned Register) {
CurFrame->Instructions.push_back(Inst);
}
-void MCStreamer::EmitWin64EHSetFrame(unsigned Register, unsigned Offset) {
+void MCStreamer::EmitWinCFISetFrame(unsigned Register, unsigned Offset) {
EnsureValidW64UnwindInfo();
MCWin64EHUnwindInfo *CurFrame = CurrentW64UnwindInfo;
if (CurFrame->LastFrameInst >= 0)
report_fatal_error("Frame register and offset already specified!");
if (Offset & 0x0F)
report_fatal_error("Misaligned frame pointer offset!");
+ if (Offset > 240)
+ report_fatal_error("Frame offset must be less than or equal to 240!");
MCSymbol *Label = getContext().CreateTempSymbol();
MCWin64EHInstruction Inst(Win64EH::UOP_SetFPReg, Label, Register, Offset);
EmitLabel(Label);
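The new 240 bound matches the x64 UNWIND_INFO encoding as we read it: UOP_SetFPReg stores the frame-pointer offset in a 4-bit field scaled by 16, so 15 * 16 = 240 is the largest representable offset. A one-line restatement (ours, not part of the patch):

    static_assert(15 * 16 == 240, "max encodable frame-pointer offset");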
@@ -533,8 +513,10 @@ void MCStreamer::EmitWin64EHSetFrame(unsigned Register, unsigned Offset) {
CurFrame->Instructions.push_back(Inst);
}
-void MCStreamer::EmitWin64EHAllocStack(unsigned Size) {
+void MCStreamer::EmitWinCFIAllocStack(unsigned Size) {
EnsureValidW64UnwindInfo();
+ if (Size == 0)
+ report_fatal_error("Allocation size must be non-zero!");
if (Size & 7)
report_fatal_error("Misaligned stack allocation!");
MCWin64EHUnwindInfo *CurFrame = CurrentW64UnwindInfo;
@@ -544,7 +526,7 @@ void MCStreamer::EmitWin64EHAllocStack(unsigned Size) {
CurFrame->Instructions.push_back(Inst);
}
-void MCStreamer::EmitWin64EHSaveReg(unsigned Register, unsigned Offset) {
+void MCStreamer::EmitWinCFISaveReg(unsigned Register, unsigned Offset) {
EnsureValidW64UnwindInfo();
if (Offset & 7)
report_fatal_error("Misaligned saved register offset!");
@@ -557,7 +539,7 @@ void MCStreamer::EmitWin64EHSaveReg(unsigned Register, unsigned Offset) {
CurFrame->Instructions.push_back(Inst);
}
-void MCStreamer::EmitWin64EHSaveXMM(unsigned Register, unsigned Offset) {
+void MCStreamer::EmitWinCFISaveXMM(unsigned Register, unsigned Offset) {
EnsureValidW64UnwindInfo();
if (Offset & 0x0F)
report_fatal_error("Misaligned saved vector register offset!");
@@ -570,7 +552,7 @@ void MCStreamer::EmitWin64EHSaveXMM(unsigned Register, unsigned Offset) {
CurFrame->Instructions.push_back(Inst);
}
-void MCStreamer::EmitWin64EHPushFrame(bool Code) {
+void MCStreamer::EmitWinCFIPushFrame(bool Code) {
EnsureValidW64UnwindInfo();
MCWin64EHUnwindInfo *CurFrame = CurrentW64UnwindInfo;
if (CurFrame->Instructions.size() > 0)
@@ -581,7 +563,7 @@ void MCStreamer::EmitWin64EHPushFrame(bool Code) {
CurFrame->Instructions.push_back(Inst);
}
-void MCStreamer::EmitWin64EHEndProlog() {
+void MCStreamer::EmitWinCFIEndProlog() {
EnsureValidW64UnwindInfo();
MCWin64EHUnwindInfo *CurFrame = CurrentW64UnwindInfo;
CurFrame->PrologEnd = getContext().CreateTempSymbol();
@@ -589,11 +571,9 @@ void MCStreamer::EmitWin64EHEndProlog() {
}
void MCStreamer::EmitCOFFSectionIndex(MCSymbol const *Symbol) {
- llvm_unreachable("This file format doesn't support this directive");
}
void MCStreamer::EmitCOFFSecRel32(MCSymbol const *Symbol) {
- llvm_unreachable("This file format doesn't support this directive");
}
/// EmitRawText - If this file is backed by an assembly streamer, this dumps
@@ -629,9 +609,82 @@ void MCStreamer::Finish() {
}
void MCStreamer::EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) {
+ visitUsedExpr(*Value);
Symbol->setVariableValue(Value);
MCTargetStreamer *TS = getTargetStreamer();
if (TS)
TS->emitAssignment(Symbol, Value);
}
+
+void MCStreamer::visitUsedSymbol(const MCSymbol &Sym) {
+}
+
+void MCStreamer::visitUsedExpr(const MCExpr &Expr) {
+ switch (Expr.getKind()) {
+ case MCExpr::Target:
+ cast<MCTargetExpr>(Expr).visitUsedExpr(*this);
+ break;
+
+ case MCExpr::Constant:
+ break;
+
+ case MCExpr::Binary: {
+ const MCBinaryExpr &BE = cast<MCBinaryExpr>(Expr);
+ visitUsedExpr(*BE.getLHS());
+ visitUsedExpr(*BE.getRHS());
+ break;
+ }
+
+ case MCExpr::SymbolRef:
+ visitUsedSymbol(cast<MCSymbolRefExpr>(Expr).getSymbol());
+ break;
+
+ case MCExpr::Unary:
+ visitUsedExpr(*cast<MCUnaryExpr>(Expr).getSubExpr());
+ break;
+ }
+}
+
+void MCStreamer::EmitInstruction(const MCInst &Inst,
+ const MCSubtargetInfo &STI) {
+ // Scan for values.
+ for (unsigned i = Inst.getNumOperands(); i--;)
+ if (Inst.getOperand(i).isExpr())
+ visitUsedExpr(*Inst.getOperand(i).getExpr());
+}
+
+void MCStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) {}
+void MCStreamer::EmitThumbFunc(MCSymbol *Func) {}
+void MCStreamer::EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) {}
+void MCStreamer::BeginCOFFSymbolDef(const MCSymbol *Symbol) {}
+void MCStreamer::EndCOFFSymbolDef() {}
+void MCStreamer::EmitFileDirective(StringRef Filename) {}
+void MCStreamer::EmitCOFFSymbolStorageClass(int StorageClass) {}
+void MCStreamer::EmitCOFFSymbolType(int Type) {}
+void MCStreamer::EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) {}
+void MCStreamer::EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size,
+ unsigned ByteAlignment) {}
+void MCStreamer::EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol,
+ uint64_t Size, unsigned ByteAlignment) {}
+void MCStreamer::ChangeSection(const MCSection *, const MCExpr *) {}
+void MCStreamer::EmitWeakReference(MCSymbol *Alias, const MCSymbol *Symbol) {}
+void MCStreamer::EmitBytes(StringRef Data) {}
+void MCStreamer::EmitValueImpl(const MCExpr *Value, unsigned Size,
+ const SMLoc &Loc) {
+ visitUsedExpr(*Value);
+}
+void MCStreamer::EmitULEB128Value(const MCExpr *Value) {}
+void MCStreamer::EmitSLEB128Value(const MCExpr *Value) {}
+void MCStreamer::EmitValueToAlignment(unsigned ByteAlignment, int64_t Value,
+ unsigned ValueSize,
+ unsigned MaxBytesToEmit) {}
+void MCStreamer::EmitCodeAlignment(unsigned ByteAlignment,
+ unsigned MaxBytesToEmit) {}
+bool MCStreamer::EmitValueToOffset(const MCExpr *Offset, unsigned char Value) {
+ return false;
+}
+void MCStreamer::EmitBundleAlignMode(unsigned AlignPow2) {}
+void MCStreamer::EmitBundleLock(bool AlignToEnd) {}
+void MCStreamer::FinishImpl() {}
+void MCStreamer::EmitBundleUnlock() {}
diff --git a/lib/MC/MCTargetOptions.cpp b/lib/MC/MCTargetOptions.cpp
index 8e946d5..efd724a 100644
--- a/lib/MC/MCTargetOptions.cpp
+++ b/lib/MC/MCTargetOptions.cpp
@@ -14,6 +14,7 @@ namespace llvm {
MCTargetOptions::MCTargetOptions()
: SanitizeAddress(false), MCRelaxAll(false), MCNoExecStack(false),
MCSaveTempLabels(false), MCUseDwarfDirectory(false),
- ShowMCEncoding(false), ShowMCInst(false), AsmVerbose(false) {}
+ ShowMCEncoding(false), ShowMCInst(false), AsmVerbose(false),
+ DwarfVersion(0) {}
} // end namespace llvm
diff --git a/lib/MC/MCWin64EH.cpp b/lib/MC/MCWin64EH.cpp
index b8b07d3..bb651647 100644
--- a/lib/MC/MCWin64EH.cpp
+++ b/lib/MC/MCWin64EH.cpp
@@ -20,34 +20,30 @@ namespace llvm {
// NOTE: All relocations generated here are 4-byte image-relative.
-static uint8_t CountOfUnwindCodes(std::vector<MCWin64EHInstruction> &instArray){
- uint8_t count = 0;
- for (std::vector<MCWin64EHInstruction>::const_iterator I = instArray.begin(),
- E = instArray.end(); I != E; ++I) {
- switch (I->getOperation()) {
+static uint8_t CountOfUnwindCodes(std::vector<MCWin64EHInstruction> &Insns) {
+ uint8_t Count = 0;
+ for (const auto &I : Insns) {
+ switch (I.getOperation()) {
case Win64EH::UOP_PushNonVol:
case Win64EH::UOP_AllocSmall:
case Win64EH::UOP_SetFPReg:
case Win64EH::UOP_PushMachFrame:
- count += 1;
+ Count += 1;
break;
case Win64EH::UOP_SaveNonVol:
case Win64EH::UOP_SaveXMM128:
- count += 2;
+ Count += 2;
break;
case Win64EH::UOP_SaveNonVolBig:
case Win64EH::UOP_SaveXMM128Big:
- count += 3;
+ Count += 3;
break;
case Win64EH::UOP_AllocLarge:
- if (I->getSize() > 512*1024-8)
- count += 3;
- else
- count += 2;
+ Count += (I.getSize() > 512 * 1024 - 8) ? 3 : 2;
break;
}
}
- return count;
+ return Count;
}
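The per-opcode counts follow the x64 UNWIND_CODE slot layout, one 16-bit slot for the opcode plus extra operand slots, with AllocLarge being the only data-dependent case. A standalone restatement of that case (our reading of the format; the helper is illustrative):

    #include <cstdint>

    // UOP_AllocLarge: sizes up to 512K-8 fit in one extra 16-bit slot
    // (stored as size/8); anything larger needs a full 32-bit operand,
    // i.e. two extra slots.
    static unsigned allocLargeSlots(uint64_t Size) {
      return Size > 512 * 1024 - 8 ? 3 : 2;
    }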
static void EmitAbsDifference(MCStreamer &streamer, MCSymbol *lhs,
@@ -274,23 +270,23 @@ void MCWin64EHUnwindEmitter::EmitUnwindInfo(MCStreamer &streamer,
llvm::EmitUnwindInfo(streamer, info);
}
-void MCWin64EHUnwindEmitter::Emit(MCStreamer &streamer) {
- MCContext &context = streamer.getContext();
+void MCWin64EHUnwindEmitter::Emit(MCStreamer &Streamer) {
+ MCContext &Context = Streamer.getContext();
+
// Emit the unwind info structs first.
- for (unsigned i = 0; i < streamer.getNumW64UnwindInfos(); ++i) {
- MCWin64EHUnwindInfo &info = streamer.getW64UnwindInfo(i);
- const MCSection *xdataSect =
- getWin64EHTableSection(GetSectionSuffix(info.Function), context);
- streamer.SwitchSection(xdataSect);
- llvm::EmitUnwindInfo(streamer, &info);
+ for (const auto &CFI : Streamer.getW64UnwindInfos()) {
+ const MCSection *XData =
+ getWin64EHTableSection(GetSectionSuffix(CFI->Function), Context);
+ Streamer.SwitchSection(XData);
+ EmitUnwindInfo(Streamer, CFI);
}
+
// Now emit RUNTIME_FUNCTION entries.
- for (unsigned i = 0; i < streamer.getNumW64UnwindInfos(); ++i) {
- MCWin64EHUnwindInfo &info = streamer.getW64UnwindInfo(i);
- const MCSection *pdataSect =
- getWin64EHFuncTableSection(GetSectionSuffix(info.Function), context);
- streamer.SwitchSection(pdataSect);
- EmitRuntimeFunction(streamer, &info);
+ for (const auto &CFI : Streamer.getW64UnwindInfos()) {
+ const MCSection *PData =
+ getWin64EHFuncTableSection(GetSectionSuffix(CFI->Function), Context);
+ Streamer.SwitchSection(PData);
+ EmitRuntimeFunction(Streamer, CFI);
}
}
diff --git a/lib/MC/MachObjectWriter.cpp b/lib/MC/MachObjectWriter.cpp
index cbaf0b8..5214398 100644
--- a/lib/MC/MachObjectWriter.cpp
+++ b/lib/MC/MachObjectWriter.cpp
@@ -303,20 +303,50 @@ void MachObjectWriter::WriteDysymtabLoadCommand(uint32_t FirstLocalSymbol,
assert(OS.tell() - Start == sizeof(MachO::dysymtab_command));
}
+MachObjectWriter::MachSymbolData *
+MachObjectWriter::findSymbolData(const MCSymbol &Sym) {
+ for (auto &Entry : LocalSymbolData)
+ if (&Entry.SymbolData->getSymbol() == &Sym)
+ return &Entry;
+
+ for (auto &Entry : ExternalSymbolData)
+ if (&Entry.SymbolData->getSymbol() == &Sym)
+ return &Entry;
+
+ for (auto &Entry : UndefinedSymbolData)
+ if (&Entry.SymbolData->getSymbol() == &Sym)
+ return &Entry;
+
+ return nullptr;
+}
+
void MachObjectWriter::WriteNlist(MachSymbolData &MSD,
const MCAsmLayout &Layout) {
MCSymbolData &Data = *MSD.SymbolData;
- const MCSymbol &Symbol = Data.getSymbol();
+ const MCSymbol *Symbol = &Data.getSymbol();
+ const MCSymbol *AliasedSymbol = &Symbol->AliasedSymbol();
+ uint8_t SectionIndex = MSD.SectionIndex;
uint8_t Type = 0;
uint16_t Flags = Data.getFlags();
uint64_t Address = 0;
+ bool IsAlias = Symbol != AliasedSymbol;
+
+ MachSymbolData *AliaseeInfo;
+ if (IsAlias) {
+ AliaseeInfo = findSymbolData(*AliasedSymbol);
+ if (AliaseeInfo)
+ SectionIndex = AliaseeInfo->SectionIndex;
+ Symbol = AliasedSymbol;
+ }
// Set the N_TYPE bits. See <mach-o/nlist.h>.
//
// FIXME: Are the prebound or indirect fields possible here?
- if (Symbol.isUndefined())
+ if (IsAlias && Symbol->isUndefined())
+ Type = MachO::N_INDR;
+ else if (Symbol->isUndefined())
Type = MachO::N_UNDF;
- else if (Symbol.isAbsolute())
+ else if (Symbol->isAbsolute())
Type = MachO::N_ABS;
else
Type = MachO::N_SECT;
@@ -327,13 +357,15 @@ void MachObjectWriter::WriteNlist(MachSymbolData &MSD,
Type |= MachO::N_PEXT;
// Set external bit.
- if (Data.isExternal() || Symbol.isUndefined())
+ if (Data.isExternal() || (!IsAlias && Symbol->isUndefined()))
Type |= MachO::N_EXT;
// Compute the symbol address.
- if (Symbol.isDefined()) {
+ if (IsAlias && Symbol->isUndefined())
+ Address = AliaseeInfo->StringIndex;
+ else if (Symbol->isDefined())
Address = getSymbolAddress(&Data, Layout);
- } else if (Data.isCommon()) {
+ else if (Data.isCommon()) {
// Common symbols are encoded with the size in the address
// field, and their alignment in the flags.
Address = Data.getCommonSize();
@@ -344,21 +376,21 @@ void MachObjectWriter::WriteNlist(MachSymbolData &MSD,
assert((1U << Log2Size) == Align && "Invalid 'common' alignment!");
if (Log2Size > 15)
report_fatal_error("invalid 'common' alignment '" +
- Twine(Align) + "' for '" + Symbol.getName() + "'",
+ Twine(Align) + "' for '" + Symbol->getName() + "'",
false);
// FIXME: Keep this mask with the SymbolFlags enumeration.
Flags = (Flags & 0xF0FF) | (Log2Size << 8);
}
}
- if (Layout.getAssembler().isThumbFunc(&Symbol))
+ if (Layout.getAssembler().isThumbFunc(Symbol))
Flags |= SF_ThumbFunc;
// struct nlist (12 bytes)
Write32(MSD.StringIndex);
Write8(Type);
- Write8(MSD.SectionIndex);
+ Write8(SectionIndex);
// The Mach-O streamer uses the lowest 16-bits of the flags for the 'desc'
// value.
diff --git a/lib/MC/Makefile b/lib/MC/Makefile
index bf8b7c0..a10f17e 100644
--- a/lib/MC/Makefile
+++ b/lib/MC/Makefile
@@ -10,7 +10,7 @@
LEVEL = ../..
LIBRARYNAME = LLVMMC
BUILD_ARCHIVE := 1
-PARALLEL_DIRS := MCParser MCDisassembler
+PARALLEL_DIRS := MCAnalysis MCParser MCDisassembler
include $(LEVEL)/Makefile.common
diff --git a/lib/Object/StringTableBuilder.cpp b/lib/MC/StringTableBuilder.cpp
index 9152834..db58ece 100644
--- a/lib/Object/StringTableBuilder.cpp
+++ b/lib/MC/StringTableBuilder.cpp
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/MC/StringTableBuilder.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/Object/StringTableBuilder.h"
using namespace llvm;
diff --git a/lib/MC/WinCOFFObjectWriter.cpp b/lib/MC/WinCOFFObjectWriter.cpp
index 961cbc6..a462c0d 100644
--- a/lib/MC/WinCOFFObjectWriter.cpp
+++ b/lib/MC/WinCOFFObjectWriter.cpp
@@ -347,6 +347,14 @@ void WinCOFFObjectWriter::DefineSection(MCSectionData const &SectionData) {
COFFSection *coff_section = createSection(Sec.getSectionName());
COFFSymbol *coff_symbol = createSymbol(Sec.getSectionName());
+ if (Sec.getSelection() != COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE) {
+ if (const MCSymbol *S = Sec.getCOMDATSymbol()) {
+ COFFSymbol *COMDATSymbol = GetOrCreateCOFFSymbol(S);
+ if (COMDATSymbol->Section)
+ report_fatal_error("two sections have the same comdat");
+ COMDATSymbol->Section = coff_section;
+ }
+ }
coff_section->Symbol = coff_symbol;
coff_symbol->Section = coff_section;
@@ -458,9 +466,15 @@ void WinCOFFObjectWriter::DefineSymbol(MCSymbolData const &SymbolData,
coff_symbol->Data.SectionNumber = COFF::IMAGE_SYM_ABSOLUTE;
} else {
const MCSymbolData &BaseData = Assembler.getSymbolData(*Base);
- if (BaseData.Fragment)
- coff_symbol->Section =
+ if (BaseData.Fragment) {
+ COFFSection *Sec =
SectionMap[&BaseData.Fragment->getParent()->getSection()];
+
+ if (coff_symbol->Section && coff_symbol->Section != Sec)
+ report_fatal_error("conflicting sections for symbol");
+
+ coff_symbol->Section = Sec;
+ }
}
coff_symbol->MCData = &ResSymData;
@@ -537,7 +551,7 @@ bool WinCOFFObjectWriter::ExportSymbol(MCSymbolData const &SymbolData,
// This doesn't seem to be right. Strings referred to from the .data section
// need symbols so they can be linked to code in the .text section right?
- // return Asm.isSymbolLinkerVisible (&SymbolData);
+ // return Asm.isSymbolLinkerVisible(SymbolData.getSymbol());
// For now, all non-variable symbols are exported,
// the linker will sort the rest out for us.
@@ -819,13 +833,9 @@ void WinCOFFObjectWriter::WriteObject(MCAssembler &Asm,
DenseMap<COFFSection *, uint16_t> SectionIndices;
for (auto & Section : Sections) {
- if (Layout.getSectionAddressSize(Section->MCData) > 0) {
- size_t Number = ++Header.NumberOfSections;
- SectionIndices[Section.get()] = Number;
- MakeSectionReal(*Section, Number);
- } else {
- Section->Number = -1;
- }
+ size_t Number = ++Header.NumberOfSections;
+ SectionIndices[Section.get()] = Number;
+ MakeSectionReal(*Section, Number);
}
Header.NumberOfSymbols = 0;
@@ -865,11 +875,15 @@ void WinCOFFObjectWriter::WriteObject(MCAssembler &Asm,
const MCSectionCOFF &MCSec =
static_cast<const MCSectionCOFF &>(Section->MCData->getSection());
- COFFSection *Assoc = SectionMap.lookup(MCSec.getAssocSection());
+ const MCSymbol *COMDAT = MCSec.getCOMDATSymbol();
+ assert(COMDAT);
+ COFFSymbol *COMDATSymbol = GetOrCreateCOFFSymbol(COMDAT);
+ assert(COMDATSymbol);
+ COFFSection *Assoc = COMDATSymbol->Section;
if (!Assoc)
- report_fatal_error(Twine("Missing associated COMDAT section ") +
- MCSec.getAssocSection()->getSectionName() +
- " for section " + MCSec.getSectionName());
+ report_fatal_error(
+ Twine("Missing associated COMDAT section for section ") +
+ MCSec.getSectionName());
// Skip this section if the associated section is unused.
if (Assoc->Number == -1)
diff --git a/lib/MC/WinCOFFStreamer.cpp b/lib/MC/WinCOFFStreamer.cpp
index e6df465..d391a3f 100644
--- a/lib/MC/WinCOFFStreamer.cpp
+++ b/lib/MC/WinCOFFStreamer.cpp
@@ -82,10 +82,6 @@ void MCWinCOFFStreamer::EmitLabel(MCSymbol *Symbol) {
MCObjectStreamer::EmitLabel(Symbol);
}
-void MCWinCOFFStreamer::EmitDebugLabel(MCSymbol *Symbol) {
- EmitLabel(Symbol);
-}
-
void MCWinCOFFStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) {
llvm_unreachable("not implemented");
}
@@ -242,7 +238,7 @@ void MCWinCOFFStreamer::EmitIdent(StringRef IdentString) {
llvm_unreachable("not implemented");
}
-void MCWinCOFFStreamer::EmitWin64EHHandlerData() {
+void MCWinCOFFStreamer::EmitWinEHHandlerData() {
llvm_unreachable("not implemented");
}
diff --git a/lib/Object/YAML.cpp b/lib/MC/YAML.cpp
index 61e9da3..067e91a 100644
--- a/lib/Object/YAML.cpp
+++ b/lib/MC/YAML.cpp
@@ -12,21 +12,20 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Object/YAML.h"
+#include "llvm/MC/YAML.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <cctype>
using namespace llvm;
-using namespace object::yaml;
-void yaml::ScalarTraits<object::yaml::BinaryRef>::output(
- const object::yaml::BinaryRef &Val, void *, llvm::raw_ostream &Out) {
+void yaml::ScalarTraits<yaml::BinaryRef>::output(
+ const yaml::BinaryRef &Val, void *, llvm::raw_ostream &Out) {
Val.writeAsHex(Out);
}
-StringRef yaml::ScalarTraits<object::yaml::BinaryRef>::input(
- StringRef Scalar, void *, object::yaml::BinaryRef &Val) {
+StringRef yaml::ScalarTraits<yaml::BinaryRef>::input(StringRef Scalar, void *,
+ yaml::BinaryRef &Val) {
if (Scalar.size() % 2 != 0)
return "BinaryRef hex string must contain an even number of nybbles.";
// TODO: Can we improve YAMLIO to permit a more accurate diagnostic here?
@@ -34,11 +33,11 @@ StringRef yaml::ScalarTraits<object::yaml::BinaryRef>::input(
for (unsigned I = 0, N = Scalar.size(); I != N; ++I)
if (!isxdigit(Scalar[I]))
return "BinaryRef hex string must contain only hex digits.";
- Val = object::yaml::BinaryRef(Scalar);
+ Val = yaml::BinaryRef(Scalar);
return StringRef();
}
-void BinaryRef::writeAsBinary(raw_ostream &OS) const {
+void yaml::BinaryRef::writeAsBinary(raw_ostream &OS) const {
if (!DataIsHexString) {
OS.write((const char *)Data.data(), Data.size());
return;
@@ -50,7 +49,7 @@ void BinaryRef::writeAsBinary(raw_ostream &OS) const {
}
}
-void BinaryRef::writeAsHex(raw_ostream &OS) const {
+void yaml::BinaryRef::writeAsHex(raw_ostream &OS) const {
if (binary_size() == 0)
return;
if (DataIsHexString) {
diff --git a/lib/Object/Android.mk b/lib/Object/Android.mk
index 4385f5a..acda4f2 100644
--- a/lib/Object/Android.mk
+++ b/lib/Object/Android.mk
@@ -7,17 +7,15 @@ object_SRC_FILES := \
COFFYAML.cpp \
ELF.cpp \
ELFObjectFile.cpp \
+ ELFYAML.cpp \
Error.cpp \
IRObjectFile.cpp \
MachOObjectFile.cpp \
MachOUniversal.cpp \
Object.cpp \
ObjectFile.cpp \
- StringTableBuilder.cpp \
- SymbolicFile.cpp \
- YAML.cpp \
- ELFYAML.cpp \
-
+ RecordStreamer.cpp \
+ SymbolicFile.cpp
# For the host
# =====================================================
diff --git a/lib/Object/Archive.cpp b/lib/Object/Archive.cpp
index 304ca47..6d09bdb 100644
--- a/lib/Object/Archive.cpp
+++ b/lib/Object/Archive.cpp
@@ -115,18 +115,14 @@ Archive::Child Archive::Child::getNext() const {
return Child(Parent, NextLoc);
}
-error_code Archive::Child::getName(StringRef &Result) const {
+ErrorOr<StringRef> Archive::Child::getName() const {
StringRef name = getRawName();
// Check if it's a special name.
if (name[0] == '/') {
- if (name.size() == 1) { // Linker member.
- Result = name;
- return object_error::success;
- }
- if (name.size() == 2 && name[1] == '/') { // String table.
- Result = name;
- return object_error::success;
- }
+ if (name.size() == 1) // Linker member.
+ return name;
+ if (name.size() == 2 && name[1] == '/') // String table.
+ return name;
// It's a long name.
// Get the offset.
std::size_t offset;
@@ -147,68 +143,62 @@ error_code Archive::Child::getName(StringRef &Result) const {
// GNU long file names end with a /.
if (Parent->kind() == K_GNU) {
StringRef::size_type End = StringRef(addr).find('/');
- Result = StringRef(addr, End);
- } else {
- Result = addr;
+ return StringRef(addr, End);
}
- return object_error::success;
+ return StringRef(addr);
} else if (name.startswith("#1/")) {
uint64_t name_size;
if (name.substr(3).rtrim(" ").getAsInteger(10, name_size))
llvm_unreachable("Long name length is not an ingeter");
- Result = Data.substr(sizeof(ArchiveMemberHeader), name_size)
+ return Data.substr(sizeof(ArchiveMemberHeader), name_size)
.rtrim(StringRef("\0", 1));
- return object_error::success;
}
// It's a simple name.
if (name[name.size() - 1] == '/')
- Result = name.substr(0, name.size() - 1);
- else
- Result = name;
- return object_error::success;
+ return name.substr(0, name.size() - 1);
+ return name;
}
-error_code Archive::Child::getMemoryBuffer(std::unique_ptr<MemoryBuffer> &Result,
- bool FullPath) const {
- StringRef Name;
- if (error_code ec = getName(Name))
- return ec;
+ErrorOr<std::unique_ptr<MemoryBuffer>>
+Archive::Child::getMemoryBuffer(bool FullPath) const {
+ ErrorOr<StringRef> NameOrErr = getName();
+ if (std::error_code EC = NameOrErr.getError())
+ return EC;
+ StringRef Name = NameOrErr.get();
SmallString<128> Path;
- Result.reset(MemoryBuffer::getMemBuffer(
- getBuffer(), FullPath ? (Twine(Parent->getFileName()) + "(" + Name + ")")
- .toStringRef(Path)
- : Name,
+ std::unique_ptr<MemoryBuffer> Ret(MemoryBuffer::getMemBuffer(
+ getBuffer(),
+ FullPath
+ ? (Twine(Parent->getFileName()) + "(" + Name + ")").toStringRef(Path)
+ : Name,
false));
- return error_code::success();
+ return std::move(Ret);
}
-error_code Archive::Child::getAsBinary(std::unique_ptr<Binary> &Result,
- LLVMContext *Context) const {
+ErrorOr<std::unique_ptr<Binary>>
+Archive::Child::getAsBinary(LLVMContext *Context) const {
std::unique_ptr<Binary> ret;
- std::unique_ptr<MemoryBuffer> Buff;
- if (error_code ec = getMemoryBuffer(Buff))
- return ec;
- ErrorOr<Binary *> BinaryOrErr = createBinary(Buff.release(), Context);
- if (error_code EC = BinaryOrErr.getError())
+ ErrorOr<std::unique_ptr<MemoryBuffer>> BuffOrErr = getMemoryBuffer();
+ if (std::error_code EC = BuffOrErr.getError())
return EC;
- Result.reset(BinaryOrErr.get());
- return object_error::success;
+
+ std::unique_ptr<MemoryBuffer> Buff(BuffOrErr.get().release());
+ return createBinary(Buff, Context);
}
-ErrorOr<Archive*> Archive::create(MemoryBuffer *Source) {
- error_code EC;
- std::unique_ptr<Archive> Ret(new Archive(Source, EC));
+ErrorOr<Archive *> Archive::create(std::unique_ptr<MemoryBuffer> Source) {
+ std::error_code EC;
+ std::unique_ptr<Archive> Ret(new Archive(std::move(Source), EC));
if (EC)
return EC;
return Ret.release();
}
-Archive::Archive(MemoryBuffer *source, error_code &ec)
- : Binary(Binary::ID_Archive, source), SymbolTable(child_end()) {
+Archive::Archive(std::unique_ptr<MemoryBuffer> Source, std::error_code &ec)
+ : Binary(Binary::ID_Archive, std::move(Source)), SymbolTable(child_end()) {
// Check for sufficient magic.
- assert(source);
- if (source->getBufferSize() < 8 ||
- StringRef(source->getBufferStart(), 8) != Magic) {
+ if (Data->getBufferSize() < 8 ||
+ StringRef(Data->getBufferStart(), 8) != Magic) {
ec = object_error::invalid_file_type;
return;
}
@@ -255,9 +245,11 @@ Archive::Archive(MemoryBuffer *source, error_code &ec)
if (Name.startswith("#1/")) {
Format = K_BSD;
// We know this is BSD, so getName will work since there is no string table.
- ec = i->getName(Name);
+ ErrorOr<StringRef> NameOrErr = i->getName();
+ ec = NameOrErr.getError();
if (ec)
return;
+ Name = NameOrErr.get();
if (Name == "__.SYMDEF SORTED") {
SymbolTable = i;
++i;
@@ -335,12 +327,11 @@ Archive::child_iterator Archive::child_end() const {
return Child(this, nullptr);
}
-error_code Archive::Symbol::getName(StringRef &Result) const {
- Result = StringRef(Parent->SymbolTable->getBuffer().begin() + StringIndex);
- return object_error::success;
+StringRef Archive::Symbol::getName() const {
+ return Parent->SymbolTable->getBuffer().begin() + StringIndex;
}
-error_code Archive::Symbol::getMember(child_iterator &Result) const {
+ErrorOr<Archive::child_iterator> Archive::Symbol::getMember() const {
const char *Buf = Parent->SymbolTable->getBuffer().begin();
const char *Offsets = Buf + 4;
uint32_t Offset = 0;
@@ -348,7 +339,14 @@ error_code Archive::Symbol::getMember(child_iterator &Result) const {
Offset = *(reinterpret_cast<const support::ubig32_t*>(Offsets)
+ SymbolIndex);
} else if (Parent->kind() == K_BSD) {
- llvm_unreachable("BSD format is not supported");
+ // The SymbolIndex is an index into the ranlib structs that start at
+ // Offsets (the first uint32_t is the number of bytes of the ranlib
+ // structs). The ranlib structs are a pair of uint32_t's, the first
+ // being a string table offset and the second being the offset into
+ // the archive of the member that defines the symbol, which is what
+ // is needed here.
+ Offset = *(reinterpret_cast<const support::ulittle32_t *>(Offsets) +
+ (SymbolIndex * 2) + 1);
} else {
uint32_t MemberCount = *reinterpret_cast<const support::ulittle32_t*>(Buf);
@@ -380,16 +378,49 @@ error_code Archive::Symbol::getMember(child_iterator &Result) const {
}
const char *Loc = Parent->getData().begin() + Offset;
- Result = Child(Parent, Loc);
-
- return object_error::success;
+ child_iterator Iter(Child(Parent, Loc));
+ return Iter;
}
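A standalone sketch of the ranlib lookup described in the comment above; the layout (a leading byte count, then pairs of little-endian uint32_t's) is taken from that comment, and readLE32/bsdMemberOffset are hypothetical helper names:

// Sketch of the BSD ranlib member-offset lookup, independent of LLVM.
#include <cstdint>
#include <cstring>

static uint32_t readLE32(const char *P) {
  uint32_t V;
  std::memcpy(&V, P, sizeof(V));  // assumes a little-endian host for brevity
  return V;
}

// Returns the archive offset of the member defining symbol SymbolIndex.
static uint32_t bsdMemberOffset(const char *SymTab, uint32_t SymbolIndex) {
  const char *Ranlibs = SymTab + 4;  // skip the leading byte count
  // Each ranlib entry is two uint32_t's; the second is the member offset.
  return readLE32(Ranlibs + (SymbolIndex * 2 + 1) * sizeof(uint32_t));
}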
Archive::Symbol Archive::Symbol::getNext() const {
Symbol t(*this);
- // Go to one past next null.
- t.StringIndex =
- Parent->SymbolTable->getBuffer().find('\0', t.StringIndex) + 1;
+ if (Parent->kind() == K_BSD) {
+ // t.StringIndex is an offset from the start of the __.SYMDEF or
+ // "__.SYMDEF SORTED" member into the string table for the ranlib
+ // struct indexed by t.SymbolIndex. To change t.StringIndex to the
+ // offset in the string table for t.SymbolIndex+1 we subtract its
+ // offset from the start of the string table for t.SymbolIndex
+ // and add the string table offset for t.SymbolIndex+1.
+
+ // The __.SYMDEF or "__.SYMDEF SORTED" member starts with a uint32_t
+ // which is the number of bytes of ranlib structs that follow. The ranlib
+ // structs are a pair of uint32_t's, the first being a string table offset
+ // and the second being the offset into the archive of the member that
+ // defines the symbol. After that the next uint32_t is the byte count of
+ // the string table, followed by the string table itself.
+ const char *Buf = Parent->SymbolTable->getBuffer().begin();
+ uint32_t RanlibCount = 0;
+ RanlibCount = (*reinterpret_cast<const support::ulittle32_t *>(Buf)) /
+ (sizeof(uint32_t) * 2);
+ // If t.SymbolIndex + 1 would be past the count of symbols (RanlibCount),
+ // leave t.StringIndex unchanged so we never reference a ranlib entry
+ // past RanlibCount.
+ if (t.SymbolIndex + 1 < RanlibCount) {
+ const char *Ranlibs = Buf + 4;
+ uint32_t CurRanStrx = 0;
+ uint32_t NextRanStrx = 0;
+ CurRanStrx = *(reinterpret_cast<const support::ulittle32_t *>(Ranlibs) +
+ (t.SymbolIndex * 2));
+ NextRanStrx = *(reinterpret_cast<const support::ulittle32_t *>(Ranlibs) +
+ ((t.SymbolIndex + 1) * 2));
+ t.StringIndex -= CurRanStrx;
+ t.StringIndex += NextRanStrx;
+ }
+ } else {
+ // Go to one past next null.
+ t.StringIndex =
+ Parent->SymbolTable->getBuffer().find('\0', t.StringIndex) + 1;
+ }
++t.SymbolIndex;
return t;
}
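And a matching sketch of the StringIndex update getNext() performs for BSD archives, under the same assumed layout (readLE32 is restated so the snippet stands alone; nextStringIndex is a hypothetical name):

#include <cstdint>
#include <cstring>

static uint32_t readLE32(const char *P) {  // same helper as above
  uint32_t V;
  std::memcpy(&V, P, sizeof(V));
  return V;
}

// Rebase StringIndex from the entry at SymbolIndex onto SymbolIndex+1.
static uint32_t nextStringIndex(const char *SymTab, uint32_t SymbolIndex,
                                uint32_t StringIndex) {
  const char *Ranlibs = SymTab + 4;
  uint32_t RanlibCount = readLE32(SymTab) / (2 * sizeof(uint32_t));
  if (SymbolIndex + 1 >= RanlibCount)
    return StringIndex;  // no ranlib entry past the last one
  uint32_t Cur = readLE32(Ranlibs + (SymbolIndex * 2) * sizeof(uint32_t));
  uint32_t Next =
      readLE32(Ranlibs + ((SymbolIndex + 1) * 2) * sizeof(uint32_t));
  return StringIndex - Cur + Next;
}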
@@ -404,7 +435,22 @@ Archive::symbol_iterator Archive::symbol_begin() const {
symbol_count = *reinterpret_cast<const support::ubig32_t*>(buf);
buf += sizeof(uint32_t) + (symbol_count * (sizeof(uint32_t)));
} else if (kind() == K_BSD) {
- llvm_unreachable("BSD archive format is not supported");
+ // The __.SYMDEF or "__.SYMDEF SORTED" member starts with a uint32_t
+ // which is the number of bytes of ranlib structs that follow. The ranlib
+ // structs are a pair of uint32_t's, the first being a string table offset
+ // and the second being the offset into the archive of the member that
+ // defines the symbol. After that the next uint32_t is the byte count of
+ // the string table, followed by the string table itself.
+ uint32_t ranlib_count = 0;
+ ranlib_count = (*reinterpret_cast<const support::ulittle32_t *>(buf)) /
+ (sizeof(uint32_t) * 2);
+ const char *ranlibs = buf + 4;
+ uint32_t ran_strx = 0;
+ ran_strx = *(reinterpret_cast<const support::ulittle32_t *>(ranlibs));
+ buf += sizeof(uint32_t) + (ranlib_count * (2 * (sizeof(uint32_t))));
+ // Skip the byte count of the string table.
+ buf += sizeof(uint32_t);
+ buf += ran_strx;
} else {
uint32_t member_count = 0;
uint32_t symbol_count = 0;
@@ -426,7 +472,8 @@ Archive::symbol_iterator Archive::symbol_end() const {
if (kind() == K_GNU) {
symbol_count = *reinterpret_cast<const support::ubig32_t*>(buf);
} else if (kind() == K_BSD) {
- llvm_unreachable("BSD archive format is not supported");
+ symbol_count = (*reinterpret_cast<const support::ulittle32_t *>(buf)) /
+ (sizeof(uint32_t) * 2);
} else {
uint32_t member_count = 0;
member_count = *reinterpret_cast<const support::ulittle32_t*>(buf);
@@ -440,16 +487,15 @@ Archive::symbol_iterator Archive::symbol_end() const {
Archive::child_iterator Archive::findSym(StringRef name) const {
Archive::symbol_iterator bs = symbol_begin();
Archive::symbol_iterator es = symbol_end();
- Archive::child_iterator result;
-
- StringRef symname;
+
for (; bs != es; ++bs) {
- if (bs->getName(symname))
- return child_end();
- if (symname == name) {
- if (bs->getMember(result))
+ StringRef SymName = bs->getName();
+ if (SymName == name) {
+ ErrorOr<Archive::child_iterator> ResultOrErr = bs->getMember();
+ // FIXME: Should we really eat the error?
+ if (ResultOrErr.getError())
return child_end();
- return result;
+ return ResultOrErr.get();
}
}
return child_end();
diff --git a/lib/Object/Binary.cpp b/lib/Object/Binary.cpp
index 63fd3ed..9f6a685 100644
--- a/lib/Object/Binary.cpp
+++ b/lib/Object/Binary.cpp
@@ -25,13 +25,10 @@
using namespace llvm;
using namespace object;
-Binary::~Binary() {
- if (BufferOwned)
- delete Data;
-}
+Binary::~Binary() {}
-Binary::Binary(unsigned int Type, MemoryBuffer *Source, bool BufferOwned)
- : TypeID(Type), BufferOwned(BufferOwned), Data(Source) {}
+Binary::Binary(unsigned int Type, std::unique_ptr<MemoryBuffer> Source)
+ : TypeID(Type), Data(std::move(Source)) {}
StringRef Binary::getData() const {
return Data->getBuffer();
@@ -41,14 +38,13 @@ StringRef Binary::getFileName() const {
return Data->getBufferIdentifier();
}
-ErrorOr<Binary *> object::createBinary(MemoryBuffer *Source,
+ErrorOr<Binary *> object::createBinary(std::unique_ptr<MemoryBuffer> &Buffer,
LLVMContext *Context) {
- std::unique_ptr<MemoryBuffer> scopedSource(Source);
- sys::fs::file_magic Type = sys::fs::identify_magic(Source->getBuffer());
+ sys::fs::file_magic Type = sys::fs::identify_magic(Buffer->getBuffer());
switch (Type) {
case sys::fs::file_magic::archive:
- return Archive::create(scopedSource.release());
+ return Archive::create(std::move(Buffer));
case sys::fs::file_magic::elf_relocatable:
case sys::fs::file_magic::elf_executable:
case sys::fs::file_magic::elf_shared_object:
@@ -67,10 +63,9 @@ ErrorOr<Binary *> object::createBinary(MemoryBuffer *Source,
case sys::fs::file_magic::coff_import_library:
case sys::fs::file_magic::pecoff_executable:
case sys::fs::file_magic::bitcode:
- return ObjectFile::createSymbolicFile(scopedSource.release(), true, Type,
- Context);
+ return ObjectFile::createSymbolicFile(Buffer, Type, Context);
case sys::fs::file_magic::macho_universal_binary:
- return MachOUniversalBinary::create(scopedSource.release());
+ return MachOUniversalBinary::create(std::move(Buffer));
case sys::fs::file_magic::unknown:
case sys::fs::file_magic::windows_resource:
// Unrecognized object file format.
@@ -80,8 +75,9 @@ ErrorOr<Binary *> object::createBinary(MemoryBuffer *Source,
}
ErrorOr<Binary *> object::createBinary(StringRef Path) {
- std::unique_ptr<MemoryBuffer> File;
- if (error_code EC = MemoryBuffer::getFileOrSTDIN(Path, File))
+ ErrorOr<std::unique_ptr<MemoryBuffer>> FileOrErr =
+ MemoryBuffer::getFileOrSTDIN(Path);
+ if (std::error_code EC = FileOrErr.getError())
return EC;
- return createBinary(File.release());
+ return createBinary(FileOrErr.get());
}
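A usage sketch of the new ownership flow: the StringRef overload now owns the MemoryBuffer it loads and hands it off to the Binary it creates, so the caller only takes ownership of the result. This is a hypothetical driver, not part of the patch:

// Hypothetical driver, assuming the post-patch createBinary API.
#include "llvm/Object/Binary.h"
#include "llvm/Support/raw_ostream.h"
#include <memory>
using namespace llvm;
using namespace object;

int main(int argc, char **argv) {
  if (argc < 2)
    return 1;
  ErrorOr<Binary *> BinOrErr = createBinary(argv[1]);
  if (std::error_code EC = BinOrErr.getError()) {
    errs() << argv[1] << ": " << EC.message() << "\n";
    return 1;
  }
  std::unique_ptr<Binary> Bin(BinOrErr.get()); // caller owns the Binary
  outs() << Bin->getFileName() << " type id: " << Bin->getType() << "\n";
  return 0;
}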
diff --git a/lib/Object/CMakeLists.txt b/lib/Object/CMakeLists.txt
index cd8c9ef..5b08e42 100644
--- a/lib/Object/CMakeLists.txt
+++ b/lib/Object/CMakeLists.txt
@@ -12,7 +12,6 @@ add_llvm_library(LLVMObject
MachOUniversal.cpp
Object.cpp
ObjectFile.cpp
- StringTableBuilder.cpp
+ RecordStreamer.cpp
SymbolicFile.cpp
- YAML.cpp
)
diff --git a/lib/Object/COFFObjectFile.cpp b/lib/Object/COFFObjectFile.cpp
index 262c040..46ef87d 100644
--- a/lib/Object/COFFObjectFile.cpp
+++ b/lib/Object/COFFObjectFile.cpp
@@ -31,8 +31,9 @@ using support::ulittle32_t;
using support::little16_t;
// Returns false, and sets EC, if Size is greater than the buffer size.
-static bool checkSize(const MemoryBuffer *M, error_code &EC, uint64_t Size) {
- if (M->getBufferSize() < Size) {
+static bool checkSize(const MemoryBuffer &M, std::error_code &EC,
+ uint64_t Size) {
+ if (M.getBufferSize() < Size) {
EC = object_error::unexpected_eof;
return false;
}
@@ -41,13 +42,13 @@ static bool checkSize(const MemoryBuffer *M, error_code &EC, uint64_t Size) {
// Sets Obj unless any bytes in [addr, addr + size) fall outside of M.
// Returns unexpected_eof on error.
-template<typename T>
-static error_code getObject(const T *&Obj, const MemoryBuffer *M,
- const uint8_t *Ptr, const size_t Size = sizeof(T)) {
+template <typename T>
+static std::error_code getObject(const T *&Obj, const MemoryBuffer &M,
+ const uint8_t *Ptr,
+ const size_t Size = sizeof(T)) {
uintptr_t Addr = uintptr_t(Ptr);
- if (Addr + Size < Addr ||
- Addr + Size < Size ||
- Addr + Size > uintptr_t(M->getBufferEnd())) {
+ if (Addr + Size < Addr || Addr + Size < Size ||
+ Addr + Size > uintptr_t(M.getBufferEnd())) {
return object_error::unexpected_eof;
}
Obj = reinterpret_cast<const T *>(Addr);
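The guard above deserves a note: Addr + Size can wrap around the address space, so the sum is checked against both operands before the bounds comparison. A standalone restatement, with rangeInBounds as a hypothetical name:

#include <cstddef>
#include <cstdint>

// Overflow-safe version of Addr + Size <= End: the first two comparisons
// reject any (Addr, Size) pair whose sum wrapped around zero.
static bool rangeInBounds(uintptr_t Addr, std::size_t Size, uintptr_t End) {
  return Addr + Size >= Addr && Addr + Size >= Size && Addr + Size <= End;
}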
@@ -129,17 +130,17 @@ void COFFObjectFile::moveSymbolNext(DataRefImpl &Ref) const {
Ref.p = reinterpret_cast<uintptr_t>(Symb);
}
-error_code COFFObjectFile::getSymbolName(DataRefImpl Ref,
- StringRef &Result) const {
+std::error_code COFFObjectFile::getSymbolName(DataRefImpl Ref,
+ StringRef &Result) const {
const coff_symbol *Symb = toSymb(Ref);
return getSymbolName(Symb, Result);
}
-error_code COFFObjectFile::getSymbolAddress(DataRefImpl Ref,
- uint64_t &Result) const {
+std::error_code COFFObjectFile::getSymbolAddress(DataRefImpl Ref,
+ uint64_t &Result) const {
const coff_symbol *Symb = toSymb(Ref);
const coff_section *Section = nullptr;
- if (error_code EC = getSection(Symb->SectionNumber, Section))
+ if (std::error_code EC = getSection(Symb->SectionNumber, Section))
return EC;
if (Symb->SectionNumber == COFF::IMAGE_SYM_UNDEFINED)
@@ -151,8 +152,8 @@ error_code COFFObjectFile::getSymbolAddress(DataRefImpl Ref,
return object_error::success;
}
-error_code COFFObjectFile::getSymbolType(DataRefImpl Ref,
- SymbolRef::Type &Result) const {
+std::error_code COFFObjectFile::getSymbolType(DataRefImpl Ref,
+ SymbolRef::Type &Result) const {
const coff_symbol *Symb = toSymb(Ref);
Result = SymbolRef::ST_Other;
if (Symb->StorageClass == COFF::IMAGE_SYM_CLASS_EXTERNAL &&
@@ -164,7 +165,7 @@ error_code COFFObjectFile::getSymbolType(DataRefImpl Ref,
uint32_t Characteristics = 0;
if (!COFF::isReservedSectionNumber(Symb->SectionNumber)) {
const coff_section *Section = nullptr;
- if (error_code EC = getSection(Symb->SectionNumber, Section))
+ if (std::error_code EC = getSection(Symb->SectionNumber, Section))
return EC;
Characteristics = Section->Characteristics;
}
@@ -202,14 +203,14 @@ uint32_t COFFObjectFile::getSymbolFlags(DataRefImpl Ref) const {
return Result;
}
-error_code COFFObjectFile::getSymbolSize(DataRefImpl Ref,
- uint64_t &Result) const {
+std::error_code COFFObjectFile::getSymbolSize(DataRefImpl Ref,
+ uint64_t &Result) const {
// FIXME: Return the correct size. This requires looking at all the symbols
// in the same section as this symbol, and looking for either the next
// symbol, or the end of the section.
const coff_symbol *Symb = toSymb(Ref);
const coff_section *Section = nullptr;
- if (error_code EC = getSection(Symb->SectionNumber, Section))
+ if (std::error_code EC = getSection(Symb->SectionNumber, Section))
return EC;
if (Symb->SectionNumber == COFF::IMAGE_SYM_UNDEFINED)
@@ -221,14 +222,16 @@ error_code COFFObjectFile::getSymbolSize(DataRefImpl Ref,
return object_error::success;
}
-error_code COFFObjectFile::getSymbolSection(DataRefImpl Ref,
- section_iterator &Result) const {
+std::error_code
+COFFObjectFile::getSymbolSection(DataRefImpl Ref,
+ section_iterator &Result) const {
const coff_symbol *Symb = toSymb(Ref);
if (COFF::isReservedSectionNumber(Symb->SectionNumber)) {
Result = section_end();
} else {
const coff_section *Sec = nullptr;
- if (error_code EC = getSection(Symb->SectionNumber, Sec)) return EC;
+ if (std::error_code EC = getSection(Symb->SectionNumber, Sec))
+ return EC;
DataRefImpl Ref;
Ref.p = reinterpret_cast<uintptr_t>(Sec);
Result = section_iterator(SectionRef(Ref, this));
@@ -242,37 +245,37 @@ void COFFObjectFile::moveSectionNext(DataRefImpl &Ref) const {
Ref.p = reinterpret_cast<uintptr_t>(Sec);
}
-error_code COFFObjectFile::getSectionName(DataRefImpl Ref,
- StringRef &Result) const {
+std::error_code COFFObjectFile::getSectionName(DataRefImpl Ref,
+ StringRef &Result) const {
const coff_section *Sec = toSec(Ref);
return getSectionName(Sec, Result);
}
-error_code COFFObjectFile::getSectionAddress(DataRefImpl Ref,
- uint64_t &Result) const {
+std::error_code COFFObjectFile::getSectionAddress(DataRefImpl Ref,
+ uint64_t &Result) const {
const coff_section *Sec = toSec(Ref);
Result = Sec->VirtualAddress;
return object_error::success;
}
-error_code COFFObjectFile::getSectionSize(DataRefImpl Ref,
- uint64_t &Result) const {
+std::error_code COFFObjectFile::getSectionSize(DataRefImpl Ref,
+ uint64_t &Result) const {
const coff_section *Sec = toSec(Ref);
Result = Sec->SizeOfRawData;
return object_error::success;
}
-error_code COFFObjectFile::getSectionContents(DataRefImpl Ref,
- StringRef &Result) const {
+std::error_code COFFObjectFile::getSectionContents(DataRefImpl Ref,
+ StringRef &Result) const {
const coff_section *Sec = toSec(Ref);
ArrayRef<uint8_t> Res;
- error_code EC = getSectionContents(Sec, Res);
+ std::error_code EC = getSectionContents(Sec, Res);
Result = StringRef(reinterpret_cast<const char*>(Res.data()), Res.size());
return EC;
}
-error_code COFFObjectFile::getSectionAlignment(DataRefImpl Ref,
- uint64_t &Res) const {
+std::error_code COFFObjectFile::getSectionAlignment(DataRefImpl Ref,
+ uint64_t &Res) const {
const coff_section *Sec = toSec(Ref);
if (!Sec)
return object_error::parse_failed;
@@ -280,62 +283,64 @@ error_code COFFObjectFile::getSectionAlignment(DataRefImpl Ref,
return object_error::success;
}
-error_code COFFObjectFile::isSectionText(DataRefImpl Ref,
- bool &Result) const {
+std::error_code COFFObjectFile::isSectionText(DataRefImpl Ref,
+ bool &Result) const {
const coff_section *Sec = toSec(Ref);
Result = Sec->Characteristics & COFF::IMAGE_SCN_CNT_CODE;
return object_error::success;
}
-error_code COFFObjectFile::isSectionData(DataRefImpl Ref,
- bool &Result) const {
+std::error_code COFFObjectFile::isSectionData(DataRefImpl Ref,
+ bool &Result) const {
const coff_section *Sec = toSec(Ref);
Result = Sec->Characteristics & COFF::IMAGE_SCN_CNT_INITIALIZED_DATA;
return object_error::success;
}
-error_code COFFObjectFile::isSectionBSS(DataRefImpl Ref,
- bool &Result) const {
+std::error_code COFFObjectFile::isSectionBSS(DataRefImpl Ref,
+ bool &Result) const {
const coff_section *Sec = toSec(Ref);
Result = Sec->Characteristics & COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA;
return object_error::success;
}
-error_code COFFObjectFile::isSectionRequiredForExecution(DataRefImpl Ref,
- bool &Result) const {
+std::error_code
+COFFObjectFile::isSectionRequiredForExecution(DataRefImpl Ref,
+ bool &Result) const {
// FIXME: Unimplemented
Result = true;
return object_error::success;
}
-error_code COFFObjectFile::isSectionVirtual(DataRefImpl Ref,
- bool &Result) const {
+std::error_code COFFObjectFile::isSectionVirtual(DataRefImpl Ref,
+ bool &Result) const {
const coff_section *Sec = toSec(Ref);
Result = Sec->Characteristics & COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA;
return object_error::success;
}
-error_code COFFObjectFile::isSectionZeroInit(DataRefImpl Ref,
- bool &Result) const {
+std::error_code COFFObjectFile::isSectionZeroInit(DataRefImpl Ref,
+ bool &Result) const {
// FIXME: Unimplemented.
Result = false;
return object_error::success;
}
-error_code COFFObjectFile::isSectionReadOnlyData(DataRefImpl Ref,
- bool &Result) const {
+std::error_code COFFObjectFile::isSectionReadOnlyData(DataRefImpl Ref,
+ bool &Result) const {
// FIXME: Unimplemented.
Result = false;
return object_error::success;
}
-error_code COFFObjectFile::sectionContainsSymbol(DataRefImpl SecRef,
- DataRefImpl SymbRef,
- bool &Result) const {
+std::error_code COFFObjectFile::sectionContainsSymbol(DataRefImpl SecRef,
+ DataRefImpl SymbRef,
+ bool &Result) const {
const coff_section *Sec = toSec(SecRef);
const coff_symbol *Symb = toSymb(SymbRef);
const coff_section *SymbSec = nullptr;
- if (error_code EC = getSection(Symb->SectionNumber, SymbSec)) return EC;
+ if (std::error_code EC = getSection(Symb->SectionNumber, SymbSec))
+ return EC;
if (SymbSec == Sec)
Result = true;
else
@@ -390,9 +395,9 @@ relocation_iterator COFFObjectFile::section_rel_end(DataRefImpl Ref) const {
}
// Initialize the pointer to the symbol table.
-error_code COFFObjectFile::initSymbolTablePtr() {
- if (error_code EC = getObject(
- SymbolTable, Data, base() + COFFHeader->PointerToSymbolTable,
+std::error_code COFFObjectFile::initSymbolTablePtr() {
+ if (std::error_code EC = getObject(
+ SymbolTable, *Data, base() + COFFHeader->PointerToSymbolTable,
COFFHeader->NumberOfSymbols * sizeof(coff_symbol)))
return EC;
@@ -403,11 +408,12 @@ error_code COFFObjectFile::initSymbolTablePtr() {
base() + COFFHeader->PointerToSymbolTable +
COFFHeader->NumberOfSymbols * sizeof(coff_symbol);
const ulittle32_t *StringTableSizePtr;
- if (error_code EC = getObject(StringTableSizePtr, Data, StringTableAddr))
+ if (std::error_code EC =
+ getObject(StringTableSizePtr, *Data, StringTableAddr))
return EC;
StringTableSize = *StringTableSizePtr;
- if (error_code EC =
- getObject(StringTable, Data, StringTableAddr, StringTableSize))
+ if (std::error_code EC =
+ getObject(StringTable, *Data, StringTableAddr, StringTableSize))
return EC;
// Treat table sizes < 4 as empty because contrary to the PECOFF spec, some
@@ -422,7 +428,7 @@ error_code COFFObjectFile::initSymbolTablePtr() {
}
// Returns the file offset for the given VA.
-error_code COFFObjectFile::getVaPtr(uint64_t Addr, uintptr_t &Res) const {
+std::error_code COFFObjectFile::getVaPtr(uint64_t Addr, uintptr_t &Res) const {
uint64_t ImageBase = PE32Header ? (uint64_t)PE32Header->ImageBase
: (uint64_t)PE32PlusHeader->ImageBase;
uint64_t Rva = Addr - ImageBase;
@@ -431,7 +437,7 @@ error_code COFFObjectFile::getVaPtr(uint64_t Addr, uintptr_t &Res) const {
}
// Returns the file offset for the given RVA.
-error_code COFFObjectFile::getRvaPtr(uint32_t Addr, uintptr_t &Res) const {
+std::error_code COFFObjectFile::getRvaPtr(uint32_t Addr, uintptr_t &Res) const {
for (const SectionRef &S : sections()) {
const coff_section *Section = getCOFFSection(S);
uint32_t SectionStart = Section->VirtualAddress;
@@ -447,10 +453,10 @@ error_code COFFObjectFile::getRvaPtr(uint32_t Addr, uintptr_t &Res) const {
// Returns hint and name fields, assuming \p Rva is pointing to a Hint/Name
// table entry.
-error_code COFFObjectFile::
-getHintName(uint32_t Rva, uint16_t &Hint, StringRef &Name) const {
+std::error_code COFFObjectFile::getHintName(uint32_t Rva, uint16_t &Hint,
+ StringRef &Name) const {
uintptr_t IntPtr = 0;
- if (error_code EC = getRvaPtr(Rva, IntPtr))
+ if (std::error_code EC = getRvaPtr(Rva, IntPtr))
return EC;
const uint8_t *Ptr = reinterpret_cast<const uint8_t *>(IntPtr);
Hint = *reinterpret_cast<const ulittle16_t *>(Ptr);
@@ -459,7 +465,7 @@ getHintName(uint32_t Rva, uint16_t &Hint, StringRef &Name) const {
}
// Find the import table.
-error_code COFFObjectFile::initImportTablePtr() {
+std::error_code COFFObjectFile::initImportTablePtr() {
// First, we get the RVA of the import table. If the file lacks a pointer to
// the import table, do nothing.
const data_directory *DataEntry;
@@ -477,7 +483,7 @@ error_code COFFObjectFile::initImportTablePtr() {
// Find the section that contains the RVA. This is needed because the RVA is
// the import table's memory address which is different from its file offset.
uintptr_t IntPtr = 0;
- if (error_code EC = getRvaPtr(ImportTableRva, IntPtr))
+ if (std::error_code EC = getRvaPtr(ImportTableRva, IntPtr))
return EC;
ImportDirectory = reinterpret_cast<
const import_directory_table_entry *>(IntPtr);
@@ -485,7 +491,7 @@ error_code COFFObjectFile::initImportTablePtr() {
}
// Find the export table.
-error_code COFFObjectFile::initExportTablePtr() {
+std::error_code COFFObjectFile::initExportTablePtr() {
// First, we get the RVA of the export table. If the file lacks a pointer to
// the export table, do nothing.
const data_directory *DataEntry;
@@ -498,22 +504,23 @@ error_code COFFObjectFile::initExportTablePtr() {
uint32_t ExportTableRva = DataEntry->RelativeVirtualAddress;
uintptr_t IntPtr = 0;
- if (error_code EC = getRvaPtr(ExportTableRva, IntPtr))
+ if (std::error_code EC = getRvaPtr(ExportTableRva, IntPtr))
return EC;
ExportDirectory =
reinterpret_cast<const export_directory_table_entry *>(IntPtr);
return object_error::success;
}
-COFFObjectFile::COFFObjectFile(MemoryBuffer *Object, error_code &EC,
- bool BufferOwned)
- : ObjectFile(Binary::ID_COFF, Object, BufferOwned), COFFHeader(nullptr),
+COFFObjectFile::COFFObjectFile(std::unique_ptr<MemoryBuffer> Object,
+ std::error_code &EC)
+ : ObjectFile(Binary::ID_COFF, std::move(Object)), COFFHeader(nullptr),
PE32Header(nullptr), PE32PlusHeader(nullptr), DataDirectory(nullptr),
SectionTable(nullptr), SymbolTable(nullptr), StringTable(nullptr),
StringTableSize(0), ImportDirectory(nullptr), NumberOfImportDirectory(0),
ExportDirectory(nullptr) {
// Check that we at least have enough room for a header.
- if (!checkSize(Data, EC, sizeof(coff_file_header))) return;
+ if (!checkSize(*Data, EC, sizeof(coff_file_header)))
+ return;
// The current location in the file where we are looking at.
uint64_t CurPtr = 0;
@@ -526,7 +533,8 @@ COFFObjectFile::COFFObjectFile(MemoryBuffer *Object, error_code &EC,
if (base()[0] == 0x4d && base()[1] == 0x5a) {
// PE/COFF, seek through MS-DOS compatibility stub and 4-byte
// PE signature to find 'normal' COFF header.
- if (!checkSize(Data, EC, 0x3c + 8)) return;
+ if (!checkSize(*Data, EC, 0x3c + 8))
+ return;
CurPtr = *reinterpret_cast<const ulittle16_t *>(base() + 0x3c);
// Check the PE magic bytes. ("PE\0\0")
if (std::memcmp(base() + CurPtr, "PE\0\0", 4) != 0) {
@@ -537,13 +545,13 @@ COFFObjectFile::COFFObjectFile(MemoryBuffer *Object, error_code &EC,
HasPEHeader = true;
}
- if ((EC = getObject(COFFHeader, Data, base() + CurPtr)))
+ if ((EC = getObject(COFFHeader, *Data, base() + CurPtr)))
return;
CurPtr += sizeof(coff_file_header);
if (HasPEHeader) {
const pe32_header *Header;
- if ((EC = getObject(Header, Data, base() + CurPtr)))
+ if ((EC = getObject(Header, *Data, base() + CurPtr)))
return;
const uint8_t *DataDirAddr;
@@ -561,7 +569,7 @@ COFFObjectFile::COFFObjectFile(MemoryBuffer *Object, error_code &EC,
EC = object_error::parse_failed;
return;
}
- if ((EC = getObject(DataDirectory, Data, DataDirAddr, DataDirSize)))
+ if ((EC = getObject(DataDirectory, *Data, DataDirAddr, DataDirSize)))
return;
CurPtr += COFFHeader->SizeOfOptionalHeader;
}
@@ -569,7 +577,7 @@ COFFObjectFile::COFFObjectFile(MemoryBuffer *Object, error_code &EC,
if (COFFHeader->isImportLibrary())
return;
- if ((EC = getObject(SectionTable, Data, base() + CurPtr,
+ if ((EC = getObject(SectionTable, *Data, base() + CurPtr,
COFFHeader->NumberOfSections * sizeof(coff_section))))
return;
@@ -686,28 +694,30 @@ unsigned COFFObjectFile::getArch() const {
// This method is kept here because lld uses it. As soon as we make
// lld use getCOFFHeader, this method will be removed.
-error_code COFFObjectFile::getHeader(const coff_file_header *&Res) const {
+std::error_code COFFObjectFile::getHeader(const coff_file_header *&Res) const {
return getCOFFHeader(Res);
}
-error_code COFFObjectFile::getCOFFHeader(const coff_file_header *&Res) const {
+std::error_code
+COFFObjectFile::getCOFFHeader(const coff_file_header *&Res) const {
Res = COFFHeader;
return object_error::success;
}
-error_code COFFObjectFile::getPE32Header(const pe32_header *&Res) const {
+std::error_code COFFObjectFile::getPE32Header(const pe32_header *&Res) const {
Res = PE32Header;
return object_error::success;
}
-error_code
+std::error_code
COFFObjectFile::getPE32PlusHeader(const pe32plus_header *&Res) const {
Res = PE32PlusHeader;
return object_error::success;
}
-error_code COFFObjectFile::getDataDirectory(uint32_t Index,
- const data_directory *&Res) const {
+std::error_code
+COFFObjectFile::getDataDirectory(uint32_t Index,
+ const data_directory *&Res) const {
// Error if there's no data directory or the index is out of range.
if (!DataDirectory)
return object_error::parse_failed;
@@ -720,8 +730,8 @@ error_code COFFObjectFile::getDataDirectory(uint32_t Index,
return object_error::success;
}
-error_code COFFObjectFile::getSection(int32_t Index,
- const coff_section *&Result) const {
+std::error_code COFFObjectFile::getSection(int32_t Index,
+ const coff_section *&Result) const {
// Check for special index values.
if (COFF::isReservedSectionNumber(Index))
Result = nullptr;
@@ -733,8 +743,8 @@ error_code COFFObjectFile::getSection(int32_t Index,
return object_error::success;
}
-error_code COFFObjectFile::getString(uint32_t Offset,
- StringRef &Result) const {
+std::error_code COFFObjectFile::getString(uint32_t Offset,
+ StringRef &Result) const {
if (StringTableSize <= 4)
// Tried to get a string from an empty string table.
return object_error::parse_failed;
@@ -744,8 +754,8 @@ error_code COFFObjectFile::getString(uint32_t Offset,
return object_error::success;
}
-error_code COFFObjectFile::getSymbol(uint32_t Index,
- const coff_symbol *&Result) const {
+std::error_code COFFObjectFile::getSymbol(uint32_t Index,
+ const coff_symbol *&Result) const {
if (Index < COFFHeader->NumberOfSymbols)
Result = SymbolTable + Index;
else
@@ -753,12 +763,12 @@ error_code COFFObjectFile::getSymbol(uint32_t Index,
return object_error::success;
}
-error_code COFFObjectFile::getSymbolName(const coff_symbol *Symbol,
- StringRef &Res) const {
+std::error_code COFFObjectFile::getSymbolName(const coff_symbol *Symbol,
+ StringRef &Res) const {
// Check for string table entry. First 4 bytes are 0.
if (Symbol->Name.Offset.Zeroes == 0) {
uint32_t Offset = Symbol->Name.Offset.Offset;
- if (error_code EC = getString(Offset, Res))
+ if (std::error_code EC = getString(Offset, Res))
return EC;
return object_error::success;
}
@@ -795,8 +805,8 @@ ArrayRef<uint8_t> COFFObjectFile::getSymbolAuxData(
Symbol->NumberOfAuxSymbols * sizeof(coff_symbol));
}
-error_code COFFObjectFile::getSectionName(const coff_section *Sec,
- StringRef &Res) const {
+std::error_code COFFObjectFile::getSectionName(const coff_section *Sec,
+ StringRef &Res) const {
StringRef Name;
if (Sec->Name[7] == 0)
// Null terminated, let ::strlen figure out the length.
@@ -815,7 +825,7 @@ error_code COFFObjectFile::getSectionName(const coff_section *Sec,
if (Name.substr(1).getAsInteger(10, Offset))
return object_error::parse_failed;
}
- if (error_code EC = getString(Offset, Name))
+ if (std::error_code EC = getString(Offset, Name))
return EC;
}
@@ -823,8 +833,9 @@ error_code COFFObjectFile::getSectionName(const coff_section *Sec,
return object_error::success;
}
-error_code COFFObjectFile::getSectionContents(const coff_section *Sec,
- ArrayRef<uint8_t> &Res) const {
+std::error_code
+COFFObjectFile::getSectionContents(const coff_section *Sec,
+ ArrayRef<uint8_t> &Res) const {
// The only thing that we need to verify is that the contents are contained
// within the file bounds. We don't need to make sure it doesn't cover other
// data, as there's nothing that says that is not allowed.
@@ -846,13 +857,13 @@ void COFFObjectFile::moveRelocationNext(DataRefImpl &Rel) const {
reinterpret_cast<const coff_relocation*>(Rel.p) + 1);
}
-error_code COFFObjectFile::getRelocationAddress(DataRefImpl Rel,
- uint64_t &Res) const {
+std::error_code COFFObjectFile::getRelocationAddress(DataRefImpl Rel,
+ uint64_t &Res) const {
report_fatal_error("getRelocationAddress not implemented in COFFObjectFile");
}
-error_code COFFObjectFile::getRelocationOffset(DataRefImpl Rel,
- uint64_t &Res) const {
+std::error_code COFFObjectFile::getRelocationOffset(DataRefImpl Rel,
+ uint64_t &Res) const {
Res = toRel(Rel)->VirtualAddress;
return object_error::success;
}
@@ -864,8 +875,8 @@ symbol_iterator COFFObjectFile::getRelocationSymbol(DataRefImpl Rel) const {
return symbol_iterator(SymbolRef(Ref, this));
}
-error_code COFFObjectFile::getRelocationType(DataRefImpl Rel,
- uint64_t &Res) const {
+std::error_code COFFObjectFile::getRelocationType(DataRefImpl Rel,
+ uint64_t &Res) const {
const coff_relocation* R = toRel(Rel);
Res = R->Type;
return object_error::success;
@@ -891,8 +902,9 @@ COFFObjectFile::getCOFFRelocation(const RelocationRef &Reloc) const {
Res = #reloc_type; \
break;
-error_code COFFObjectFile::getRelocationTypeName(DataRefImpl Rel,
- SmallVectorImpl<char> &Result) const {
+std::error_code
+COFFObjectFile::getRelocationTypeName(DataRefImpl Rel,
+ SmallVectorImpl<char> &Result) const {
const coff_relocation *Reloc = toRel(Rel);
StringRef Res;
switch (COFFHeader->Machine) {
@@ -966,26 +978,29 @@ error_code COFFObjectFile::getRelocationTypeName(DataRefImpl Rel,
#undef LLVM_COFF_SWITCH_RELOC_TYPE_NAME
-error_code COFFObjectFile::getRelocationValueString(DataRefImpl Rel,
- SmallVectorImpl<char> &Result) const {
+std::error_code
+COFFObjectFile::getRelocationValueString(DataRefImpl Rel,
+ SmallVectorImpl<char> &Result) const {
const coff_relocation *Reloc = toRel(Rel);
const coff_symbol *Symb = nullptr;
- if (error_code EC = getSymbol(Reloc->SymbolTableIndex, Symb)) return EC;
+ if (std::error_code EC = getSymbol(Reloc->SymbolTableIndex, Symb))
+ return EC;
DataRefImpl Sym;
Sym.p = reinterpret_cast<uintptr_t>(Symb);
StringRef SymName;
- if (error_code EC = getSymbolName(Sym, SymName)) return EC;
+ if (std::error_code EC = getSymbolName(Sym, SymName))
+ return EC;
Result.append(SymName.begin(), SymName.end());
return object_error::success;
}
-error_code COFFObjectFile::getLibraryNext(DataRefImpl LibData,
- LibraryRef &Result) const {
+std::error_code COFFObjectFile::getLibraryNext(DataRefImpl LibData,
+ LibraryRef &Result) const {
report_fatal_error("getLibraryNext not implemented in COFFObjectFile");
}
-error_code COFFObjectFile::getLibraryPath(DataRefImpl LibData,
- StringRef &Result) const {
+std::error_code COFFObjectFile::getLibraryPath(DataRefImpl LibData,
+ StringRef &Result) const {
report_fatal_error("getLibraryPath not implemented in COFFObjectFile");
}
@@ -998,24 +1013,25 @@ void ImportDirectoryEntryRef::moveNext() {
++Index;
}
-error_code ImportDirectoryEntryRef::
-getImportTableEntry(const import_directory_table_entry *&Result) const {
+std::error_code ImportDirectoryEntryRef::getImportTableEntry(
+ const import_directory_table_entry *&Result) const {
Result = ImportTable;
return object_error::success;
}
-error_code ImportDirectoryEntryRef::getName(StringRef &Result) const {
+std::error_code ImportDirectoryEntryRef::getName(StringRef &Result) const {
uintptr_t IntPtr = 0;
- if (error_code EC = OwningObject->getRvaPtr(ImportTable->NameRVA, IntPtr))
+ if (std::error_code EC =
+ OwningObject->getRvaPtr(ImportTable->NameRVA, IntPtr))
return EC;
Result = StringRef(reinterpret_cast<const char *>(IntPtr));
return object_error::success;
}
-error_code ImportDirectoryEntryRef::getImportLookupEntry(
+std::error_code ImportDirectoryEntryRef::getImportLookupEntry(
const import_lookup_table_entry32 *&Result) const {
uintptr_t IntPtr = 0;
- if (error_code EC =
+ if (std::error_code EC =
OwningObject->getRvaPtr(ImportTable->ImportLookupTableRVA, IntPtr))
return EC;
Result = reinterpret_cast<const import_lookup_table_entry32 *>(IntPtr);
@@ -1033,31 +1049,33 @@ void ExportDirectoryEntryRef::moveNext() {
// Returns the name of the DLL these exported symbols belong to.
-error_code ExportDirectoryEntryRef::getDllName(StringRef &Result) const {
+std::error_code ExportDirectoryEntryRef::getDllName(StringRef &Result) const {
uintptr_t IntPtr = 0;
- if (error_code EC = OwningObject->getRvaPtr(ExportTable->NameRVA, IntPtr))
+ if (std::error_code EC =
+ OwningObject->getRvaPtr(ExportTable->NameRVA, IntPtr))
return EC;
Result = StringRef(reinterpret_cast<const char *>(IntPtr));
return object_error::success;
}
// Returns the starting ordinal number.
-error_code ExportDirectoryEntryRef::getOrdinalBase(uint32_t &Result) const {
+std::error_code
+ExportDirectoryEntryRef::getOrdinalBase(uint32_t &Result) const {
Result = ExportTable->OrdinalBase;
return object_error::success;
}
// Returns the export ordinal of the current export symbol.
-error_code ExportDirectoryEntryRef::getOrdinal(uint32_t &Result) const {
+std::error_code ExportDirectoryEntryRef::getOrdinal(uint32_t &Result) const {
Result = ExportTable->OrdinalBase + Index;
return object_error::success;
}
// Returns the address of the current export symbol.
-error_code ExportDirectoryEntryRef::getExportRVA(uint32_t &Result) const {
+std::error_code ExportDirectoryEntryRef::getExportRVA(uint32_t &Result) const {
uintptr_t IntPtr = 0;
- if (error_code EC = OwningObject->getRvaPtr(
- ExportTable->ExportAddressTableRVA, IntPtr))
+ if (std::error_code EC =
+ OwningObject->getRvaPtr(ExportTable->ExportAddressTableRVA, IntPtr))
return EC;
const export_address_table_entry *entry =
reinterpret_cast<const export_address_table_entry *>(IntPtr);
@@ -1067,10 +1085,11 @@ error_code ExportDirectoryEntryRef::getExportRVA(uint32_t &Result) const {
// Returns the name of the current export symbol. If the symbol is exported only
// by ordinal, the empty string is set as a result.
-error_code ExportDirectoryEntryRef::getSymbolName(StringRef &Result) const {
+std::error_code
+ExportDirectoryEntryRef::getSymbolName(StringRef &Result) const {
uintptr_t IntPtr = 0;
- if (error_code EC = OwningObject->getRvaPtr(
- ExportTable->OrdinalTableRVA, IntPtr))
+ if (std::error_code EC =
+ OwningObject->getRvaPtr(ExportTable->OrdinalTableRVA, IntPtr))
return EC;
const ulittle16_t *Start = reinterpret_cast<const ulittle16_t *>(IntPtr);
@@ -1080,11 +1099,11 @@ error_code ExportDirectoryEntryRef::getSymbolName(StringRef &Result) const {
I < E; ++I, ++Offset) {
if (*I != Index)
continue;
- if (error_code EC = OwningObject->getRvaPtr(
- ExportTable->NamePointerRVA, IntPtr))
+ if (std::error_code EC =
+ OwningObject->getRvaPtr(ExportTable->NamePointerRVA, IntPtr))
return EC;
const ulittle32_t *NamePtr = reinterpret_cast<const ulittle32_t *>(IntPtr);
- if (error_code EC = OwningObject->getRvaPtr(NamePtr[Offset], IntPtr))
+ if (std::error_code EC = OwningObject->getRvaPtr(NamePtr[Offset], IntPtr))
return EC;
Result = StringRef(reinterpret_cast<const char *>(IntPtr));
return object_error::success;
@@ -1093,11 +1112,11 @@ error_code ExportDirectoryEntryRef::getSymbolName(StringRef &Result) const {
return object_error::success;
}
-ErrorOr<ObjectFile *> ObjectFile::createCOFFObjectFile(MemoryBuffer *Object,
- bool BufferOwned) {
- error_code EC;
+ErrorOr<ObjectFile *>
+ObjectFile::createCOFFObjectFile(std::unique_ptr<MemoryBuffer> Object) {
+ std::error_code EC;
std::unique_ptr<COFFObjectFile> Ret(
- new COFFObjectFile(Object, EC, BufferOwned));
+ new COFFObjectFile(std::move(Object), EC));
if (EC)
return EC;
return Ret.release();
diff --git a/lib/Object/ELFObjectFile.cpp b/lib/Object/ELFObjectFile.cpp
index a2c4df2..4f0f60b 100644
--- a/lib/Object/ELFObjectFile.cpp
+++ b/lib/Object/ELFObjectFile.cpp
@@ -17,65 +17,66 @@
namespace llvm {
using namespace object;
-ErrorOr<ObjectFile *> ObjectFile::createELFObjectFile(MemoryBuffer *Obj,
- bool BufferOwned) {
- std::pair<unsigned char, unsigned char> Ident = getElfArchType(Obj);
+ErrorOr<ObjectFile *>
+ObjectFile::createELFObjectFile(std::unique_ptr<MemoryBuffer> &Obj) {
+ std::pair<unsigned char, unsigned char> Ident =
+ getElfArchType(Obj->getBuffer());
std::size_t MaxAlignment =
1ULL << countTrailingZeros(uintptr_t(Obj->getBufferStart()));
- error_code EC;
+ std::error_code EC;
std::unique_ptr<ObjectFile> R;
if (Ident.first == ELF::ELFCLASS32 && Ident.second == ELF::ELFDATA2LSB)
#if !LLVM_IS_UNALIGNED_ACCESS_FAST
if (MaxAlignment >= 4)
- R.reset(new ELFObjectFile<ELFType<support::little, 4, false> >(
- Obj, EC, BufferOwned));
+ R.reset(new ELFObjectFile<ELFType<support::little, 4, false>>(
+ std::move(Obj), EC));
else
#endif
if (MaxAlignment >= 2)
- R.reset(new ELFObjectFile<ELFType<support::little, 2, false> >(
- Obj, EC, BufferOwned));
+ R.reset(new ELFObjectFile<ELFType<support::little, 2, false>>(
+ std::move(Obj), EC));
else
- llvm_unreachable("Invalid alignment for ELF file!");
+ return object_error::parse_failed;
else if (Ident.first == ELF::ELFCLASS32 && Ident.second == ELF::ELFDATA2MSB)
#if !LLVM_IS_UNALIGNED_ACCESS_FAST
if (MaxAlignment >= 4)
- R.reset(new ELFObjectFile<ELFType<support::big, 4, false> >(Obj, EC,
- BufferOwned));
+ R.reset(new ELFObjectFile<ELFType<support::big, 4, false>>(std::move(Obj),
+ EC));
else
#endif
if (MaxAlignment >= 2)
- R.reset(new ELFObjectFile<ELFType<support::big, 2, false> >(Obj, EC,
- BufferOwned));
+ R.reset(new ELFObjectFile<ELFType<support::big, 2, false>>(std::move(Obj),
+ EC));
else
- llvm_unreachable("Invalid alignment for ELF file!");
+ return object_error::parse_failed;
else if (Ident.first == ELF::ELFCLASS64 && Ident.second == ELF::ELFDATA2MSB)
#if !LLVM_IS_UNALIGNED_ACCESS_FAST
if (MaxAlignment >= 8)
- R.reset(new ELFObjectFile<ELFType<support::big, 8, true> >(Obj, EC,
- BufferOwned));
+ R.reset(new ELFObjectFile<ELFType<support::big, 8, true>>(std::move(Obj),
+ EC));
else
#endif
if (MaxAlignment >= 2)
- R.reset(new ELFObjectFile<ELFType<support::big, 2, true> >(Obj, EC,
- BufferOwned));
+ R.reset(new ELFObjectFile<ELFType<support::big, 2, true>>(std::move(Obj),
+ EC));
else
- llvm_unreachable("Invalid alignment for ELF file!");
+ return object_error::parse_failed;
else if (Ident.first == ELF::ELFCLASS64 && Ident.second == ELF::ELFDATA2LSB) {
#if !LLVM_IS_UNALIGNED_ACCESS_FAST
if (MaxAlignment >= 8)
- R.reset(new ELFObjectFile<ELFType<support::little, 8, true> >(
- Obj, EC, BufferOwned));
+ R.reset(new ELFObjectFile<ELFType<support::little, 8, true>>(
+ std::move(Obj), EC));
else
#endif
if (MaxAlignment >= 2)
- R.reset(new ELFObjectFile<ELFType<support::little, 2, true> >(
- Obj, EC, BufferOwned));
+ R.reset(new ELFObjectFile<ELFType<support::little, 2, true>>(
+ std::move(Obj), EC));
else
- llvm_unreachable("Invalid alignment for ELF file!");
+ return object_error::parse_failed;
}
else
- report_fatal_error("Buffer is not an ELF object file!");
+ llvm_unreachable("Buffer is not an ELF object file!");
if (EC)
return EC;
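The MaxAlignment computation driving this dispatch is compact enough to miss; a sketch of the same probe in isolation (maxAlignmentOf is a hypothetical name):

// The buffer's start address bounds the alignment the templated ELF
// types may assume: countTrailingZeros of the address is log2 of its
// alignment. Assumes a non-null buffer start, as the caller does.
#include "llvm/Support/MathExtras.h"
#include <cstdint>

static std::size_t maxAlignmentOf(const char *BufferStart) {
  return 1ULL << llvm::countTrailingZeros(uintptr_t(BufferStart));
}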
diff --git a/lib/Object/ELFYAML.cpp b/lib/Object/ELFYAML.cpp
index 7d50f23..dc3d467 100644
--- a/lib/Object/ELFYAML.cpp
+++ b/lib/Object/ELFYAML.cpp
@@ -368,6 +368,16 @@ void ScalarEnumerationTraits<ELFYAML::ELF_STT>::enumeration(
#undef ECase
}
+void ScalarEnumerationTraits<ELFYAML::ELF_STV>::enumeration(
+ IO &IO, ELFYAML::ELF_STV &Value) {
+#define ECase(X) IO.enumCase(Value, #X, ELF::X);
+ ECase(STV_DEFAULT)
+ ECase(STV_INTERNAL)
+ ECase(STV_HIDDEN)
+ ECase(STV_PROTECTED)
+#undef ECase
+}
+
void ScalarEnumerationTraits<ELFYAML::ELF_REL>::enumeration(
IO &IO, ELFYAML::ELF_REL &Value) {
const auto *Object = static_cast<ELFYAML::Object *>(IO.getContext());
@@ -649,6 +659,7 @@ void MappingTraits<ELFYAML::Symbol>::mapping(IO &IO, ELFYAML::Symbol &Symbol) {
IO.mapOptional("Section", Symbol.Section, StringRef());
IO.mapOptional("Value", Symbol.Value, Hex64(0));
IO.mapOptional("Size", Symbol.Size, Hex64(0));
+ IO.mapOptional("Visibility", Symbol.Visibility, ELFYAML::ELF_STV(0));
}
void MappingTraits<ELFYAML::LocalGlobalWeakSymbols>::mapping(
@@ -664,7 +675,6 @@ static void commonSectionMapping(IO &IO, ELFYAML::Section &Section) {
IO.mapOptional("Flags", Section.Flags, ELFYAML::ELF_SHF(0));
IO.mapOptional("Address", Section.Address, Hex64(0));
IO.mapOptional("Link", Section.Link, StringRef());
- IO.mapOptional("Info", Section.Info, StringRef());
IO.mapOptional("AddressAlign", Section.AddressAlign, Hex64(0));
}
@@ -676,6 +686,7 @@ static void sectionMapping(IO &IO, ELFYAML::RawContentSection &Section) {
static void sectionMapping(IO &IO, ELFYAML::RelocationSection &Section) {
commonSectionMapping(IO, Section);
+ IO.mapOptional("Info", Section.Info, StringRef());
IO.mapOptional("Relocations", Section.Relocations);
}
diff --git a/lib/Object/Error.cpp b/lib/Object/Error.cpp
index 8e50869..9d25269 100644
--- a/lib/Object/Error.cpp
+++ b/lib/Object/Error.cpp
@@ -18,11 +18,10 @@ using namespace llvm;
using namespace object;
namespace {
-class _object_error_category : public error_category {
+class _object_error_category : public std::error_category {
public:
- const char* name() const override;
+ const char* name() const LLVM_NOEXCEPT override;
std::string message(int ev) const override;
- error_condition default_error_condition(int ev) const override;
};
}
@@ -30,8 +29,8 @@ const char *_object_error_category::name() const {
return "llvm.object";
}
-std::string _object_error_category::message(int ev) const {
- object_error::Impl E = static_cast<object_error::Impl>(ev);
+std::string _object_error_category::message(int EV) const {
+ object_error E = static_cast<object_error>(EV);
switch (E) {
case object_error::success: return "Success";
case object_error::arch_not_found:
@@ -47,13 +46,7 @@ std::string _object_error_category::message(int ev) const {
"defined.");
}
-error_condition _object_error_category::default_error_condition(int ev) const {
- if (ev == object_error::success)
- return errc::success;
- return errc::invalid_argument;
-}
-
-const error_category &object::object_category() {
+const std::error_category &object::object_category() {
static _object_error_category o;
return o;
}
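For contrast, the same shape written against only the standard library, since std::error_category is all the class now depends on (demo_error_category is a hypothetical name; LLVM_NOEXCEPT expands to noexcept where the compiler supports it):

#include <string>
#include <system_error>

namespace {
// Minimal custom category in the post-patch style: override name()
// and message(); default_error_condition() is no longer needed.
class demo_error_category : public std::error_category {
public:
  const char *name() const noexcept override { return "demo"; }
  std::string message(int EV) const override {
    return EV == 0 ? "Success" : "Failure";
  }
};
}

const std::error_category &demo_category() {
  static demo_error_category C;
  return C;
}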
diff --git a/lib/Object/IRObjectFile.cpp b/lib/Object/IRObjectFile.cpp
index a8aba26..5323d92 100644
--- a/lib/Object/IRObjectFile.cpp
+++ b/lib/Object/IRObjectFile.cpp
@@ -11,34 +11,119 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/Object/IRObjectFile.h"
+#include "RecordStreamer.h"
#include "llvm/Bitcode/ReaderWriter.h"
#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/GVMaterializer.h"
#include "llvm/IR/Mangler.h"
#include "llvm/IR/Module.h"
-#include "llvm/Object/IRObjectFile.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCObjectFileInfo.h"
+#include "llvm/MC/MCTargetAsmParser.h"
+#include "llvm/MC/MCParser/MCAsmParser.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
using namespace object;
-IRObjectFile::IRObjectFile(MemoryBuffer *Object, error_code &EC,
- LLVMContext &Context, bool BufferOwned)
- : SymbolicFile(Binary::ID_IR, Object, BufferOwned) {
- ErrorOr<Module*> MOrErr = parseBitcodeFile(Object, Context);
- if ((EC = MOrErr.getError()))
- return;
-
- M.reset(MOrErr.get());
-
+IRObjectFile::IRObjectFile(std::unique_ptr<MemoryBuffer> Object,
+ std::unique_ptr<Module> Mod)
+ : SymbolicFile(Binary::ID_IR, std::move(Object)), M(std::move(Mod)) {
// If we have a DataLayout, set up a mangler.
const DataLayout *DL = M->getDataLayout();
if (!DL)
return;
Mang.reset(new Mangler(DL));
+
+ const std::string &InlineAsm = M->getModuleInlineAsm();
+ if (InlineAsm.empty())
+ return;
+
+ StringRef Triple = M->getTargetTriple();
+ std::string Err;
+ const Target *T = TargetRegistry::lookupTarget(Triple, Err);
+ if (!T)
+ return;
+
+ std::unique_ptr<MCRegisterInfo> MRI(T->createMCRegInfo(Triple));
+ if (!MRI)
+ return;
+
+ std::unique_ptr<MCAsmInfo> MAI(T->createMCAsmInfo(*MRI, Triple));
+ if (!MAI)
+ return;
+
+ std::unique_ptr<MCSubtargetInfo> STI(
+ T->createMCSubtargetInfo(Triple, "", ""));
+ if (!STI)
+ return;
+
+ std::unique_ptr<MCInstrInfo> MCII(T->createMCInstrInfo());
+ if (!MCII)
+ return;
+
+ MCObjectFileInfo MOFI;
+ MCContext MCCtx(MAI.get(), MRI.get(), &MOFI);
+ MOFI.InitMCObjectFileInfo(Triple, Reloc::Default, CodeModel::Default, MCCtx);
+ std::unique_ptr<RecordStreamer> Streamer(new RecordStreamer(MCCtx));
+
+ std::unique_ptr<MemoryBuffer> Buffer(MemoryBuffer::getMemBuffer(InlineAsm));
+ SourceMgr SrcMgr;
+ SrcMgr.AddNewSourceBuffer(Buffer.release(), SMLoc());
+ std::unique_ptr<MCAsmParser> Parser(
+ createMCAsmParser(SrcMgr, MCCtx, *Streamer, *MAI));
+
+ MCTargetOptions MCOptions;
+ std::unique_ptr<MCTargetAsmParser> TAP(
+ T->createMCAsmParser(*STI, *Parser, *MCII, MCOptions));
+ if (!TAP)
+ return;
+
+ Parser->setTargetParser(*TAP);
+ if (Parser->Run(false))
+ return;
+
+ for (auto &KV : *Streamer) {
+ StringRef Key = KV.first();
+ RecordStreamer::State Value = KV.second;
+ uint32_t Res = BasicSymbolRef::SF_None;
+ switch (Value) {
+ case RecordStreamer::NeverSeen:
+ llvm_unreachable("foo");
+ case RecordStreamer::DefinedGlobal:
+ Res |= BasicSymbolRef::SF_Global;
+ break;
+ case RecordStreamer::Defined:
+ break;
+ case RecordStreamer::Global:
+ case RecordStreamer::Used:
+ Res |= BasicSymbolRef::SF_Undefined;
+ Res |= BasicSymbolRef::SF_Global;
+ break;
+ }
+ AsmSymbols.push_back(
+ std::make_pair<std::string, uint32_t>(Key, std::move(Res)));
+ }
}
-static const GlobalValue &getGV(DataRefImpl &Symb) {
- return *reinterpret_cast<GlobalValue*>(Symb.p & ~uintptr_t(3));
+IRObjectFile::~IRObjectFile() {
+ GVMaterializer *GVM = M->getMaterializer();
+ if (GVM)
+ GVM->releaseBuffer();
+ }
+
+static const GlobalValue *getGV(DataRefImpl &Symb) {
+ if ((Symb.p & 3) == 3)
+ return nullptr;
+
+ return reinterpret_cast<GlobalValue*>(Symb.p & ~uintptr_t(3));
}
static uintptr_t skipEmpty(Module::const_alias_iterator I, const Module &M) {
@@ -62,68 +147,109 @@ static uintptr_t skipEmpty(Module::const_iterator I, const Module &M) {
return reinterpret_cast<uintptr_t>(GV) | 0;
}
+static unsigned getAsmSymIndex(DataRefImpl Symb) {
+ assert((Symb.p & uintptr_t(3)) == 3);
+ uintptr_t Index = Symb.p & ~uintptr_t(3);
+ Index >>= 2;
+ return Index;
+}
+
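The low two bits of DataRefImpl::p act as a tag: 0 = function, 1 = global variable, 2 = alias, and 3 = inline-asm symbol, whose payload is an index shifted left by two rather than a pointer. A standalone sketch of that scheme (all names hypothetical):

#include <cassert>
#include <cstdint>

enum SymKind { Function = 0, GlobalVar = 1, Alias = 2, AsmSymbol = 3 };

// Pack a pointer and its kind into one word; pointers must be at
// least 4-byte aligned so the low two bits are free for the tag.
static uintptr_t makeRef(const void *GV, SymKind K) {
  assert(K != AsmSymbol && (uintptr_t(GV) & 3) == 0 && "need free low bits");
  return uintptr_t(GV) | K;
}

// Asm symbols carry an index instead of a pointer.
static uintptr_t makeAsmRef(unsigned Index) {
  return (uintptr_t(Index) << 2) | AsmSymbol;
}

static SymKind kindOf(uintptr_t Ref) { return SymKind(Ref & 3); }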
void IRObjectFile::moveSymbolNext(DataRefImpl &Symb) const {
- const GlobalValue *GV = &getGV(Symb);
- const Module &M = *GV->getParent();
+ const GlobalValue *GV = getGV(Symb);
uintptr_t Res;
+
switch (Symb.p & 3) {
case 0: {
Module::const_iterator Iter(static_cast<const Function*>(GV));
++Iter;
- Res = skipEmpty(Iter, M);
+ Res = skipEmpty(Iter, *M);
break;
}
case 1: {
Module::const_global_iterator Iter(static_cast<const GlobalVariable*>(GV));
++Iter;
- Res = skipEmpty(Iter, M);
+ Res = skipEmpty(Iter, *M);
break;
}
case 2: {
Module::const_alias_iterator Iter(static_cast<const GlobalAlias*>(GV));
++Iter;
- Res = skipEmpty(Iter, M);
+ Res = skipEmpty(Iter, *M);
+ break;
+ }
+ case 3: {
+ unsigned Index = getAsmSymIndex(Symb);
+ assert(Index < AsmSymbols.size());
+ ++Index;
+ Res = (Index << 2) | 3;
break;
}
- case 3:
- llvm_unreachable("Invalid symbol reference");
}
Symb.p = Res;
}
-error_code IRObjectFile::printSymbolName(raw_ostream &OS,
- DataRefImpl Symb) const {
- const GlobalValue &GV = getGV(Symb);
+std::error_code IRObjectFile::printSymbolName(raw_ostream &OS,
+ DataRefImpl Symb) const {
+ const GlobalValue *GV = getGV(Symb);
+ if (!GV) {
+ unsigned Index = getAsmSymIndex(Symb);
+ assert(Index <= AsmSymbols.size());
+ OS << AsmSymbols[Index].first;
+ return object_error::success;
+ }
if (Mang)
- Mang->getNameWithPrefix(OS, &GV, false);
+ Mang->getNameWithPrefix(OS, GV, false);
else
- OS << GV.getName();
+ OS << GV->getName();
return object_error::success;
}
+static bool isDeclaration(const GlobalValue &V) {
+ if (V.hasAvailableExternallyLinkage())
+ return true;
+
+ if (V.isMaterializable())
+ return false;
+
+ return V.isDeclaration();
+}
+
uint32_t IRObjectFile::getSymbolFlags(DataRefImpl Symb) const {
- const GlobalValue &GV = getGV(Symb);
+ const GlobalValue *GV = getGV(Symb);
+
+ if (!GV) {
+ unsigned Index = getAsmSymIndex(Symb);
+ assert(Index <= AsmSymbols.size());
+ return AsmSymbols[Index].second;
+ }
uint32_t Res = BasicSymbolRef::SF_None;
- if (GV.isDeclaration() || GV.hasAvailableExternallyLinkage())
+ if (isDeclaration(*GV))
Res |= BasicSymbolRef::SF_Undefined;
- if (GV.hasPrivateLinkage())
+ if (GV->hasPrivateLinkage())
Res |= BasicSymbolRef::SF_FormatSpecific;
- if (!GV.hasLocalLinkage())
+ if (!GV->hasLocalLinkage())
Res |= BasicSymbolRef::SF_Global;
- if (GV.hasCommonLinkage())
+ if (GV->hasCommonLinkage())
Res |= BasicSymbolRef::SF_Common;
- if (GV.hasLinkOnceLinkage() || GV.hasWeakLinkage())
+ if (GV->hasLinkOnceLinkage() || GV->hasWeakLinkage())
Res |= BasicSymbolRef::SF_Weak;
+ if (GV->getName().startswith("llvm."))
+ Res |= BasicSymbolRef::SF_FormatSpecific;
+ else if (auto *Var = dyn_cast<GlobalVariable>(GV)) {
+ if (Var->getSection() == StringRef("llvm.metadata"))
+ Res |= BasicSymbolRef::SF_FormatSpecific;
+ }
+
return Res;
}
-const GlobalValue &IRObjectFile::getSymbolGV(DataRefImpl Symb) const {
- const GlobalValue &GV = getGV(Symb);
+const GlobalValue *IRObjectFile::getSymbolGV(DataRefImpl Symb) const {
+ const GlobalValue *GV = getGV(Symb);
return GV;
}
@@ -136,16 +262,18 @@ basic_symbol_iterator IRObjectFile::symbol_begin_impl() const {
basic_symbol_iterator IRObjectFile::symbol_end_impl() const {
DataRefImpl Ret;
- Ret.p = 3;
+ uint64_t NumAsm = AsmSymbols.size();
+ NumAsm <<= 2;
+ Ret.p = 3 | NumAsm;
return basic_symbol_iterator(BasicSymbolRef(Ret, this));
}
-ErrorOr<SymbolicFile *> llvm::object::SymbolicFile::createIRObjectFile(
- MemoryBuffer *Object, LLVMContext &Context, bool BufferOwned) {
- error_code EC;
- std::unique_ptr<IRObjectFile> Ret(
- new IRObjectFile(Object, EC, Context, BufferOwned));
- if (EC)
+ErrorOr<IRObjectFile *> llvm::object::IRObjectFile::createIRObjectFile(
+ std::unique_ptr<MemoryBuffer> Object, LLVMContext &Context) {
+ ErrorOr<Module *> MOrErr = getLazyBitcodeModule(Object.get(), Context);
+ if (std::error_code EC = MOrErr.getError())
return EC;
- return Ret.release();
+
+ std::unique_ptr<Module> M(MOrErr.get());
+ return new IRObjectFile(std::move(Object), std::move(M));
}
diff --git a/lib/Object/LLVMBuild.txt b/lib/Object/LLVMBuild.txt
index 7813832..8acacba 100644
--- a/lib/Object/LLVMBuild.txt
+++ b/lib/Object/LLVMBuild.txt
@@ -19,4 +19,4 @@
type = Library
name = Object
parent = Libraries
-required_libraries = BitReader Core Support
+required_libraries = BitReader Core Support MC MCParser
diff --git a/lib/Object/MachOObjectFile.cpp b/lib/Object/MachOObjectFile.cpp
index c6bab03..4919114 100644
--- a/lib/Object/MachOObjectFile.cpp
+++ b/lib/Object/MachOObjectFile.cpp
@@ -28,6 +28,7 @@ using namespace llvm;
using namespace object;
namespace llvm {
+
namespace object {
struct nlist_base {
@@ -43,190 +44,195 @@ struct section_base {
};
template<typename T>
-static void SwapValue(T &Value) {
- Value = sys::SwapByteOrder(Value);
-}
-
-template<typename T>
static void SwapStruct(T &Value);
template<>
void SwapStruct(MachO::any_relocation_info &H) {
- SwapValue(H.r_word0);
- SwapValue(H.r_word1);
+ sys::swapByteOrder(H.r_word0);
+ sys::swapByteOrder(H.r_word1);
}
template<>
void SwapStruct(MachO::load_command &L) {
- SwapValue(L.cmd);
- SwapValue(L.cmdsize);
+ sys::swapByteOrder(L.cmd);
+ sys::swapByteOrder(L.cmdsize);
}
template<>
void SwapStruct(nlist_base &S) {
- SwapValue(S.n_strx);
- SwapValue(S.n_desc);
+ sys::swapByteOrder(S.n_strx);
+ sys::swapByteOrder(S.n_desc);
}
template<>
void SwapStruct(MachO::section &S) {
- SwapValue(S.addr);
- SwapValue(S.size);
- SwapValue(S.offset);
- SwapValue(S.align);
- SwapValue(S.reloff);
- SwapValue(S.nreloc);
- SwapValue(S.flags);
- SwapValue(S.reserved1);
- SwapValue(S.reserved2);
+ sys::swapByteOrder(S.addr);
+ sys::swapByteOrder(S.size);
+ sys::swapByteOrder(S.offset);
+ sys::swapByteOrder(S.align);
+ sys::swapByteOrder(S.reloff);
+ sys::swapByteOrder(S.nreloc);
+ sys::swapByteOrder(S.flags);
+ sys::swapByteOrder(S.reserved1);
+ sys::swapByteOrder(S.reserved2);
}
template<>
void SwapStruct(MachO::section_64 &S) {
- SwapValue(S.addr);
- SwapValue(S.size);
- SwapValue(S.offset);
- SwapValue(S.align);
- SwapValue(S.reloff);
- SwapValue(S.nreloc);
- SwapValue(S.flags);
- SwapValue(S.reserved1);
- SwapValue(S.reserved2);
- SwapValue(S.reserved3);
+ sys::swapByteOrder(S.addr);
+ sys::swapByteOrder(S.size);
+ sys::swapByteOrder(S.offset);
+ sys::swapByteOrder(S.align);
+ sys::swapByteOrder(S.reloff);
+ sys::swapByteOrder(S.nreloc);
+ sys::swapByteOrder(S.flags);
+ sys::swapByteOrder(S.reserved1);
+ sys::swapByteOrder(S.reserved2);
+ sys::swapByteOrder(S.reserved3);
}
template<>
void SwapStruct(MachO::nlist &S) {
- SwapValue(S.n_strx);
- SwapValue(S.n_desc);
- SwapValue(S.n_value);
+ sys::swapByteOrder(S.n_strx);
+ sys::swapByteOrder(S.n_desc);
+ sys::swapByteOrder(S.n_value);
}
template<>
void SwapStruct(MachO::nlist_64 &S) {
- SwapValue(S.n_strx);
- SwapValue(S.n_desc);
- SwapValue(S.n_value);
+ sys::swapByteOrder(S.n_strx);
+ sys::swapByteOrder(S.n_desc);
+ sys::swapByteOrder(S.n_value);
}
template<>
void SwapStruct(MachO::mach_header &H) {
- SwapValue(H.magic);
- SwapValue(H.cputype);
- SwapValue(H.cpusubtype);
- SwapValue(H.filetype);
- SwapValue(H.ncmds);
- SwapValue(H.sizeofcmds);
- SwapValue(H.flags);
+ sys::swapByteOrder(H.magic);
+ sys::swapByteOrder(H.cputype);
+ sys::swapByteOrder(H.cpusubtype);
+ sys::swapByteOrder(H.filetype);
+ sys::swapByteOrder(H.ncmds);
+ sys::swapByteOrder(H.sizeofcmds);
+ sys::swapByteOrder(H.flags);
}
template<>
void SwapStruct(MachO::mach_header_64 &H) {
- SwapValue(H.magic);
- SwapValue(H.cputype);
- SwapValue(H.cpusubtype);
- SwapValue(H.filetype);
- SwapValue(H.ncmds);
- SwapValue(H.sizeofcmds);
- SwapValue(H.flags);
- SwapValue(H.reserved);
+ sys::swapByteOrder(H.magic);
+ sys::swapByteOrder(H.cputype);
+ sys::swapByteOrder(H.cpusubtype);
+ sys::swapByteOrder(H.filetype);
+ sys::swapByteOrder(H.ncmds);
+ sys::swapByteOrder(H.sizeofcmds);
+ sys::swapByteOrder(H.flags);
+ sys::swapByteOrder(H.reserved);
}
template<>
void SwapStruct(MachO::symtab_command &C) {
- SwapValue(C.cmd);
- SwapValue(C.cmdsize);
- SwapValue(C.symoff);
- SwapValue(C.nsyms);
- SwapValue(C.stroff);
- SwapValue(C.strsize);
+ sys::swapByteOrder(C.cmd);
+ sys::swapByteOrder(C.cmdsize);
+ sys::swapByteOrder(C.symoff);
+ sys::swapByteOrder(C.nsyms);
+ sys::swapByteOrder(C.stroff);
+ sys::swapByteOrder(C.strsize);
}
template<>
void SwapStruct(MachO::dysymtab_command &C) {
- SwapValue(C.cmd);
- SwapValue(C.cmdsize);
- SwapValue(C.ilocalsym);
- SwapValue(C.nlocalsym);
- SwapValue(C.iextdefsym);
- SwapValue(C.nextdefsym);
- SwapValue(C.iundefsym);
- SwapValue(C.nundefsym);
- SwapValue(C.tocoff);
- SwapValue(C.ntoc);
- SwapValue(C.modtaboff);
- SwapValue(C.nmodtab);
- SwapValue(C.extrefsymoff);
- SwapValue(C.nextrefsyms);
- SwapValue(C.indirectsymoff);
- SwapValue(C.nindirectsyms);
- SwapValue(C.extreloff);
- SwapValue(C.nextrel);
- SwapValue(C.locreloff);
- SwapValue(C.nlocrel);
+ sys::swapByteOrder(C.cmd);
+ sys::swapByteOrder(C.cmdsize);
+ sys::swapByteOrder(C.ilocalsym);
+ sys::swapByteOrder(C.nlocalsym);
+ sys::swapByteOrder(C.iextdefsym);
+ sys::swapByteOrder(C.nextdefsym);
+ sys::swapByteOrder(C.iundefsym);
+ sys::swapByteOrder(C.nundefsym);
+ sys::swapByteOrder(C.tocoff);
+ sys::swapByteOrder(C.ntoc);
+ sys::swapByteOrder(C.modtaboff);
+ sys::swapByteOrder(C.nmodtab);
+ sys::swapByteOrder(C.extrefsymoff);
+ sys::swapByteOrder(C.nextrefsyms);
+ sys::swapByteOrder(C.indirectsymoff);
+ sys::swapByteOrder(C.nindirectsyms);
+ sys::swapByteOrder(C.extreloff);
+ sys::swapByteOrder(C.nextrel);
+ sys::swapByteOrder(C.locreloff);
+ sys::swapByteOrder(C.nlocrel);
}
template<>
void SwapStruct(MachO::linkedit_data_command &C) {
- SwapValue(C.cmd);
- SwapValue(C.cmdsize);
- SwapValue(C.dataoff);
- SwapValue(C.datasize);
+ sys::swapByteOrder(C.cmd);
+ sys::swapByteOrder(C.cmdsize);
+ sys::swapByteOrder(C.dataoff);
+ sys::swapByteOrder(C.datasize);
}
template<>
void SwapStruct(MachO::segment_command &C) {
- SwapValue(C.cmd);
- SwapValue(C.cmdsize);
- SwapValue(C.vmaddr);
- SwapValue(C.vmsize);
- SwapValue(C.fileoff);
- SwapValue(C.filesize);
- SwapValue(C.maxprot);
- SwapValue(C.initprot);
- SwapValue(C.nsects);
- SwapValue(C.flags);
+ sys::swapByteOrder(C.cmd);
+ sys::swapByteOrder(C.cmdsize);
+ sys::swapByteOrder(C.vmaddr);
+ sys::swapByteOrder(C.vmsize);
+ sys::swapByteOrder(C.fileoff);
+ sys::swapByteOrder(C.filesize);
+ sys::swapByteOrder(C.maxprot);
+ sys::swapByteOrder(C.initprot);
+ sys::swapByteOrder(C.nsects);
+ sys::swapByteOrder(C.flags);
}
template<>
void SwapStruct(MachO::segment_command_64 &C) {
- SwapValue(C.cmd);
- SwapValue(C.cmdsize);
- SwapValue(C.vmaddr);
- SwapValue(C.vmsize);
- SwapValue(C.fileoff);
- SwapValue(C.filesize);
- SwapValue(C.maxprot);
- SwapValue(C.initprot);
- SwapValue(C.nsects);
- SwapValue(C.flags);
+ sys::swapByteOrder(C.cmd);
+ sys::swapByteOrder(C.cmdsize);
+ sys::swapByteOrder(C.vmaddr);
+ sys::swapByteOrder(C.vmsize);
+ sys::swapByteOrder(C.fileoff);
+ sys::swapByteOrder(C.filesize);
+ sys::swapByteOrder(C.maxprot);
+ sys::swapByteOrder(C.initprot);
+ sys::swapByteOrder(C.nsects);
+ sys::swapByteOrder(C.flags);
}
template<>
void SwapStruct(uint32_t &C) {
- SwapValue(C);
+ sys::swapByteOrder(C);
}
template<>
void SwapStruct(MachO::linker_options_command &C) {
- SwapValue(C.cmd);
- SwapValue(C.cmdsize);
- SwapValue(C.count);
+ sys::swapByteOrder(C.cmd);
+ sys::swapByteOrder(C.cmdsize);
+ sys::swapByteOrder(C.count);
}
template<>
void SwapStruct(MachO::version_min_command&C) {
- SwapValue(C.cmd);
- SwapValue(C.cmdsize);
- SwapValue(C.version);
- SwapValue(C.reserved);
+ sys::swapByteOrder(C.cmd);
+ sys::swapByteOrder(C.cmdsize);
+ sys::swapByteOrder(C.version);
+ sys::swapByteOrder(C.reserved);
+}
+
+template<>
+void SwapStruct(MachO::dylib_command&C) {
+ sys::swapByteOrder(C.cmd);
+ sys::swapByteOrder(C.cmdsize);
+ sys::swapByteOrder(C.dylib.name);
+ sys::swapByteOrder(C.dylib.timestamp);
+ sys::swapByteOrder(C.dylib.current_version);
+ sys::swapByteOrder(C.dylib.compatibility_version);
}
template<>
void SwapStruct(MachO::data_in_code_entry &C) {
- SwapValue(C.offset);
- SwapValue(C.length);
- SwapValue(C.kind);
+ sys::swapByteOrder(C.offset);
+ sys::swapByteOrder(C.length);
+ sys::swapByteOrder(C.kind);
}
template<typename T>
@@ -306,7 +312,7 @@ static void printRelocationTargetName(const MachOObjectFile *O,
uint32_t Val = O->getPlainRelocationSymbolNum(RE);
for (const SymbolRef &Symbol : O->symbols()) {
- error_code ec;
+ std::error_code ec;
uint64_t Addr;
StringRef Name;
@@ -323,7 +329,7 @@ static void printRelocationTargetName(const MachOObjectFile *O,
// If we couldn't find a symbol that this relocation refers to, try
// to find a section beginning instead.
for (const SectionRef &Section : O->sections()) {
- error_code ec;
+ std::error_code ec;
uint64_t Addr;
StringRef Name;
@@ -416,10 +422,10 @@ static uint32_t getSectionFlags(const MachOObjectFile *O,
return Sect.flags;
}
-MachOObjectFile::MachOObjectFile(MemoryBuffer *Object, bool IsLittleEndian,
- bool Is64bits, error_code &EC,
- bool BufferOwned)
- : ObjectFile(getMachOType(IsLittleEndian, Is64bits), Object, BufferOwned),
+MachOObjectFile::MachOObjectFile(std::unique_ptr<MemoryBuffer> Object,
+ bool IsLittleEndian, bool Is64bits,
+ std::error_code &EC)
+ : ObjectFile(getMachOType(IsLittleEndian, Is64bits), std::move(Object)),
SymtabLoadCmd(nullptr), DysymtabLoadCmd(nullptr),
DataInCodeLoadCmd(nullptr) {
uint32_t LoadCommandCount = this->getHeader().ncmds;
@@ -443,6 +449,12 @@ MachOObjectFile::MachOObjectFile(MemoryBuffer *Object, bool IsLittleEndian,
const char *Sec = getSectionPtr(this, Load, J);
Sections.push_back(Sec);
}
+ } else if (Load.C.cmd == MachO::LC_LOAD_DYLIB ||
+ Load.C.cmd == MachO::LC_LOAD_WEAK_DYLIB ||
+ Load.C.cmd == MachO::LC_LAZY_LOAD_DYLIB ||
+ Load.C.cmd == MachO::LC_REEXPORT_DYLIB ||
+ Load.C.cmd == MachO::LC_LOAD_UPWARD_DYLIB) {
+ Libraries.push_back(Load.Ptr);
}
if (I == LoadCommandCount - 1)
@@ -459,8 +471,8 @@ void MachOObjectFile::moveSymbolNext(DataRefImpl &Symb) const {
Symb.p += SymbolTableEntrySize;
}
-error_code MachOObjectFile::getSymbolName(DataRefImpl Symb,
- StringRef &Res) const {
+std::error_code MachOObjectFile::getSymbolName(DataRefImpl Symb,
+ StringRef &Res) const {
StringRef StringTable = getStringTableData();
nlist_base Entry = getSymbolTableEntryBase(this, Symb);
const char *Start = &StringTable.data()[Entry.n_strx];
@@ -468,8 +480,32 @@ error_code MachOObjectFile::getSymbolName(DataRefImpl Symb,
return object_error::success;
}
-error_code MachOObjectFile::getSymbolAddress(DataRefImpl Symb,
- uint64_t &Res) const {
+// getIndirectName() returns the name of the aliased symbol whose string
+// table index is in the n_value field.
+std::error_code MachOObjectFile::getIndirectName(DataRefImpl Symb,
+ StringRef &Res) const {
+ StringRef StringTable = getStringTableData();
+ uint64_t NValue;
+ if (is64Bit()) {
+ MachO::nlist_64 Entry = getSymbol64TableEntry(Symb);
+ NValue = Entry.n_value;
+ if ((Entry.n_type & MachO::N_TYPE) != MachO::N_INDR)
+ return object_error::parse_failed;
+ } else {
+ MachO::nlist Entry = getSymbolTableEntry(Symb);
+ NValue = Entry.n_value;
+ if ((Entry.n_type & MachO::N_TYPE) != MachO::N_INDR)
+ return object_error::parse_failed;
+ }
+ if (NValue >= StringTable.size())
+ return object_error::parse_failed;
+ const char *Start = &StringTable.data()[NValue];
+ Res = StringRef(Start);
+ return object_error::success;
+}
+
+std::error_code MachOObjectFile::getSymbolAddress(DataRefImpl Symb,
+ uint64_t &Res) const {
if (is64Bit()) {
MachO::nlist_64 Entry = getSymbol64TableEntry(Symb);
if ((Entry.n_type & MachO::N_TYPE) == MachO::N_UNDF &&
@@ -488,8 +524,8 @@ error_code MachOObjectFile::getSymbolAddress(DataRefImpl Symb,
return object_error::success;
}
-error_code MachOObjectFile::getSymbolAlignment(DataRefImpl DRI,
- uint32_t &Result) const {
+std::error_code MachOObjectFile::getSymbolAlignment(DataRefImpl DRI,
+ uint32_t &Result) const {
uint32_t flags = getSymbolFlags(DRI);
if (flags & SymbolRef::SF_Common) {
nlist_base Entry = getSymbolTableEntryBase(this, DRI);
@@ -500,8 +536,8 @@ error_code MachOObjectFile::getSymbolAlignment(DataRefImpl DRI,
return object_error::success;
}
-error_code MachOObjectFile::getSymbolSize(DataRefImpl DRI,
- uint64_t &Result) const {
+std::error_code MachOObjectFile::getSymbolSize(DataRefImpl DRI,
+ uint64_t &Result) const {
uint64_t BeginOffset;
uint64_t EndOffset = 0;
uint8_t SectionIndex;
@@ -549,8 +585,8 @@ error_code MachOObjectFile::getSymbolSize(DataRefImpl DRI,
return object_error::success;
}
-error_code MachOObjectFile::getSymbolType(DataRefImpl Symb,
- SymbolRef::Type &Res) const {
+std::error_code MachOObjectFile::getSymbolType(DataRefImpl Symb,
+ SymbolRef::Type &Res) const {
nlist_base Entry = getSymbolTableEntryBase(this, Symb);
uint8_t n_type = Entry.n_type;
@@ -584,6 +620,9 @@ uint32_t MachOObjectFile::getSymbolFlags(DataRefImpl DRI) const {
if ((MachOType & MachO::N_TYPE) == MachO::N_UNDF)
Result |= SymbolRef::SF_Undefined;
+ if ((MachOType & MachO::N_TYPE) == MachO::N_INDR)
+ Result |= SymbolRef::SF_Indirect;
+
if (MachOType & MachO::N_STAB)
Result |= SymbolRef::SF_FormatSpecific;
@@ -606,9 +645,8 @@ uint32_t MachOObjectFile::getSymbolFlags(DataRefImpl DRI) const {
return Result;
}
-error_code
-MachOObjectFile::getSymbolSection(DataRefImpl Symb,
- section_iterator &Res) const {
+std::error_code MachOObjectFile::getSymbolSection(DataRefImpl Symb,
+ section_iterator &Res) const {
nlist_base Entry = getSymbolTableEntryBase(this, Symb);
uint8_t index = Entry.n_sect;
@@ -627,15 +665,15 @@ void MachOObjectFile::moveSectionNext(DataRefImpl &Sec) const {
Sec.d.a++;
}
-error_code
-MachOObjectFile::getSectionName(DataRefImpl Sec, StringRef &Result) const {
+std::error_code MachOObjectFile::getSectionName(DataRefImpl Sec,
+ StringRef &Result) const {
ArrayRef<char> Raw = getSectionRawName(Sec);
Result = parseSegmentOrSectionName(Raw.data());
return object_error::success;
}
-error_code
-MachOObjectFile::getSectionAddress(DataRefImpl Sec, uint64_t &Res) const {
+std::error_code MachOObjectFile::getSectionAddress(DataRefImpl Sec,
+ uint64_t &Res) const {
if (is64Bit()) {
MachO::section_64 Sect = getSection64(Sec);
Res = Sect.addr;
@@ -646,8 +684,8 @@ MachOObjectFile::getSectionAddress(DataRefImpl Sec, uint64_t &Res) const {
return object_error::success;
}
-error_code
-MachOObjectFile::getSectionSize(DataRefImpl Sec, uint64_t &Res) const {
+std::error_code MachOObjectFile::getSectionSize(DataRefImpl Sec,
+ uint64_t &Res) const {
if (is64Bit()) {
MachO::section_64 Sect = getSection64(Sec);
Res = Sect.size;
@@ -659,8 +697,8 @@ MachOObjectFile::getSectionSize(DataRefImpl Sec, uint64_t &Res) const {
return object_error::success;
}
-error_code
-MachOObjectFile::getSectionContents(DataRefImpl Sec, StringRef &Res) const {
+std::error_code MachOObjectFile::getSectionContents(DataRefImpl Sec,
+ StringRef &Res) const {
uint32_t Offset;
uint64_t Size;
@@ -678,8 +716,8 @@ MachOObjectFile::getSectionContents(DataRefImpl Sec, StringRef &Res) const {
return object_error::success;
}
-error_code
-MachOObjectFile::getSectionAlignment(DataRefImpl Sec, uint64_t &Res) const {
+std::error_code MachOObjectFile::getSectionAlignment(DataRefImpl Sec,
+ uint64_t &Res) const {
uint32_t Align;
if (is64Bit()) {
MachO::section_64 Sect = getSection64(Sec);
@@ -693,14 +731,15 @@ MachOObjectFile::getSectionAlignment(DataRefImpl Sec, uint64_t &Res) const {
return object_error::success;
}
-error_code
-MachOObjectFile::isSectionText(DataRefImpl Sec, bool &Res) const {
+std::error_code MachOObjectFile::isSectionText(DataRefImpl Sec,
+ bool &Res) const {
uint32_t Flags = getSectionFlags(this, Sec);
Res = Flags & MachO::S_ATTR_PURE_INSTRUCTIONS;
return object_error::success;
}
-error_code MachOObjectFile::isSectionData(DataRefImpl Sec, bool &Result) const {
+std::error_code MachOObjectFile::isSectionData(DataRefImpl Sec,
+ bool &Result) const {
uint32_t Flags = getSectionFlags(this, Sec);
unsigned SectionType = Flags & MachO::SECTION_TYPE;
Result = !(Flags & MachO::S_ATTR_PURE_INSTRUCTIONS) &&
@@ -709,7 +748,8 @@ error_code MachOObjectFile::isSectionData(DataRefImpl Sec, bool &Result) const {
return object_error::success;
}
-error_code MachOObjectFile::isSectionBSS(DataRefImpl Sec, bool &Result) const {
+std::error_code MachOObjectFile::isSectionBSS(DataRefImpl Sec,
+ bool &Result) const {
uint32_t Flags = getSectionFlags(this, Sec);
unsigned SectionType = Flags & MachO::SECTION_TYPE;
Result = !(Flags & MachO::S_ATTR_PURE_INSTRUCTIONS) &&
@@ -718,7 +758,7 @@ error_code MachOObjectFile::isSectionBSS(DataRefImpl Sec, bool &Result) const {
return object_error::success;
}
-error_code
+std::error_code
MachOObjectFile::isSectionRequiredForExecution(DataRefImpl Sec,
bool &Result) const {
// FIXME: Unimplemented.
@@ -726,15 +766,15 @@ MachOObjectFile::isSectionRequiredForExecution(DataRefImpl Sec,
return object_error::success;
}
-error_code MachOObjectFile::isSectionVirtual(DataRefImpl Sec,
- bool &Result) const {
+std::error_code MachOObjectFile::isSectionVirtual(DataRefImpl Sec,
+ bool &Result) const {
// FIXME: Unimplemented.
Result = false;
return object_error::success;
}
-error_code
-MachOObjectFile::isSectionZeroInit(DataRefImpl Sec, bool &Res) const {
+std::error_code MachOObjectFile::isSectionZeroInit(DataRefImpl Sec,
+ bool &Res) const {
uint32_t Flags = getSectionFlags(this, Sec);
unsigned SectionType = Flags & MachO::SECTION_TYPE;
Res = SectionType == MachO::S_ZEROFILL ||
@@ -742,8 +782,8 @@ MachOObjectFile::isSectionZeroInit(DataRefImpl Sec, bool &Res) const {
return object_error::success;
}
-error_code MachOObjectFile::isSectionReadOnlyData(DataRefImpl Sec,
- bool &Result) const {
+std::error_code MachOObjectFile::isSectionReadOnlyData(DataRefImpl Sec,
+ bool &Result) const {
// Consider using the code from isSectionText to look for __const sections.
// Alternately, emit S_ATTR_PURE_INSTRUCTIONS and/or S_ATTR_SOME_INSTRUCTIONS
// to use section attributes to distinguish code from data.
@@ -753,9 +793,9 @@ error_code MachOObjectFile::isSectionReadOnlyData(DataRefImpl Sec,
return object_error::success;
}
-error_code
-MachOObjectFile::sectionContainsSymbol(DataRefImpl Sec, DataRefImpl Symb,
- bool &Result) const {
+std::error_code MachOObjectFile::sectionContainsSymbol(DataRefImpl Sec,
+ DataRefImpl Symb,
+ bool &Result) const {
SymbolRef::Type ST;
this->getSymbolType(Symb, ST);
if (ST == SymbolRef::ST_Unknown) {
@@ -803,8 +843,8 @@ void MachOObjectFile::moveRelocationNext(DataRefImpl &Rel) const {
++Rel.d.b;
}
-error_code
-MachOObjectFile::getRelocationAddress(DataRefImpl Rel, uint64_t &Res) const {
+std::error_code MachOObjectFile::getRelocationAddress(DataRefImpl Rel,
+ uint64_t &Res) const {
uint64_t Offset;
getRelocationOffset(Rel, Offset);
@@ -816,8 +856,8 @@ MachOObjectFile::getRelocationAddress(DataRefImpl Rel, uint64_t &Res) const {
return object_error::success;
}
-error_code MachOObjectFile::getRelocationOffset(DataRefImpl Rel,
- uint64_t &Res) const {
+std::error_code MachOObjectFile::getRelocationOffset(DataRefImpl Rel,
+ uint64_t &Res) const {
assert(getHeader().filetype == MachO::MH_OBJECT &&
"Only implemented for MH_OBJECT");
MachO::any_relocation_info RE = getRelocation(Rel);
@@ -828,6 +868,9 @@ error_code MachOObjectFile::getRelocationOffset(DataRefImpl Rel,
symbol_iterator
MachOObjectFile::getRelocationSymbol(DataRefImpl Rel) const {
MachO::any_relocation_info RE = getRelocation(Rel);
+ if (isRelocationScattered(RE))
+ return symbol_end();
+
uint32_t SymbolIdx = getPlainRelocationSymbolNum(RE);
bool isExtern = getPlainRelocationExternal(RE);
if (!isExtern)
@@ -843,14 +886,14 @@ MachOObjectFile::getRelocationSymbol(DataRefImpl Rel) const {
return symbol_iterator(SymbolRef(Sym, this));
}
-error_code MachOObjectFile::getRelocationType(DataRefImpl Rel,
- uint64_t &Res) const {
+std::error_code MachOObjectFile::getRelocationType(DataRefImpl Rel,
+ uint64_t &Res) const {
MachO::any_relocation_info RE = getRelocation(Rel);
Res = getAnyRelocationType(RE);
return object_error::success;
}
-error_code
+std::error_code
MachOObjectFile::getRelocationTypeName(DataRefImpl Rel,
SmallVectorImpl<char> &Result) const {
StringRef res;
@@ -963,7 +1006,7 @@ MachOObjectFile::getRelocationTypeName(DataRefImpl Rel,
return object_error::success;
}
-error_code
+std::error_code
MachOObjectFile::getRelocationValueString(DataRefImpl Rel,
SmallVectorImpl<char> &Result) const {
MachO::any_relocation_info RE = getRelocation(Rel);
@@ -1139,8 +1182,8 @@ MachOObjectFile::getRelocationValueString(DataRefImpl Rel,
return object_error::success;
}
-error_code
-MachOObjectFile::getRelocationHidden(DataRefImpl Rel, bool &Result) const {
+std::error_code MachOObjectFile::getRelocationHidden(DataRefImpl Rel,
+ bool &Result) const {
unsigned Arch = getArch();
uint64_t Type;
getRelocationType(Rel, Type);
@@ -1167,16 +1210,199 @@ MachOObjectFile::getRelocationHidden(DataRefImpl Rel, bool &Result) const {
return object_error::success;
}
-error_code MachOObjectFile::getLibraryNext(DataRefImpl LibData,
- LibraryRef &Res) const {
+std::error_code MachOObjectFile::getLibraryNext(DataRefImpl LibData,
+ LibraryRef &Res) const {
report_fatal_error("Needed libraries unimplemented in MachOObjectFile");
}
-error_code MachOObjectFile::getLibraryPath(DataRefImpl LibData,
- StringRef &Res) const {
+std::error_code MachOObjectFile::getLibraryPath(DataRefImpl LibData,
+ StringRef &Res) const {
report_fatal_error("Needed libraries unimplemented in MachOObjectFile");
}
+//
+// guessLibraryShortName() is passed the name of a dynamic library and returns
+// a guess at its short name. The name is returned as a substring of the
+// StringRef Name passed in. The name of the dynamic library is recognized as
+// a framework if it has one of the two following forms:
+//      Foo.framework/Versions/A/Foo
+//      Foo.framework/Foo
+// where A and Foo can be any string, and Foo may carry a trailing suffix
+// starting with an underbar. If Name is recognized as a framework then
+// isFramework is set to true; otherwise it is set to false. If Name has a
+// suffix then Suffix is set to the substring of Name that contains it;
+// otherwise Suffix is set to an empty StringRef.
+//
+// The name of the dynamic library is recognized as a library name if it has
+// one of the two following forms:
+//      libFoo.A.dylib
+//      libFoo.dylib
+// The library may have a suffix trailing the name Foo of the form:
+//      libFoo_profile.A.dylib
+//      libFoo_profile.dylib
+//
+// The name of the dynamic library is also recognized as a library name if it
+// has the following form:
+//      Foo.qtx
+//
+// If the name of the dynamic library matches none of the forms above, an
+// empty StringRef is returned.
+//
+StringRef MachOObjectFile::guessLibraryShortName(StringRef Name,
+ bool &isFramework,
+ StringRef &Suffix) {
+ StringRef Foo, F, DotFramework, V, Dylib, Lib, Dot, Qtx;
+ size_t a, b, c, d, Idx;
+
+ isFramework = false;
+ Suffix = StringRef();
+
+ // Pull off the last component and make Foo point to it
+ a = Name.rfind('/');
+ if (a == Name.npos || a == 0)
+ goto guess_library;
+ Foo = Name.slice(a+1, Name.npos);
+
+ // Look for a suffix starting with a '_'
+ Idx = Foo.rfind('_');
+ if (Idx != Foo.npos && Foo.size() >= 2) {
+ Suffix = Foo.slice(Idx, Foo.npos);
+ Foo = Foo.slice(0, Idx);
+ }
+
+ // First look for the form Foo.framework/Foo
+ b = Name.rfind('/', a);
+ if (b == Name.npos)
+ Idx = 0;
+ else
+ Idx = b+1;
+ F = Name.slice(Idx, Idx + Foo.size());
+ DotFramework = Name.slice(Idx + Foo.size(),
+ Idx + Foo.size() + sizeof(".framework/")-1);
+ if (F == Foo && DotFramework == ".framework/") {
+ isFramework = true;
+ return Foo;
+ }
+
+ // Next look for the form Foo.framework/Versions/A/Foo
+ if (b == Name.npos)
+ goto guess_library;
+ c = Name.rfind('/', b);
+ if (c == Name.npos || c == 0)
+ goto guess_library;
+ V = Name.slice(c+1, Name.npos);
+ if (!V.startswith("Versions/"))
+ goto guess_library;
+ d = Name.rfind('/', c);
+ if (d == Name.npos)
+ Idx = 0;
+ else
+ Idx = d+1;
+ F = Name.slice(Idx, Idx + Foo.size());
+ DotFramework = Name.slice(Idx + Foo.size(),
+ Idx + Foo.size() + sizeof(".framework/")-1);
+ if (F == Foo && DotFramework == ".framework/") {
+ isFramework = true;
+ return Foo;
+ }
+
+guess_library:
+  // Pull off the suffix after the "." and make 'a' point to it.
+ a = Name.rfind('.');
+ if (a == Name.npos || a == 0)
+ return StringRef();
+ Dylib = Name.slice(a, Name.npos);
+ if (Dylib != ".dylib")
+ goto guess_qtx;
+
+ // First pull off the version letter for the form Foo.A.dylib if any.
+ if (a >= 3) {
+ Dot = Name.slice(a-2, a-1);
+ if (Dot == ".")
+ a = a - 2;
+ }
+
+ b = Name.rfind('/', a);
+ if (b == Name.npos)
+ b = 0;
+ else
+ b = b+1;
+ // ignore any suffix after an underbar like Foo_profile.A.dylib
+ Idx = Name.find('_', b);
+ if (Idx != Name.npos && Idx != b) {
+ Lib = Name.slice(b, Idx);
+ Suffix = Name.slice(Idx, a);
+ }
+ else
+ Lib = Name.slice(b, a);
+ // There are incorrect library names of the form:
+ // libATS.A_profile.dylib so check for these.
+ if (Lib.size() >= 3) {
+ Dot = Lib.slice(Lib.size()-2, Lib.size()-1);
+ if (Dot == ".")
+ Lib = Lib.slice(0, Lib.size()-2);
+ }
+ return Lib;
+
+guess_qtx:
+ Qtx = Name.slice(a, Name.npos);
+ if (Qtx != ".qtx")
+ return StringRef();
+ b = Name.rfind('/', a);
+ if (b == Name.npos)
+ Lib = Name.slice(0, a);
+ else
+ Lib = Name.slice(b+1, a);
+ // There are library names of the form: QT.A.qtx so check for these.
+ if (Lib.size() >= 3) {
+ Dot = Lib.slice(Lib.size()-2, Lib.size()-1);
+ if (Dot == ".")
+ Lib = Lib.slice(0, Lib.size()-2);
+ }
+ return Lib;
+}
+
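A minimal usage sketch of the forms above (the input paths and variable
names here are hypothetical, not part of this change):

  bool isFramework;
  StringRef Suffix;
  StringRef Short = MachOObjectFile::guessLibraryShortName(
      "/S/L/Frameworks/Foo.framework/Versions/A/Foo_debug", isFramework,
      Suffix);
  // Short == "Foo", isFramework == true, Suffix == "_debug"
  Short = MachOObjectFile::guessLibraryShortName("/usr/lib/libFoo.A.dylib",
                                                 isFramework, Suffix);
  // Short == "libFoo", isFramework == false, Suffix is empty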
+// getLibraryShortNameByIndex() is used to get the short name of the library
+// for an undefined symbol in a Mach-O binary linked with the normal
+// two-level namespace default (that is, MH_TWOLEVEL in the header). It is
+// passed the index (0-based) of the library as translated from
+// GET_LIBRARY_ORDINAL (1-based).
+std::error_code MachOObjectFile::getLibraryShortNameByIndex(unsigned Index,
+ StringRef &Res) {
+ if (Index >= Libraries.size())
+ return object_error::parse_failed;
+
+ MachO::dylib_command D =
+ getStruct<MachO::dylib_command>(this, Libraries[Index]);
+ if (D.dylib.name >= D.cmdsize)
+ return object_error::parse_failed;
+
+  // If the cache of LibrariesShortNames has not been built yet, build it
+  // first for all the Libraries.
+ if (LibrariesShortNames.size() == 0) {
+ for (unsigned i = 0; i < Libraries.size(); i++) {
+ MachO::dylib_command D =
+ getStruct<MachO::dylib_command>(this, Libraries[i]);
+ if (D.dylib.name >= D.cmdsize) {
+ LibrariesShortNames.push_back(StringRef());
+ continue;
+ }
+ const char *P = (const char *)(Libraries[i]) + D.dylib.name;
+ StringRef Name = StringRef(P);
+ StringRef Suffix;
+ bool isFramework;
+ StringRef shortName = guessLibraryShortName(Name, isFramework, Suffix);
+ if (shortName == StringRef())
+ LibrariesShortNames.push_back(Name);
+ else
+ LibrariesShortNames.push_back(shortName);
+ }
+ }
+
+ Res = LibrariesShortNames[Index];
+ return object_error::success;
+}
+
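A hedged caller sketch for the ordinal translation described above; Obj (a
MachOObjectFile *) and NDesc (an nlist n_desc field) are assumptions, not
part of this change:

  unsigned Ordinal = MachO::GET_LIBRARY_ORDINAL(NDesc);           // 1-based
  StringRef ShortName;
  if (!Obj->getLibraryShortNameByIndex(Ordinal - 1, ShortName))   // 0-based
    outs() << "undefined symbol from " << ShortName << "\n";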
basic_symbol_iterator MachOObjectFile::symbol_begin_impl() const {
return getSymbolByIndex(0);
}
@@ -1288,6 +1514,108 @@ Triple::ArchType MachOObjectFile::getArch(uint32_t CPUType) {
}
}
+Triple MachOObjectFile::getArch(uint32_t CPUType, uint32_t CPUSubType) {
+ switch (CPUType) {
+ case MachO::CPU_TYPE_I386:
+ switch (CPUSubType & ~MachO::CPU_SUBTYPE_MASK) {
+ case MachO::CPU_SUBTYPE_I386_ALL:
+ return Triple("i386-apple-darwin");
+ default:
+ return Triple();
+ }
+ case MachO::CPU_TYPE_X86_64:
+ switch (CPUSubType & ~MachO::CPU_SUBTYPE_MASK) {
+ case MachO::CPU_SUBTYPE_X86_64_ALL:
+ return Triple("x86_64-apple-darwin");
+ case MachO::CPU_SUBTYPE_X86_64_H:
+ return Triple("x86_64h-apple-darwin");
+ default:
+ return Triple();
+ }
+ case MachO::CPU_TYPE_ARM:
+ switch (CPUSubType & ~MachO::CPU_SUBTYPE_MASK) {
+ case MachO::CPU_SUBTYPE_ARM_V4T:
+ return Triple("armv4t-apple-darwin");
+ case MachO::CPU_SUBTYPE_ARM_V5TEJ:
+ return Triple("armv5e-apple-darwin");
+ case MachO::CPU_SUBTYPE_ARM_V6:
+ return Triple("armv6-apple-darwin");
+ case MachO::CPU_SUBTYPE_ARM_V6M:
+ return Triple("armv6m-apple-darwin");
+ case MachO::CPU_SUBTYPE_ARM_V7EM:
+ return Triple("armv7em-apple-darwin");
+ case MachO::CPU_SUBTYPE_ARM_V7K:
+ return Triple("armv7k-apple-darwin");
+ case MachO::CPU_SUBTYPE_ARM_V7M:
+ return Triple("armv7m-apple-darwin");
+ case MachO::CPU_SUBTYPE_ARM_V7S:
+ return Triple("armv7s-apple-darwin");
+ default:
+ return Triple();
+ }
+ case MachO::CPU_TYPE_ARM64:
+ switch (CPUSubType & ~MachO::CPU_SUBTYPE_MASK) {
+ case MachO::CPU_SUBTYPE_ARM64_ALL:
+ return Triple("arm64-apple-darwin");
+ default:
+ return Triple();
+ }
+ case MachO::CPU_TYPE_POWERPC:
+ switch (CPUSubType & ~MachO::CPU_SUBTYPE_MASK) {
+ case MachO::CPU_SUBTYPE_POWERPC_ALL:
+ return Triple("ppc-apple-darwin");
+ default:
+ return Triple();
+ }
+ case MachO::CPU_TYPE_POWERPC64:
+ switch (CPUSubType & ~MachO::CPU_SUBTYPE_MASK) {
+ case MachO::CPU_SUBTYPE_POWERPC_ALL:
+ return Triple("ppc64-apple-darwin");
+ default:
+ return Triple();
+ }
+ default:
+ return Triple();
+ }
+}
+
+Triple MachOObjectFile::getHostArch() {
+ return Triple(sys::getDefaultTargetTriple());
+}
+
+Triple MachOObjectFile::getArch(StringRef ArchFlag) {
+ if (ArchFlag == "i386")
+ return Triple("i386-apple-darwin");
+ else if (ArchFlag == "x86_64")
+ return Triple("x86_64-apple-darwin");
+ else if (ArchFlag == "x86_64h")
+ return Triple("x86_64h-apple-darwin");
+ else if (ArchFlag == "armv4t" || ArchFlag == "arm")
+ return Triple("armv4t-apple-darwin");
+ else if (ArchFlag == "armv5e")
+ return Triple("armv5e-apple-darwin");
+ else if (ArchFlag == "armv6")
+ return Triple("armv6-apple-darwin");
+ else if (ArchFlag == "armv6m")
+ return Triple("armv6m-apple-darwin");
+ else if (ArchFlag == "armv7em")
+ return Triple("armv7em-apple-darwin");
+ else if (ArchFlag == "armv7k")
+ return Triple("armv7k-apple-darwin");
+  else if (ArchFlag == "armv7m")
+ return Triple("armv7m-apple-darwin");
+ else if (ArchFlag == "armv7s")
+ return Triple("armv7s-apple-darwin");
+ else if (ArchFlag == "arm64")
+ return Triple("arm64-apple-darwin");
+ else if (ArchFlag == "ppc")
+ return Triple("ppc-apple-darwin");
+ else if (ArchFlag == "ppc64")
+ return Triple("ppc64-apple-darwin");
+ else
+ return Triple();
+}
+
unsigned MachOObjectFile::getArch() const {
return getArch(getCPUType(this));
}
@@ -1498,6 +1826,12 @@ MachOObjectFile::getVersionMinLoadCommand(const LoadCommandInfo &L) const {
return getStruct<MachO::version_min_command>(this, L.Ptr);
}
+MachO::dylib_command
+MachOObjectFile::getDylibIDLoadCommand(const LoadCommandInfo &L) const {
+ return getStruct<MachO::dylib_command>(this, L.Ptr);
+}
+
+
MachO::any_relocation_info
MachOObjectFile::getRelocation(DataRefImpl Rel) const {
DataRefImpl Sec;
@@ -1574,7 +1908,7 @@ StringRef MachOObjectFile::getStringTableData() const {
bool MachOObjectFile::is64Bit() const {
return getType() == getMachOType(false, true) ||
- getType() == getMachOType(true, true);
+ getType() == getMachOType(true, true);
}
void MachOObjectFile::ReadULEB128s(uint64_t Index,
@@ -1589,23 +1923,25 @@ void MachOObjectFile::ReadULEB128s(uint64_t Index,
}
}
-ErrorOr<ObjectFile *> ObjectFile::createMachOObjectFile(MemoryBuffer *Buffer,
- bool BufferOwned) {
+const char *MachOObjectFile::getSectionPointer(DataRefImpl Rel) const {
+ return Sections[Rel.d.a];
+}
+
+ErrorOr<ObjectFile *>
+ObjectFile::createMachOObjectFile(std::unique_ptr<MemoryBuffer> &Buffer) {
StringRef Magic = Buffer->getBuffer().slice(0, 4);
- error_code EC;
+ std::error_code EC;
std::unique_ptr<MachOObjectFile> Ret;
if (Magic == "\xFE\xED\xFA\xCE")
- Ret.reset(new MachOObjectFile(Buffer, false, false, EC, BufferOwned));
+ Ret.reset(new MachOObjectFile(std::move(Buffer), false, false, EC));
else if (Magic == "\xCE\xFA\xED\xFE")
- Ret.reset(new MachOObjectFile(Buffer, true, false, EC, BufferOwned));
+ Ret.reset(new MachOObjectFile(std::move(Buffer), true, false, EC));
else if (Magic == "\xFE\xED\xFA\xCF")
- Ret.reset(new MachOObjectFile(Buffer, false, true, EC, BufferOwned));
+ Ret.reset(new MachOObjectFile(std::move(Buffer), false, true, EC));
else if (Magic == "\xCF\xFA\xED\xFE")
- Ret.reset(new MachOObjectFile(Buffer, true, true, EC, BufferOwned));
- else {
- delete Buffer;
+ Ret.reset(new MachOObjectFile(std::move(Buffer), true, true, EC));
+ else
return object_error::parse_failed;
- }
if (EC)
return EC;
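For reference, the four magic strings above map onto the (IsLittleEndian,
Is64bits) constructor arguments as follows (read off the dispatch itself):

  "\xFE\xED\xFA\xCE"  MH_MAGIC, big-endian 32-bit        -> (false, false)
  "\xCE\xFA\xED\xFE"  MH_CIGAM, little-endian 32-bit     -> (true,  false)
  "\xFE\xED\xFA\xCF"  MH_MAGIC_64, big-endian 64-bit     -> (false, true)
  "\xCF\xFA\xED\xFE"  MH_CIGAM_64, little-endian 64-bit  -> (true,  true)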
diff --git a/lib/Object/MachOUniversal.cpp b/lib/Object/MachOUniversal.cpp
index 5085efd..4ba5d96 100644
--- a/lib/Object/MachOUniversal.cpp
+++ b/lib/Object/MachOUniversal.cpp
@@ -23,26 +23,21 @@ using namespace llvm;
using namespace object;
template<typename T>
-static void SwapValue(T &Value) {
- Value = sys::SwapByteOrder(Value);
-}
-
-template<typename T>
static void SwapStruct(T &Value);
template<>
void SwapStruct(MachO::fat_header &H) {
- SwapValue(H.magic);
- SwapValue(H.nfat_arch);
+ sys::swapByteOrder(H.magic);
+ sys::swapByteOrder(H.nfat_arch);
}
template<>
void SwapStruct(MachO::fat_arch &H) {
- SwapValue(H.cputype);
- SwapValue(H.cpusubtype);
- SwapValue(H.offset);
- SwapValue(H.size);
- SwapValue(H.align);
+ sys::swapByteOrder(H.cputype);
+ sys::swapByteOrder(H.cpusubtype);
+ sys::swapByteOrder(H.offset);
+ sys::swapByteOrder(H.size);
+ sys::swapByteOrder(H.align);
}
template<typename T>
@@ -58,7 +53,7 @@ static T getUniversalBinaryStruct(const char *Ptr) {
MachOUniversalBinary::ObjectForArch::ObjectForArch(
const MachOUniversalBinary *Parent, uint32_t Index)
: Parent(Parent), Index(Index) {
- if (!Parent || Index > Parent->getNumberOfObjects()) {
+ if (!Parent || Index >= Parent->getNumberOfObjects()) {
clear();
} else {
// Parse object header.
@@ -72,37 +67,29 @@ MachOUniversalBinary::ObjectForArch::ObjectForArch(
}
}
-error_code MachOUniversalBinary::ObjectForArch::getAsObjectFile(
- std::unique_ptr<ObjectFile> &Result) const {
+ErrorOr<std::unique_ptr<ObjectFile>>
+MachOUniversalBinary::ObjectForArch::getAsObjectFile() const {
if (Parent) {
StringRef ParentData = Parent->getData();
StringRef ObjectData = ParentData.substr(Header.offset, Header.size);
- std::string ObjectName =
- Parent->getFileName().str() + ":" +
- Triple::getArchTypeName(MachOObjectFile::getArch(Header.cputype));
- MemoryBuffer *ObjBuffer = MemoryBuffer::getMemBuffer(
- ObjectData, ObjectName, false);
- ErrorOr<ObjectFile *> Obj = ObjectFile::createMachOObjectFile(ObjBuffer);
- if (error_code EC = Obj.getError())
- return EC;
- Result.reset(Obj.get());
- return object_error::success;
+ std::string ObjectName = Parent->getFileName().str();
+ std::unique_ptr<MemoryBuffer> ObjBuffer(
+ MemoryBuffer::getMemBuffer(ObjectData, ObjectName, false));
+ return ObjectFile::createMachOObjectFile(ObjBuffer);
}
return object_error::parse_failed;
}
-error_code MachOUniversalBinary::ObjectForArch::getAsArchive(
+std::error_code MachOUniversalBinary::ObjectForArch::getAsArchive(
std::unique_ptr<Archive> &Result) const {
if (Parent) {
StringRef ParentData = Parent->getData();
StringRef ObjectData = ParentData.substr(Header.offset, Header.size);
- std::string ObjectName =
- Parent->getFileName().str() + ":" +
- Triple::getArchTypeName(MachOObjectFile::getArch(Header.cputype));
- MemoryBuffer *ObjBuffer = MemoryBuffer::getMemBuffer(
- ObjectData, ObjectName, false);
- ErrorOr<Archive *> Obj = Archive::create(ObjBuffer);
- if (error_code EC = Obj.getError())
+ std::string ObjectName = Parent->getFileName().str();
+ std::unique_ptr<MemoryBuffer> ObjBuffer(
+ MemoryBuffer::getMemBuffer(ObjectData, ObjectName, false));
+ ErrorOr<Archive *> Obj = Archive::create(std::move(ObjBuffer));
+ if (std::error_code EC = Obj.getError())
return EC;
Result.reset(Obj.get());
return object_error::success;
@@ -113,20 +100,20 @@ error_code MachOUniversalBinary::ObjectForArch::getAsArchive(
void MachOUniversalBinary::anchor() { }
ErrorOr<MachOUniversalBinary *>
-MachOUniversalBinary::create(MemoryBuffer *Source) {
- error_code EC;
+MachOUniversalBinary::create(std::unique_ptr<MemoryBuffer> Source) {
+ std::error_code EC;
std::unique_ptr<MachOUniversalBinary> Ret(
- new MachOUniversalBinary(Source, EC));
+ new MachOUniversalBinary(std::move(Source), EC));
if (EC)
return EC;
return Ret.release();
}
-MachOUniversalBinary::MachOUniversalBinary(MemoryBuffer *Source,
- error_code &ec)
- : Binary(Binary::ID_MachOUniversalBinary, Source),
- NumberOfObjects(0) {
- if (Source->getBufferSize() < sizeof(MachO::fat_header)) {
+MachOUniversalBinary::MachOUniversalBinary(std::unique_ptr<MemoryBuffer> Source,
+ std::error_code &ec)
+ : Binary(Binary::ID_MachOUniversalBinary, std::move(Source)),
+ NumberOfObjects(0) {
+ if (Data->getBufferSize() < sizeof(MachO::fat_header)) {
ec = object_error::invalid_file_type;
return;
}
@@ -155,14 +142,14 @@ static bool getCTMForArch(Triple::ArchType Arch, MachO::CPUType &CTM) {
}
}
-error_code MachOUniversalBinary::getObjectForArch(
- Triple::ArchType Arch, std::unique_ptr<ObjectFile> &Result) const {
+ErrorOr<std::unique_ptr<ObjectFile>>
+MachOUniversalBinary::getObjectForArch(Triple::ArchType Arch) const {
MachO::CPUType CTM;
if (!getCTMForArch(Arch, CTM))
return object_error::arch_not_found;
for (object_iterator I = begin_objects(), E = end_objects(); I != E; ++I) {
if (I->getCPUType() == static_cast<uint32_t>(CTM))
- return I->getAsObjectFile(Result);
+ return I->getAsObjectFile();
}
return object_error::arch_not_found;
}
diff --git a/lib/Object/Object.cpp b/lib/Object/Object.cpp
index b0068a8..567d87f 100644
--- a/lib/Object/Object.cpp
+++ b/lib/Object/Object.cpp
@@ -59,7 +59,9 @@ wrap(const relocation_iterator *SI) {
// ObjectFile creation
LLVMObjectFileRef LLVMCreateObjectFile(LLVMMemoryBufferRef MemBuf) {
- ErrorOr<ObjectFile*> ObjOrErr(ObjectFile::createObjectFile(unwrap(MemBuf)));
+ std::unique_ptr<MemoryBuffer> Buf(unwrap(MemBuf));
+ ErrorOr<ObjectFile *> ObjOrErr(ObjectFile::createObjectFile(Buf));
+ Buf.release();
ObjectFile *Obj = ObjOrErr ? ObjOrErr.get() : nullptr;
return wrap(Obj);
}
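A hedged reading of the transitional pattern above: the C API treats MemBuf
as caller-owned, so the buffer is wrapped in a temporary unique_ptr only to
satisfy the new createObjectFile() signature and then release()d so the
local wrapper never deletes it on this path.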
@@ -89,7 +91,7 @@ void LLVMMoveToNextSection(LLVMSectionIteratorRef SI) {
void LLVMMoveToContainingSection(LLVMSectionIteratorRef Sect,
LLVMSymbolIteratorRef Sym) {
- if (error_code ec = (*unwrap(Sym))->getSection(*unwrap(Sect)))
+ if (std::error_code ec = (*unwrap(Sym))->getSection(*unwrap(Sect)))
report_fatal_error(ec.message());
}
@@ -115,28 +117,28 @@ void LLVMMoveToNextSymbol(LLVMSymbolIteratorRef SI) {
// SectionRef accessors
const char *LLVMGetSectionName(LLVMSectionIteratorRef SI) {
StringRef ret;
- if (error_code ec = (*unwrap(SI))->getName(ret))
+ if (std::error_code ec = (*unwrap(SI))->getName(ret))
report_fatal_error(ec.message());
return ret.data();
}
uint64_t LLVMGetSectionSize(LLVMSectionIteratorRef SI) {
uint64_t ret;
- if (error_code ec = (*unwrap(SI))->getSize(ret))
+ if (std::error_code ec = (*unwrap(SI))->getSize(ret))
report_fatal_error(ec.message());
return ret;
}
const char *LLVMGetSectionContents(LLVMSectionIteratorRef SI) {
StringRef ret;
- if (error_code ec = (*unwrap(SI))->getContents(ret))
+ if (std::error_code ec = (*unwrap(SI))->getContents(ret))
report_fatal_error(ec.message());
return ret.data();
}
uint64_t LLVMGetSectionAddress(LLVMSectionIteratorRef SI) {
uint64_t ret;
- if (error_code ec = (*unwrap(SI))->getAddress(ret))
+ if (std::error_code ec = (*unwrap(SI))->getAddress(ret))
report_fatal_error(ec.message());
return ret;
}
@@ -144,7 +146,7 @@ uint64_t LLVMGetSectionAddress(LLVMSectionIteratorRef SI) {
LLVMBool LLVMGetSectionContainsSymbol(LLVMSectionIteratorRef SI,
LLVMSymbolIteratorRef Sym) {
bool ret;
- if (error_code ec = (*unwrap(SI))->containsSymbol(**unwrap(Sym), ret))
+ if (std::error_code ec = (*unwrap(SI))->containsSymbol(**unwrap(Sym), ret))
report_fatal_error(ec.message());
return ret;
}
@@ -172,21 +174,21 @@ void LLVMMoveToNextRelocation(LLVMRelocationIteratorRef SI) {
// SymbolRef accessors
const char *LLVMGetSymbolName(LLVMSymbolIteratorRef SI) {
StringRef ret;
- if (error_code ec = (*unwrap(SI))->getName(ret))
+ if (std::error_code ec = (*unwrap(SI))->getName(ret))
report_fatal_error(ec.message());
return ret.data();
}
uint64_t LLVMGetSymbolAddress(LLVMSymbolIteratorRef SI) {
uint64_t ret;
- if (error_code ec = (*unwrap(SI))->getAddress(ret))
+ if (std::error_code ec = (*unwrap(SI))->getAddress(ret))
report_fatal_error(ec.message());
return ret;
}
uint64_t LLVMGetSymbolSize(LLVMSymbolIteratorRef SI) {
uint64_t ret;
- if (error_code ec = (*unwrap(SI))->getSize(ret))
+ if (std::error_code ec = (*unwrap(SI))->getSize(ret))
report_fatal_error(ec.message());
return ret;
}
@@ -194,14 +196,14 @@ uint64_t LLVMGetSymbolSize(LLVMSymbolIteratorRef SI) {
// RelocationRef accessors
uint64_t LLVMGetRelocationAddress(LLVMRelocationIteratorRef RI) {
uint64_t ret;
- if (error_code ec = (*unwrap(RI))->getAddress(ret))
+ if (std::error_code ec = (*unwrap(RI))->getAddress(ret))
report_fatal_error(ec.message());
return ret;
}
uint64_t LLVMGetRelocationOffset(LLVMRelocationIteratorRef RI) {
uint64_t ret;
- if (error_code ec = (*unwrap(RI))->getOffset(ret))
+ if (std::error_code ec = (*unwrap(RI))->getOffset(ret))
report_fatal_error(ec.message());
return ret;
}
@@ -213,7 +215,7 @@ LLVMSymbolIteratorRef LLVMGetRelocationSymbol(LLVMRelocationIteratorRef RI) {
uint64_t LLVMGetRelocationType(LLVMRelocationIteratorRef RI) {
uint64_t ret;
- if (error_code ec = (*unwrap(RI))->getType(ret))
+ if (std::error_code ec = (*unwrap(RI))->getType(ret))
report_fatal_error(ec.message());
return ret;
}
@@ -221,7 +223,7 @@ uint64_t LLVMGetRelocationType(LLVMRelocationIteratorRef RI) {
// NOTE: Caller takes ownership of returned string.
const char *LLVMGetRelocationTypeName(LLVMRelocationIteratorRef RI) {
SmallVector<char, 0> ret;
- if (error_code ec = (*unwrap(RI))->getTypeName(ret))
+ if (std::error_code ec = (*unwrap(RI))->getTypeName(ret))
report_fatal_error(ec.message());
char *str = static_cast<char*>(malloc(ret.size()));
@@ -232,7 +234,7 @@ const char *LLVMGetRelocationTypeName(LLVMRelocationIteratorRef RI) {
// NOTE: Caller takes ownership of returned string.
const char *LLVMGetRelocationValueString(LLVMRelocationIteratorRef RI) {
SmallVector<char, 0> ret;
- if (error_code ec = (*unwrap(RI))->getValueString(ret))
+ if (std::error_code ec = (*unwrap(RI))->getValueString(ret))
report_fatal_error(ec.message());
char *str = static_cast<char*>(malloc(ret.size()));
diff --git a/lib/Object/ObjectFile.cpp b/lib/Object/ObjectFile.cpp
index d30f0cc..f5488c6 100644
--- a/lib/Object/ObjectFile.cpp
+++ b/lib/Object/ObjectFile.cpp
@@ -16,28 +16,27 @@
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Support/system_error.h"
+#include <system_error>
using namespace llvm;
using namespace object;
void ObjectFile::anchor() { }
-ObjectFile::ObjectFile(unsigned int Type, MemoryBuffer *Source,
- bool BufferOwned)
- : SymbolicFile(Type, Source, BufferOwned) {}
+ObjectFile::ObjectFile(unsigned int Type, std::unique_ptr<MemoryBuffer> Source)
+ : SymbolicFile(Type, std::move(Source)) {}
-error_code ObjectFile::printSymbolName(raw_ostream &OS,
- DataRefImpl Symb) const {
+std::error_code ObjectFile::printSymbolName(raw_ostream &OS,
+ DataRefImpl Symb) const {
StringRef Name;
- if (error_code EC = getSymbolName(Symb, Name))
+ if (std::error_code EC = getSymbolName(Symb, Name))
return EC;
OS << Name;
return object_error::success;
}
-error_code ObjectFile::getSymbolAlignment(DataRefImpl DRI,
- uint32_t &Result) const {
+std::error_code ObjectFile::getSymbolAlignment(DataRefImpl DRI,
+ uint32_t &Result) const {
Result = 0;
return object_error::success;
}
@@ -46,9 +45,9 @@ section_iterator ObjectFile::getRelocatedSection(DataRefImpl Sec) const {
return section_iterator(SectionRef(Sec, this));
}
-ErrorOr<ObjectFile *> ObjectFile::createObjectFile(MemoryBuffer *Object,
- bool BufferOwned,
- sys::fs::file_magic Type) {
+ErrorOr<ObjectFile *>
+ObjectFile::createObjectFile(std::unique_ptr<MemoryBuffer> &Object,
+ sys::fs::file_magic Type) {
if (Type == sys::fs::file_magic::unknown)
Type = sys::fs::identify_magic(Object->getBuffer());
@@ -58,14 +57,12 @@ ErrorOr<ObjectFile *> ObjectFile::createObjectFile(MemoryBuffer *Object,
case sys::fs::file_magic::archive:
case sys::fs::file_magic::macho_universal_binary:
case sys::fs::file_magic::windows_resource:
- if (BufferOwned)
- delete Object;
return object_error::invalid_file_type;
case sys::fs::file_magic::elf_relocatable:
case sys::fs::file_magic::elf_executable:
case sys::fs::file_magic::elf_shared_object:
case sys::fs::file_magic::elf_core:
- return createELFObjectFile(Object, BufferOwned);
+ return createELFObjectFile(Object);
case sys::fs::file_magic::macho_object:
case sys::fs::file_magic::macho_executable:
case sys::fs::file_magic::macho_fixed_virtual_memory_shared_lib:
@@ -76,18 +73,19 @@ ErrorOr<ObjectFile *> ObjectFile::createObjectFile(MemoryBuffer *Object,
case sys::fs::file_magic::macho_bundle:
case sys::fs::file_magic::macho_dynamically_linked_shared_lib_stub:
case sys::fs::file_magic::macho_dsym_companion:
- return createMachOObjectFile(Object, BufferOwned);
+ return createMachOObjectFile(Object);
case sys::fs::file_magic::coff_object:
case sys::fs::file_magic::coff_import_library:
case sys::fs::file_magic::pecoff_executable:
- return createCOFFObjectFile(Object, BufferOwned);
+ return createCOFFObjectFile(std::move(Object));
}
llvm_unreachable("Unexpected Object File Type");
}
ErrorOr<ObjectFile *> ObjectFile::createObjectFile(StringRef ObjectPath) {
- std::unique_ptr<MemoryBuffer> File;
- if (error_code EC = MemoryBuffer::getFile(ObjectPath, File))
+ ErrorOr<std::unique_ptr<MemoryBuffer>> FileOrErr =
+ MemoryBuffer::getFile(ObjectPath);
+ if (std::error_code EC = FileOrErr.getError())
return EC;
- return createObjectFile(File.release());
+ return createObjectFile(FileOrErr.get());
}
diff --git a/lib/Object/RecordStreamer.cpp b/lib/Object/RecordStreamer.cpp
new file mode 100644
index 0000000..081fadd
--- /dev/null
+++ b/lib/Object/RecordStreamer.cpp
@@ -0,0 +1,100 @@
+//===-- RecordStreamer.cpp - Record asm defined and used symbols ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "RecordStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+using namespace llvm;
+
+void RecordStreamer::markDefined(const MCSymbol &Symbol) {
+ State &S = Symbols[Symbol.getName()];
+ switch (S) {
+ case DefinedGlobal:
+ case Global:
+ S = DefinedGlobal;
+ break;
+ case NeverSeen:
+ case Defined:
+ case Used:
+ S = Defined;
+ break;
+ }
+}
+
+void RecordStreamer::markGlobal(const MCSymbol &Symbol) {
+ State &S = Symbols[Symbol.getName()];
+ switch (S) {
+ case DefinedGlobal:
+ case Defined:
+ S = DefinedGlobal;
+ break;
+
+ case NeverSeen:
+ case Global:
+ case Used:
+ S = Global;
+ break;
+ }
+}
+
+void RecordStreamer::markUsed(const MCSymbol &Symbol) {
+ State &S = Symbols[Symbol.getName()];
+ switch (S) {
+ case DefinedGlobal:
+ case Defined:
+ case Global:
+ break;
+
+ case NeverSeen:
+ case Used:
+ S = Used;
+ break;
+ }
+}
+
+void RecordStreamer::visitUsedSymbol(const MCSymbol &Sym) { markUsed(Sym); }
+
+RecordStreamer::const_iterator RecordStreamer::begin() {
+ return Symbols.begin();
+}
+
+RecordStreamer::const_iterator RecordStreamer::end() { return Symbols.end(); }
+
+RecordStreamer::RecordStreamer(MCContext &Context) : MCStreamer(Context) {}
+
+void RecordStreamer::EmitInstruction(const MCInst &Inst,
+ const MCSubtargetInfo &STI) {
+ MCStreamer::EmitInstruction(Inst, STI);
+}
+
+void RecordStreamer::EmitLabel(MCSymbol *Symbol) {
+ MCStreamer::EmitLabel(Symbol);
+ markDefined(*Symbol);
+}
+
+void RecordStreamer::EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) {
+ markDefined(*Symbol);
+ MCStreamer::EmitAssignment(Symbol, Value);
+}
+
+bool RecordStreamer::EmitSymbolAttribute(MCSymbol *Symbol,
+ MCSymbolAttr Attribute) {
+ if (Attribute == MCSA_Global)
+ markGlobal(*Symbol);
+ return true;
+}
+
+void RecordStreamer::EmitZerofill(const MCSection *Section, MCSymbol *Symbol,
+ uint64_t Size, unsigned ByteAlignment) {
+ markDefined(*Symbol);
+}
+
+void RecordStreamer::EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
+ unsigned ByteAlignment) {
+ markDefined(*Symbol);
+}
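A short walk-through of the state lattice above, assuming an existing
MCContext Ctx and MCSymbol *Sym (both hypothetical here):

  RecordStreamer RS(Ctx);                    // every symbol starts NeverSeen
  RS.EmitSymbolAttribute(Sym, MCSA_Global);  // NeverSeen -> Global
  RS.EmitLabel(Sym);                         // Global -> DefinedGlobal
  // The same end state is reached in the opposite order: a Defined symbol
  // later marked global also becomes DefinedGlobal.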
diff --git a/lib/Object/RecordStreamer.h b/lib/Object/RecordStreamer.h
new file mode 100644
index 0000000..10e70ef
--- /dev/null
+++ b/lib/Object/RecordStreamer.h
@@ -0,0 +1,42 @@
+//===-- RecordStreamer.h - Record asm defined and used symbols ---*- C++ -*===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_OBJECT_RECORD_STREAMER
+#define LLVM_OBJECT_RECORD_STREAMER
+
+#include "llvm/MC/MCStreamer.h"
+
+namespace llvm {
+class RecordStreamer : public MCStreamer {
+public:
+ enum State { NeverSeen, Global, Defined, DefinedGlobal, Used };
+
+private:
+ StringMap<State> Symbols;
+ void markDefined(const MCSymbol &Symbol);
+ void markGlobal(const MCSymbol &Symbol);
+ void markUsed(const MCSymbol &Symbol);
+ void visitUsedSymbol(const MCSymbol &Sym) override;
+
+public:
+ typedef StringMap<State>::const_iterator const_iterator;
+ const_iterator begin();
+ const_iterator end();
+ RecordStreamer(MCContext &Context);
+ void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI) override;
+ void EmitLabel(MCSymbol *Symbol) override;
+ void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) override;
+ bool EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute) override;
+ void EmitZerofill(const MCSection *Section, MCSymbol *Symbol, uint64_t Size,
+ unsigned ByteAlignment) override;
+ void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
+ unsigned ByteAlignment) override;
+};
+}
+#endif
diff --git a/lib/Object/SymbolicFile.cpp b/lib/Object/SymbolicFile.cpp
index 495f0b6..30cf1a0 100644
--- a/lib/Object/SymbolicFile.cpp
+++ b/lib/Object/SymbolicFile.cpp
@@ -19,14 +19,14 @@
using namespace llvm;
using namespace object;
-SymbolicFile::SymbolicFile(unsigned int Type, MemoryBuffer *Source,
- bool BufferOwned)
- : Binary(Type, Source, BufferOwned) {}
+SymbolicFile::SymbolicFile(unsigned int Type,
+ std::unique_ptr<MemoryBuffer> Source)
+ : Binary(Type, std::move(Source)) {}
SymbolicFile::~SymbolicFile() {}
ErrorOr<SymbolicFile *>
-SymbolicFile::createSymbolicFile(MemoryBuffer *Object, bool BufferOwned,
+SymbolicFile::createSymbolicFile(std::unique_ptr<MemoryBuffer> &Object,
sys::fs::file_magic Type,
LLVMContext *Context) {
if (Type == sys::fs::file_magic::unknown)
@@ -35,14 +35,12 @@ SymbolicFile::createSymbolicFile(MemoryBuffer *Object, bool BufferOwned,
switch (Type) {
case sys::fs::file_magic::bitcode:
if (Context)
- return IRObjectFile::createIRObjectFile(Object, *Context, BufferOwned);
+ return IRObjectFile::createIRObjectFile(std::move(Object), *Context);
// Fallthrough
case sys::fs::file_magic::unknown:
case sys::fs::file_magic::archive:
case sys::fs::file_magic::macho_universal_binary:
case sys::fs::file_magic::windows_resource:
- if (BufferOwned)
- delete Object;
return object_error::invalid_file_type;
case sys::fs::file_magic::elf_relocatable:
case sys::fs::file_magic::elf_executable:
@@ -61,7 +59,7 @@ SymbolicFile::createSymbolicFile(MemoryBuffer *Object, bool BufferOwned,
case sys::fs::file_magic::coff_object:
case sys::fs::file_magic::coff_import_library:
case sys::fs::file_magic::pecoff_executable:
- return ObjectFile::createObjectFile(Object, BufferOwned, Type);
+ return ObjectFile::createObjectFile(Object, Type);
}
llvm_unreachable("Unexpected Binary File Type");
}
diff --git a/lib/Option/ArgList.cpp b/lib/Option/ArgList.cpp
index a5ab8d7..5848bb1 100644
--- a/lib/Option/ArgList.cpp
+++ b/lib/Option/ArgList.cpp
@@ -234,44 +234,40 @@ void ArgList::AddLastArg(ArgStringList &Output, OptSpecifier Id0,
void ArgList::AddAllArgs(ArgStringList &Output, OptSpecifier Id0,
OptSpecifier Id1, OptSpecifier Id2) const {
- for (arg_iterator it = filtered_begin(Id0, Id1, Id2),
- ie = filtered_end(); it != ie; ++it) {
- (*it)->claim();
- (*it)->render(*this, Output);
+  for (auto Arg : filtered(Id0, Id1, Id2)) {
+ Arg->claim();
+ Arg->render(*this, Output);
}
}
void ArgList::AddAllArgValues(ArgStringList &Output, OptSpecifier Id0,
OptSpecifier Id1, OptSpecifier Id2) const {
- for (arg_iterator it = filtered_begin(Id0, Id1, Id2),
- ie = filtered_end(); it != ie; ++it) {
- (*it)->claim();
- for (unsigned i = 0, e = (*it)->getNumValues(); i != e; ++i)
- Output.push_back((*it)->getValue(i));
+ for (auto Arg : filtered(Id0, Id1, Id2)) {
+ Arg->claim();
+ for (unsigned i = 0, e = Arg->getNumValues(); i != e; ++i)
+ Output.push_back(Arg->getValue(i));
}
}
void ArgList::AddAllArgsTranslated(ArgStringList &Output, OptSpecifier Id0,
const char *Translation,
bool Joined) const {
- for (arg_iterator it = filtered_begin(Id0),
- ie = filtered_end(); it != ie; ++it) {
- (*it)->claim();
+  for (auto Arg : filtered(Id0)) {
+ Arg->claim();
if (Joined) {
Output.push_back(MakeArgString(StringRef(Translation) +
- (*it)->getValue(0)));
+ Arg->getValue(0)));
} else {
Output.push_back(Translation);
- Output.push_back((*it)->getValue(0));
+ Output.push_back(Arg->getValue(0));
}
}
}
void ArgList::ClaimAllArgs(OptSpecifier Id0) const {
- for (arg_iterator it = filtered_begin(Id0),
- ie = filtered_end(); it != ie; ++it)
- (*it)->claim();
+ for (auto Arg : filtered(Id0))
+ Arg->claim();
}
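A sketch of the new range interface used above, in a hypothetical caller
(Args, options::OPT_I, and Includes are placeholders):

  for (auto Arg : Args.filtered(options::OPT_I)) {
    Arg->claim();
    Includes.push_back(Arg->getValue());
  }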
void ArgList::ClaimAllArgs() const {
@@ -350,30 +346,27 @@ void DerivedArgList::AddSynthesizedArg(Arg *A) {
}
Arg *DerivedArgList::MakeFlagArg(const Arg *BaseArg, const Option Opt) const {
- SynthesizedArgs.push_back(make_unique<Arg>(
- Opt,
- ArgList::MakeArgString(Twine(Opt.getPrefix()) + Twine(Opt.getName())),
- BaseArgs.MakeIndex(Opt.getName()), BaseArg));
+ SynthesizedArgs.push_back(
+ make_unique<Arg>(Opt, MakeArgString(Opt.getPrefix() + Opt.getName()),
+ BaseArgs.MakeIndex(Opt.getName()), BaseArg));
return SynthesizedArgs.back().get();
}
Arg *DerivedArgList::MakePositionalArg(const Arg *BaseArg, const Option Opt,
StringRef Value) const {
unsigned Index = BaseArgs.MakeIndex(Value);
- SynthesizedArgs.push_back(make_unique<Arg>(
- Opt,
- ArgList::MakeArgString(Twine(Opt.getPrefix()) + Twine(Opt.getName())),
- Index, BaseArgs.getArgString(Index), BaseArg));
+ SynthesizedArgs.push_back(
+ make_unique<Arg>(Opt, MakeArgString(Opt.getPrefix() + Opt.getName()),
+ Index, BaseArgs.getArgString(Index), BaseArg));
return SynthesizedArgs.back().get();
}
Arg *DerivedArgList::MakeSeparateArg(const Arg *BaseArg, const Option Opt,
StringRef Value) const {
unsigned Index = BaseArgs.MakeIndex(Opt.getName(), Value);
- SynthesizedArgs.push_back(make_unique<Arg>(
- Opt,
- ArgList::MakeArgString(Twine(Opt.getPrefix()) + Twine(Opt.getName())),
- Index, BaseArgs.getArgString(Index + 1), BaseArg));
+ SynthesizedArgs.push_back(
+ make_unique<Arg>(Opt, MakeArgString(Opt.getPrefix() + Opt.getName()),
+ Index, BaseArgs.getArgString(Index + 1), BaseArg));
return SynthesizedArgs.back().get();
}
@@ -381,8 +374,7 @@ Arg *DerivedArgList::MakeJoinedArg(const Arg *BaseArg, const Option Opt,
StringRef Value) const {
unsigned Index = BaseArgs.MakeIndex(Opt.getName().str() + Value.str());
SynthesizedArgs.push_back(make_unique<Arg>(
- Opt,
- ArgList::MakeArgString(Twine(Opt.getPrefix()) + Twine(Opt.getName())),
- Index, BaseArgs.getArgString(Index) + Opt.getName().size(), BaseArg));
+ Opt, MakeArgString(Opt.getPrefix() + Opt.getName()), Index,
+ BaseArgs.getArgString(Index) + Opt.getName().size(), BaseArg));
return SynthesizedArgs.back().get();
}
diff --git a/lib/ProfileData/InstrProf.cpp b/lib/ProfileData/InstrProf.cpp
index de2b13d..0121222 100644
--- a/lib/ProfileData/InstrProf.cpp
+++ b/lib/ProfileData/InstrProf.cpp
@@ -18,10 +18,10 @@
using namespace llvm;
namespace {
-class InstrProfErrorCategoryType : public error_category {
- const char *name() const override { return "llvm.instrprof"; }
+class InstrProfErrorCategoryType : public std::error_category {
+ const char *name() const LLVM_NOEXCEPT override { return "llvm.instrprof"; }
std::string message(int IE) const override {
- instrprof_error::ErrorType E = static_cast<instrprof_error::ErrorType>(IE);
+ instrprof_error E = static_cast<instrprof_error>(IE);
switch (E) {
case instrprof_error::success:
return "Success";
@@ -52,15 +52,10 @@ class InstrProfErrorCategoryType : public error_category {
}
llvm_unreachable("A value of instrprof_error has no message.");
}
- error_condition default_error_condition(int EV) const override {
- if (EV == instrprof_error::success)
- return errc::success;
- return errc::invalid_argument;
- }
};
}
-const error_category &llvm::instrprof_category() {
+const std::error_category &llvm::instrprof_category() {
static InstrProfErrorCategoryType C;
return C;
}
diff --git a/lib/ProfileData/InstrProfReader.cpp b/lib/ProfileData/InstrProfReader.cpp
index 7014f5e..0b36728 100644
--- a/lib/ProfileData/InstrProfReader.cpp
+++ b/lib/ProfileData/InstrProfReader.cpp
@@ -21,10 +21,13 @@
using namespace llvm;
-static error_code setupMemoryBuffer(std::string Path,
- std::unique_ptr<MemoryBuffer> &Buffer) {
- if (error_code EC = MemoryBuffer::getFileOrSTDIN(Path, Buffer))
+static std::error_code
+setupMemoryBuffer(std::string Path, std::unique_ptr<MemoryBuffer> &Buffer) {
+ ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOrErr =
+ MemoryBuffer::getFileOrSTDIN(Path);
+ if (std::error_code EC = BufferOrErr.getError())
return EC;
+ Buffer = std::move(BufferOrErr.get());
// Sanity check the file.
if (Buffer->getBufferSize() > std::numeric_limits<unsigned>::max())
@@ -32,15 +35,16 @@ static error_code setupMemoryBuffer(std::string Path,
return instrprof_error::success;
}
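The hunk above is the recurring migration of this commit in miniature: APIs
that filled an out-parameter and returned llvm::error_code now either take
std::error_code or return ErrorOr<T>, consumed as:

  ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOrErr =
      MemoryBuffer::getFileOrSTDIN(Path);
  if (std::error_code EC = BufferOrErr.getError())
    return EC;
  std::unique_ptr<MemoryBuffer> Buffer = std::move(BufferOrErr.get());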
-static error_code initializeReader(InstrProfReader &Reader) {
+static std::error_code initializeReader(InstrProfReader &Reader) {
return Reader.readHeader();
}
-error_code InstrProfReader::create(std::string Path,
- std::unique_ptr<InstrProfReader> &Result) {
+std::error_code
+InstrProfReader::create(std::string Path,
+ std::unique_ptr<InstrProfReader> &Result) {
// Set up the buffer to read.
std::unique_ptr<MemoryBuffer> Buffer;
- if (error_code EC = setupMemoryBuffer(Path, Buffer))
+ if (std::error_code EC = setupMemoryBuffer(Path, Buffer))
return EC;
// Create the reader.
@@ -57,11 +61,11 @@ error_code InstrProfReader::create(std::string Path,
return initializeReader(*Result);
}
-error_code IndexedInstrProfReader::create(
+std::error_code IndexedInstrProfReader::create(
std::string Path, std::unique_ptr<IndexedInstrProfReader> &Result) {
// Set up the buffer to read.
std::unique_ptr<MemoryBuffer> Buffer;
- if (error_code EC = setupMemoryBuffer(Path, Buffer))
+ if (std::error_code EC = setupMemoryBuffer(Path, Buffer))
return EC;
// Create the reader.
@@ -78,7 +82,7 @@ void InstrProfIterator::Increment() {
*this = InstrProfIterator();
}
-error_code TextInstrProfReader::readNextRecord(InstrProfRecord &Record) {
+std::error_code TextInstrProfReader::readNextRecord(InstrProfRecord &Record) {
// Skip empty lines.
while (!Line.is_at_end() && Line->empty())
++Line;
@@ -157,11 +161,11 @@ bool RawInstrProfReader<IntPtrT>::hasFormat(const MemoryBuffer &DataBuffer) {
uint64_t Magic =
*reinterpret_cast<const uint64_t *>(DataBuffer.getBufferStart());
return getRawMagic<IntPtrT>() == Magic ||
- sys::SwapByteOrder(getRawMagic<IntPtrT>()) == Magic;
+ sys::getSwappedBytes(getRawMagic<IntPtrT>()) == Magic;
}
template <class IntPtrT>
-error_code RawInstrProfReader<IntPtrT>::readHeader() {
+std::error_code RawInstrProfReader<IntPtrT>::readHeader() {
if (!hasFormat(*DataBuffer))
return error(instrprof_error::bad_magic);
if (DataBuffer->getBufferSize() < sizeof(RawHeader))
@@ -173,7 +177,8 @@ error_code RawInstrProfReader<IntPtrT>::readHeader() {
}
template <class IntPtrT>
-error_code RawInstrProfReader<IntPtrT>::readNextHeader(const char *CurrentPos) {
+std::error_code
+RawInstrProfReader<IntPtrT>::readNextHeader(const char *CurrentPos) {
const char *End = DataBuffer->getBufferEnd();
// Skip zero padding between profiles.
while (CurrentPos != End && *CurrentPos == 0)
@@ -200,7 +205,8 @@ static uint64_t getRawVersion() {
}
template <class IntPtrT>
-error_code RawInstrProfReader<IntPtrT>::readHeader(const RawHeader &Header) {
+std::error_code
+RawInstrProfReader<IntPtrT>::readHeader(const RawHeader &Header) {
if (swap(Header.Version) != getRawVersion())
return error(instrprof_error::unsupported_version);
@@ -229,10 +235,10 @@ error_code RawInstrProfReader<IntPtrT>::readHeader(const RawHeader &Header) {
}
template <class IntPtrT>
-error_code
+std::error_code
RawInstrProfReader<IntPtrT>::readNextRecord(InstrProfRecord &Record) {
if (Data == DataEnd)
- if (error_code EC = readNextHeader(ProfileEnd))
+ if (std::error_code EC = readNextHeader(ProfileEnd))
return EC;
// Get the raw data.
@@ -286,7 +292,7 @@ bool IndexedInstrProfReader::hasFormat(const MemoryBuffer &DataBuffer) {
return Magic == IndexedInstrProf::Magic;
}
-error_code IndexedInstrProfReader::readHeader() {
+std::error_code IndexedInstrProfReader::readHeader() {
const unsigned char *Start =
(const unsigned char *)DataBuffer->getBufferStart();
const unsigned char *Cur = Start;
@@ -324,7 +330,7 @@ error_code IndexedInstrProfReader::readHeader() {
return success();
}
-error_code IndexedInstrProfReader::getFunctionCounts(
+std::error_code IndexedInstrProfReader::getFunctionCounts(
StringRef FuncName, uint64_t &FuncHash, std::vector<uint64_t> &Counts) {
const auto &Iter = Index->find(FuncName);
if (Iter == Index->end())
@@ -339,7 +345,8 @@ error_code IndexedInstrProfReader::getFunctionCounts(
return success();
}
-error_code IndexedInstrProfReader::readNextRecord(InstrProfRecord &Record) {
+std::error_code
+IndexedInstrProfReader::readNextRecord(InstrProfRecord &Record) {
// Are we out of records?
if (RecordIterator == Index->data_end())
return error(instrprof_error::eof);
diff --git a/lib/ProfileData/InstrProfWriter.cpp b/lib/ProfileData/InstrProfWriter.cpp
index 83c41d9..e55c299 100644
--- a/lib/ProfileData/InstrProfWriter.cpp
+++ b/lib/ProfileData/InstrProfWriter.cpp
@@ -66,9 +66,10 @@ public:
};
}
-error_code InstrProfWriter::addFunctionCounts(StringRef FunctionName,
- uint64_t FunctionHash,
- ArrayRef<uint64_t> Counters) {
+std::error_code
+InstrProfWriter::addFunctionCounts(StringRef FunctionName,
+ uint64_t FunctionHash,
+ ArrayRef<uint64_t> Counters) {
auto Where = FunctionData.find(FunctionName);
if (Where == FunctionData.end()) {
// If this is the first time we've seen this function, just add it.
diff --git a/lib/Support/APFloat.cpp b/lib/Support/APFloat.cpp
index f9fe095..7989e30 100644
--- a/lib/Support/APFloat.cpp
+++ b/lib/Support/APFloat.cpp
@@ -1372,7 +1372,9 @@ APFloat::addOrSubtractSpecials(const APFloat &rhs, bool subtract)
case PackCategoriesIntoKey(fcZero, fcNaN):
case PackCategoriesIntoKey(fcNormal, fcNaN):
case PackCategoriesIntoKey(fcInfinity, fcNaN):
- sign = false;
+ // We need to be sure to flip the sign here for subtraction because we
+ // don't have a separate negate operation, so -NaN becomes 0 - NaN here.
+ sign = rhs.sign ^ subtract;
category = fcNaN;
copySignificand(rhs);
return opOK;
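
The rule this hunk introduces: for x +/- NaN, the result NaN's sign is rhs.sign XOR subtract. Because IR-level fneg is modeled as 0 - x, constant folding -NaN now actually flips the sign bit instead of always clearing it. A tiny enumeration of the rule; this illustrates APFloat's folding behavior only, not host hardware, where the sign of NaN arithmetic is unspecified:

#include <cstdio>

int main() {
  // sign = rhs.sign ^ subtract, enumerated over all four cases.
  for (int rhsSign = 0; rhsSign <= 1; ++rhsSign)
    for (int subtract = 0; subtract <= 1; ++subtract)
      std::printf("rhs.sign=%d subtract=%d -> result sign=%d\n",
                  rhsSign, subtract, rhsSign ^ subtract);
  // subtract=1 with a positive NaN rhs (sign 0) yields sign 1, so a
  // folded 0 - NaN negates the NaN rather than producing +NaN.
}
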
diff --git a/lib/Support/ARMWinEH.cpp b/lib/Support/ARMWinEH.cpp
new file mode 100644
index 0000000..03c150f
--- /dev/null
+++ b/lib/Support/ARMWinEH.cpp
@@ -0,0 +1,38 @@
+//===-- ARMWinEH.cpp - Windows on ARM EH Support Functions ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/ARMWinEH.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvm {
+namespace ARM {
+namespace WinEH {
+std::pair<uint16_t, uint32_t> SavedRegisterMask(const RuntimeFunction &RF) {
+ uint8_t NumRegisters = RF.Reg();
+ uint8_t RegistersVFP = RF.R();
+ uint8_t LinkRegister = RF.L();
+ uint8_t ChainedFrame = RF.C();
+
+ uint16_t GPRMask = (ChainedFrame << 11) | (LinkRegister << 14);
+ uint32_t VFPMask = 0;
+
+ if (RegistersVFP)
+ VFPMask |= (((1 << ((NumRegisters + 1) % 8)) - 1) << 8);
+ else
+ GPRMask |= (((1 << (NumRegisters + 1)) - 1) << 4);
+
+ if (PrologueFolding(RF))
+ GPRMask |= (((1 << (NumRegisters + 1)) - 1) << (~RF.StackAdjust() & 0x3));
+
+ return std::make_pair(GPRMask, VFPMask);
+}
+}
+}
+}
+
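A worked example of the mask computation, with hypothetical unwind-record fields Reg()=3, R()=0, L()=1, C()=0 and no prologue folding: the saved run r4..r7 contributes bits 4-7 and the pushed LR contributes bit 14, giving 0x40f0. A standalone restatement of just that case:

#include <cstdint>
#include <cstdio>

int main() {
  unsigned NumRegisters = 3; // RF.Reg(): r4 .. r4+3 saved
  unsigned LinkRegister = 1; // RF.L():   LR pushed
  unsigned ChainedFrame = 0; // RF.C():   no frame chaining via r11

  uint16_t GPRMask = (ChainedFrame << 11) | (LinkRegister << 14);
  GPRMask |= ((1u << (NumRegisters + 1)) - 1) << 4; // bits 4-7: r4-r7

  std::printf("GPRMask = 0x%04x\n", (unsigned)GPRMask); // 0x40f0
}
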
diff --git a/lib/Support/Android.mk b/lib/Support/Android.mk
index 6efccf5..5de8d3f 100644
--- a/lib/Support/Android.mk
+++ b/lib/Support/Android.mk
@@ -6,6 +6,7 @@ support_SRC_FILES := \
APInt.cpp \
APSInt.cpp \
ARMBuildAttrs.cpp \
+ ARMWinEH.cpp \
Atomic.cpp \
BlockFrequency.cpp \
BranchProbability.cpp \
@@ -49,13 +50,16 @@ support_SRC_FILES := \
PrettyStackTrace.cpp \
Process.cpp \
Program.cpp \
+ RandomNumberGenerator.cpp \
Regex.cpp \
RWMutex.cpp \
+ ScaledNumber.cpp \
SearchForAddressOfSpecialSymbol.cpp \
Signals.cpp \
SmallPtrSet.cpp \
SmallVector.cpp \
SourceMgr.cpp \
+ SpecialCaseList.cpp \
Statistic.cpp \
StreamableMemoryObject.cpp \
StringExtras.cpp \
@@ -84,8 +88,7 @@ support_SRC_FILES := \
regerror.c \
regexec.c \
regfree.c \
- regstrlcpy.c \
- system_error.cpp
+ regstrlcpy.c
# For the host
diff --git a/lib/Support/Atomic.cpp b/lib/Support/Atomic.cpp
index 2ef32b0..ac4ff3e 100644
--- a/lib/Support/Atomic.cpp
+++ b/lib/Support/Atomic.cpp
@@ -7,7 +7,7 @@
//
//===----------------------------------------------------------------------===//
//
-// This header file implements atomic operations.
+// This file implements atomic operations.
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Support/CMakeLists.txt b/lib/Support/CMakeLists.txt
index b4c674d..9ecd559 100644
--- a/lib/Support/CMakeLists.txt
+++ b/lib/Support/CMakeLists.txt
@@ -3,6 +3,7 @@ add_llvm_library(LLVMSupport
APInt.cpp
APSInt.cpp
ARMBuildAttrs.cpp
+ ARMWinEH.cpp
Allocator.cpp
BlockFrequency.cpp
BranchProbability.cpp
@@ -40,10 +41,13 @@ add_llvm_library(LLVMSupport
MD5.cpp
PluginLoader.cpp
PrettyStackTrace.cpp
+ RandomNumberGenerator.cpp
Regex.cpp
+ ScaledNumber.cpp
SmallPtrSet.cpp
SmallVector.cpp
SourceMgr.cpp
+ SpecialCaseList.cpp
Statistic.cpp
StreamableMemoryObject.cpp
StringExtras.cpp
@@ -82,7 +86,6 @@ add_llvm_library(LLVMSupport
RWMutex.cpp
SearchForAddressOfSpecialSymbol.cpp
Signals.cpp
- system_error.cpp
TargetRegistry.cpp
ThreadLocal.cpp
Threading.cpp
@@ -99,7 +102,6 @@ add_llvm_library(LLVMSupport
Unix/Program.inc
Unix/RWMutex.inc
Unix/Signals.inc
- Unix/system_error.inc
Unix/ThreadLocal.inc
Unix/TimeValue.inc
Unix/Watchdog.inc
@@ -112,7 +114,6 @@ add_llvm_library(LLVMSupport
Windows/Program.inc
Windows/RWMutex.inc
Windows/Signals.inc
- Windows/system_error.inc
Windows/ThreadLocal.inc
Windows/TimeValue.inc
Windows/Watchdog.inc
diff --git a/lib/Support/CommandLine.cpp b/lib/Support/CommandLine.cpp
index 37bbf48..87348f7 100644
--- a/lib/Support/CommandLine.cpp
+++ b/lib/Support/CommandLine.cpp
@@ -31,10 +31,10 @@
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Support/system_error.h"
#include <cerrno>
#include <cstdlib>
#include <map>
+#include <system_error>
using namespace llvm;
using namespace cl;
@@ -145,6 +145,7 @@ void OptionCategory::registerCategory() {
static void GetOptionInfo(SmallVectorImpl<Option*> &PositionalOpts,
SmallVectorImpl<Option*> &SinkOpts,
StringMap<Option*> &OptionsMap) {
+ bool HadErrors = false;
SmallVector<const char*, 16> OptionNames;
Option *CAOpt = nullptr; // The ConsumeAfter option if it exists.
for (Option *O = RegisteredOptionList; O; O = O->getNextRegisteredOption()) {
@@ -158,8 +159,9 @@ static void GetOptionInfo(SmallVectorImpl<Option*> &PositionalOpts,
for (size_t i = 0, e = OptionNames.size(); i != e; ++i) {
// Add argument to the argument map!
if (OptionsMap.GetOrCreateValue(OptionNames[i], O).second != O) {
- errs() << ProgramName << ": CommandLine Error: Argument '"
- << OptionNames[i] << "' defined more than once!\n";
+ errs() << ProgramName << ": CommandLine Error: Option '"
+ << OptionNames[i] << "' registered more than once!\n";
+ HadErrors = true;
}
}
@@ -171,8 +173,10 @@ static void GetOptionInfo(SmallVectorImpl<Option*> &PositionalOpts,
else if (O->getMiscFlags() & cl::Sink) // Remember sink options
SinkOpts.push_back(O);
else if (O->getNumOccurrencesFlag() == cl::ConsumeAfter) {
- if (CAOpt)
+ if (CAOpt) {
O->error("Cannot specify more than one option with cl::ConsumeAfter!");
+ HadErrors = true;
+ }
CAOpt = O;
}
}
@@ -182,6 +186,12 @@ static void GetOptionInfo(SmallVectorImpl<Option*> &PositionalOpts,
// Make sure that they are in order of registration not backwards.
std::reverse(PositionalOpts.begin(), PositionalOpts.end());
+
+ // Fail hard if there were errors. These are strictly unrecoverable and
+ // indicate serious issues such as conflicting option names or an incorrectly
+ // linked LLVM distribution.
+ if (HadErrors)
+ report_fatal_error("inconsistency in registered CommandLine options");
}
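
The shape of this change: keep scanning after the first conflict so every duplicate is diagnosed, then fail once at the end. A minimal sketch of that detect-all-then-abort pattern over a hypothetical option registry:

#include <cstdio>
#include <cstdlib>
#include <map>
#include <string>
#include <vector>

int main() {
  // Hypothetical registered option names; "verbose" conflicts.
  std::vector<std::string> Registered = {"verbose", "debug", "verbose"};
  std::map<std::string, int> Seen;
  bool HadErrors = false;
  for (const auto &Name : Registered)
    if (++Seen[Name] > 1) {
      std::fprintf(stderr, "Option '%s' registered more than once!\n",
                   Name.c_str());
      HadErrors = true; // keep going: report every conflict first
    }
  if (HadErrors) {      // then fail hard exactly once
    std::fprintf(stderr, "inconsistency in registered options\n");
    return EXIT_FAILURE;
  }
}
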
@@ -621,9 +631,11 @@ void cl::TokenizeWindowsCommandLine(StringRef Src, StringSaver &Saver,
static bool ExpandResponseFile(const char *FName, StringSaver &Saver,
TokenizerCallback Tokenizer,
SmallVectorImpl<const char *> &NewArgv) {
- std::unique_ptr<MemoryBuffer> MemBuf;
- if (MemoryBuffer::getFile(FName, MemBuf))
+ ErrorOr<std::unique_ptr<MemoryBuffer>> MemBufOrErr =
+ MemoryBuffer::getFile(FName);
+ if (!MemBufOrErr)
return false;
+ std::unique_ptr<MemoryBuffer> MemBuf = std::move(MemBufOrErr.get());
StringRef Str(MemBuf->getBufferStart(), MemBuf->getBufferSize());
// If we have a UTF-16 byte order mark, convert to UTF-8 for parsing.
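
MemoryBuffer::getFile now returns ErrorOr<std::unique_ptr<MemoryBuffer>> instead of filling an out-parameter: test for success, then move the buffer out. A self-contained sketch of that idiom using a local ErrorOrT type as a stand-in for llvm::ErrorOr:

#include <cstdio>
#include <memory>
#include <string>
#include <system_error>

// Tiny stand-in for llvm::ErrorOr<T>, just enough for the calling idiom.
template <typename T> struct ErrorOrT {
  T Value;
  std::error_code EC;
  explicit operator bool() const { return !EC; }
  std::error_code getError() const { return EC; }
  T &get() { return Value; }
};

// Hypothetical loader standing in for MemoryBuffer::getFile().
static ErrorOrT<std::unique_ptr<std::string>> getFile(const char *Name) {
  if (!Name)
    return {nullptr, std::make_error_code(std::errc::invalid_argument)};
  return {std::make_unique<std::string>("contents of " + std::string(Name)),
          std::error_code()};
}

int main() {
  auto BufOrErr = getFile("a.rsp");
  if (!BufOrErr) // failure: inspect BufOrErr.getError()
    return 1;
  std::unique_ptr<std::string> Buf = std::move(BufOrErr.get());
  std::printf("%s\n", Buf->c_str());
}
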
@@ -1699,7 +1711,7 @@ public:
OS << "LLVM (http://llvm.org/):\n"
<< " " << PACKAGE_NAME << " version " << PACKAGE_VERSION;
#ifdef LLVM_VERSION_INFO
- OS << LLVM_VERSION_INFO;
+ OS << " " << LLVM_VERSION_INFO;
#endif
OS << "\n ";
#ifndef __OPTIMIZE__
diff --git a/lib/Support/ConvertUTF.c b/lib/Support/ConvertUTF.c
index 23f17ca..128459a 100644
--- a/lib/Support/ConvertUTF.c
+++ b/lib/Support/ConvertUTF.c
@@ -51,6 +51,7 @@
#ifdef CVTUTF_DEBUG
#include <stdio.h>
#endif
+#include <assert.h>
static const int halfShift = 10; /* used for shifting by 10 bits */
@@ -392,6 +393,99 @@ Boolean isLegalUTF8Sequence(const UTF8 *source, const UTF8 *sourceEnd) {
/* --------------------------------------------------------------------- */
+static unsigned
+findMaximalSubpartOfIllFormedUTF8Sequence(const UTF8 *source,
+ const UTF8 *sourceEnd) {
+ UTF8 b1, b2, b3;
+
+ assert(!isLegalUTF8Sequence(source, sourceEnd));
+
+ /*
+ * Unicode 6.3.0, D93b:
+ *
+ * Maximal subpart of an ill-formed subsequence: The longest code unit
+ * subsequence starting at an unconvertible offset that is either:
+ * a. the initial subsequence of a well-formed code unit sequence, or
+ * b. a subsequence of length one.
+ */
+
+ if (source == sourceEnd)
+ return 0;
+
+ /*
+ * Perform case analysis. See Unicode 6.3.0, Table 3-7. Well-Formed UTF-8
+ * Byte Sequences.
+ */
+
+ b1 = *source;
+ ++source;
+ if (b1 >= 0xC2 && b1 <= 0xDF) {
+ /*
+ * First byte is valid, but we know that this code unit sequence is
+ * invalid, so the maximal subpart has to end after the first byte.
+ */
+ return 1;
+ }
+
+ if (source == sourceEnd)
+ return 1;
+
+ b2 = *source;
+ ++source;
+
+ if (b1 == 0xE0) {
+ return (b2 >= 0xA0 && b2 <= 0xBF) ? 2 : 1;
+ }
+ if (b1 >= 0xE1 && b1 <= 0xEC) {
+ return (b2 >= 0x80 && b2 <= 0xBF) ? 2 : 1;
+ }
+ if (b1 == 0xED) {
+ return (b2 >= 0x80 && b2 <= 0x9F) ? 2 : 1;
+ }
+ if (b1 >= 0xEE && b1 <= 0xEF) {
+ return (b2 >= 0x80 && b2 <= 0xBF) ? 2 : 1;
+ }
+ if (b1 == 0xF0) {
+ if (b2 >= 0x90 && b2 <= 0xBF) {
+ if (source == sourceEnd)
+ return 2;
+
+ b3 = *source;
+ return (b3 >= 0x80 && b3 <= 0xBF) ? 3 : 2;
+ }
+ return 1;
+ }
+ if (b1 >= 0xF1 && b1 <= 0xF3) {
+ if (b2 >= 0x80 && b2 <= 0xBF) {
+ if (source == sourceEnd)
+ return 2;
+
+ b3 = *source;
+ return (b3 >= 0x80 && b3 <= 0xBF) ? 3 : 2;
+ }
+ return 1;
+ }
+ if (b1 == 0xF4) {
+ if (b2 >= 0x80 && b2 <= 0x8F) {
+ if (source == sourceEnd)
+ return 2;
+
+ b3 = *source;
+ return (b3 >= 0x80 && b3 <= 0xBF) ? 3 : 2;
+ }
+ return 1;
+ }
+
+ assert((b1 >= 0x80 && b1 <= 0xC1) || b1 >= 0xF5);
+ /*
+ * There are no valid sequences that start with these bytes. Maximal subpart
+ * is defined to have length 1 in these cases.
+ */
+ return 1;
+}
+
+/* --------------------------------------------------------------------- */
+
/*
* Exported function to return the total number of bytes in a codepoint
* represented in UTF-8, given the value of the first byte.
@@ -491,9 +585,10 @@ ConversionResult ConvertUTF8toUTF16 (
/* --------------------------------------------------------------------- */
-ConversionResult ConvertUTF8toUTF32 (
+static ConversionResult ConvertUTF8toUTF32Impl(
const UTF8** sourceStart, const UTF8* sourceEnd,
- UTF32** targetStart, UTF32* targetEnd, ConversionFlags flags) {
+ UTF32** targetStart, UTF32* targetEnd, ConversionFlags flags,
+ Boolean InputIsPartial) {
ConversionResult result = conversionOK;
const UTF8* source = *sourceStart;
UTF32* target = *targetStart;
@@ -501,12 +596,42 @@ ConversionResult ConvertUTF8toUTF32 (
UTF32 ch = 0;
unsigned short extraBytesToRead = trailingBytesForUTF8[*source];
if (extraBytesToRead >= sourceEnd - source) {
- result = sourceExhausted; break;
+ if (flags == strictConversion || InputIsPartial) {
+ result = sourceExhausted;
+ break;
+ } else {
+ result = sourceIllegal;
+
+ /*
+ * Replace the maximal subpart of ill-formed sequence with
+ * replacement character.
+ */
+ source += findMaximalSubpartOfIllFormedUTF8Sequence(source,
+ sourceEnd);
+ *target++ = UNI_REPLACEMENT_CHAR;
+ continue;
+ }
}
+ if (target >= targetEnd) {
+ result = targetExhausted; break;
+ }
+
/* Do this check whether lenient or strict */
if (!isLegalUTF8(source, extraBytesToRead+1)) {
result = sourceIllegal;
- break;
+ if (flags == strictConversion) {
+ /* Abort conversion. */
+ break;
+ } else {
+ /*
+ * Replace the maximal subpart of ill-formed sequence with
+ * replacement character.
+ */
+ source += findMaximalSubpartOfIllFormedUTF8Sequence(source,
+ sourceEnd);
+ *target++ = UNI_REPLACEMENT_CHAR;
+ continue;
+ }
}
/*
* The cases all fall through. See "Note A" below.
@@ -521,10 +646,6 @@ ConversionResult ConvertUTF8toUTF32 (
}
ch -= offsetsFromUTF8[extraBytesToRead];
- if (target >= targetEnd) {
- source -= (extraBytesToRead+1); /* Back up the source pointer! */
- result = targetExhausted; break;
- }
if (ch <= UNI_MAX_LEGAL_UTF32) {
/*
* UTF-16 surrogate values are illegal in UTF-32, and anything
@@ -551,6 +672,22 @@ ConversionResult ConvertUTF8toUTF32 (
return result;
}
+ConversionResult ConvertUTF8toUTF32Partial(const UTF8 **sourceStart,
+ const UTF8 *sourceEnd,
+ UTF32 **targetStart,
+ UTF32 *targetEnd,
+ ConversionFlags flags) {
+ return ConvertUTF8toUTF32Impl(sourceStart, sourceEnd, targetStart, targetEnd,
+ flags, /*InputIsPartial=*/true);
+}
+
+ConversionResult ConvertUTF8toUTF32(const UTF8 **sourceStart,
+ const UTF8 *sourceEnd, UTF32 **targetStart,
+ UTF32 *targetEnd, ConversionFlags flags) {
+ return ConvertUTF8toUTF32Impl(sourceStart, sourceEnd, targetStart, targetEnd,
+ flags, /*InputIsPartial=*/false);
+}
+
/* ---------------------------------------------------------------------
Note A.
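
A worked example of D93b as used here: in the byte sequence E1 80 E2, the lead byte E1 requires two continuation bytes, and E1 80 is a truncated well-formed prefix, so the maximal ill-formed subpart has length 2, one U+FFFD is emitted, and scanning resumes at E2. A sketch of just the E1..EC row of the case analysis above:

#include <cstdio>

// Length of the maximal subpart when the lead byte is in E1..EC: 2 if the
// second byte is a continuation byte, else 1. (The full function above
// handles every row of Unicode Table 3-7.)
static unsigned subpartLenE1toEC(unsigned char b2) {
  return (b2 >= 0x80 && b2 <= 0xBF) ? 2 : 1;
}

int main() {
  std::printf("E1 80 E2 -> subpart len %u\n", subpartLenE1toEC(0x80)); // 2
  std::printf("E1 41    -> subpart len %u\n", subpartLenE1toEC(0x41)); // 1
}
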
diff --git a/lib/Support/CrashRecoveryContext.cpp b/lib/Support/CrashRecoveryContext.cpp
index a426377..9b0e443 100644
--- a/lib/Support/CrashRecoveryContext.cpp
+++ b/lib/Support/CrashRecoveryContext.cpp
@@ -22,7 +22,8 @@ namespace {
struct CrashRecoveryContextImpl;
-static ManagedStatic<sys::ThreadLocal<const CrashRecoveryContextImpl> > CurrentContext;
+static ManagedStatic<
+ sys::ThreadLocal<const CrashRecoveryContextImpl> > CurrentContext;
struct CrashRecoveryContextImpl {
CrashRecoveryContext *CRC;
@@ -231,7 +232,8 @@ void CrashRecoveryContext::Disable() {
#include <signal.h>
-static const int Signals[] = { SIGABRT, SIGBUS, SIGFPE, SIGILL, SIGSEGV, SIGTRAP };
+static const int Signals[] =
+ { SIGABRT, SIGBUS, SIGFPE, SIGILL, SIGSEGV, SIGTRAP };
static const unsigned NumSignals = sizeof(Signals) / sizeof(Signals[0]);
static struct sigaction PrevActions[NumSignals];
@@ -330,12 +332,26 @@ const std::string &CrashRecoveryContext::getBacktrace() const {
return CRC->Backtrace;
}
-//
+// FIXME: Portability.
+static void setThreadBackgroundPriority() {
+#ifdef __APPLE__
+ setpriority(PRIO_DARWIN_THREAD, 0, PRIO_DARWIN_BG);
+#endif
+}
+
+static bool hasThreadBackgroundPriority() {
+#ifdef __APPLE__
+ return getpriority(PRIO_DARWIN_THREAD, 0) == 1;
+#else
+ return false;
+#endif
+}
namespace {
struct RunSafelyOnThreadInfo {
function_ref<void()> Fn;
CrashRecoveryContext *CRC;
+ bool UseBackgroundPriority;
bool Result;
};
}
@@ -343,11 +359,16 @@ struct RunSafelyOnThreadInfo {
static void RunSafelyOnThread_Dispatch(void *UserData) {
RunSafelyOnThreadInfo *Info =
reinterpret_cast<RunSafelyOnThreadInfo*>(UserData);
+
+ if (Info->UseBackgroundPriority)
+ setThreadBackgroundPriority();
+
Info->Result = Info->CRC->RunSafely(Info->Fn);
}
bool CrashRecoveryContext::RunSafelyOnThread(function_ref<void()> Fn,
unsigned RequestedStackSize) {
- RunSafelyOnThreadInfo Info = { Fn, this, false };
+ bool UseBackgroundPriority = hasThreadBackgroundPriority();
+ RunSafelyOnThreadInfo Info = { Fn, this, UseBackgroundPriority, false };
llvm_execute_on_thread(RunSafelyOnThread_Dispatch, &Info, RequestedStackSize);
if (CrashRecoveryContextImpl *CRC = (CrashRecoveryContextImpl *)Impl)
CRC->setSwitchedThread();
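
The point of the new plumbing: priority is a per-thread property, so the spawning thread's background state is sampled up front and reapplied as the first action on the worker. A generic capture-and-reapply sketch; the real code uses Darwin's PRIO_DARWIN_BG, while the stand-ins here only print:

#include <cstdio>
#include <thread>

struct Info {
  bool UseBackgroundPriority; // sampled on the spawning thread
};

static void setBackgroundPriority() { std::puts("worker: set background"); }

static void dispatch(Info *I) {
  if (I->UseBackgroundPriority) // reapply before doing any real work
    setBackgroundPriority();
  std::puts("worker: running job");
}

int main() {
  Info I{/*UseBackgroundPriority=*/true};
  std::thread T(dispatch, &I);
  T.join();
}
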
diff --git a/lib/Support/DataExtractor.cpp b/lib/Support/DataExtractor.cpp
index 7b82921..5d6d60a 100644
--- a/lib/Support/DataExtractor.cpp
+++ b/lib/Support/DataExtractor.cpp
@@ -21,7 +21,7 @@ static T getU(uint32_t *offset_ptr, const DataExtractor *de,
if (de->isValidOffsetForDataOfSize(offset, sizeof(val))) {
std::memcpy(&val, &Data[offset], sizeof(val));
if (sys::IsLittleEndianHost != isLittleEndian)
- val = sys::SwapByteOrder(val);
+ sys::swapByteOrder(val);
// Advance the offset
*offset_ptr += sizeof(val);
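
The rename reflects a split in the endian helpers: getSwappedBytes returns a swapped copy, while swapByteOrder mutates its argument in place, which is why the assignment disappears at this call site. Local stand-ins showing the two shapes:

#include <cstdint>
#include <cstdio>

// Stand-ins for llvm::sys::getSwappedBytes / swapByteOrder (32-bit case).
static uint32_t getSwappedBytes(uint32_t V) {
  return (V >> 24) | ((V >> 8) & 0xFF00u) | ((V << 8) & 0xFF0000u) |
         (V << 24);
}
static void swapByteOrder(uint32_t &V) { V = getSwappedBytes(V); }

int main() {
  uint32_t V = 0x11223344;
  swapByteOrder(V);                          // in place, no assignment
  std::printf("%08x\n", V);                  // 44332211
  std::printf("%08x\n", getSwappedBytes(V)); // copy: 11223344 again
}
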
diff --git a/lib/Support/DataStream.cpp b/lib/Support/DataStream.cpp
index eec8584..32653de 100644
--- a/lib/Support/DataStream.cpp
+++ b/lib/Support/DataStream.cpp
@@ -18,10 +18,10 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Program.h"
-#include "llvm/Support/system_error.h"
#include <cerrno>
#include <cstdio>
#include <string>
+#include <system_error>
#if !defined(_MSC_VER) && !defined(__MINGW32__)
#include <unistd.h>
#else
@@ -64,11 +64,11 @@ public:
return read(Fd, buf, len);
}
- error_code OpenFile(const std::string &Filename) {
+ std::error_code OpenFile(const std::string &Filename) {
if (Filename == "-") {
Fd = 0;
sys::ChangeStdinToBinary();
- return error_code::success();
+ return std::error_code();
}
return sys::fs::openFileForRead(Filename, Fd);
@@ -81,7 +81,7 @@ namespace llvm {
DataStreamer *getDataFileStreamer(const std::string &Filename,
std::string *StrError) {
DataFileStreamer *s = new DataFileStreamer();
- if (error_code e = s->OpenFile(Filename)) {
+ if (std::error_code e = s->OpenFile(Filename)) {
*StrError = std::string("Could not open ") + Filename + ": " +
e.message() + "\n";
return nullptr;
diff --git a/lib/Support/DynamicLibrary.cpp b/lib/Support/DynamicLibrary.cpp
index 82d7c0c..d2b551e 100644
--- a/lib/Support/DynamicLibrary.cpp
+++ b/lib/Support/DynamicLibrary.cpp
@@ -7,7 +7,7 @@
//
//===----------------------------------------------------------------------===//
//
-// This header file implements the operating system DynamicLibrary concept.
+// This file implements the operating system DynamicLibrary concept.
//
// FIXME: This file leaks ExplicitSymbols and OpenedHandles!
//
diff --git a/lib/Support/ErrorHandling.cpp b/lib/Support/ErrorHandling.cpp
index 342c4f0..c36007f 100644
--- a/lib/Support/ErrorHandling.cpp
+++ b/lib/Support/ErrorHandling.cpp
@@ -18,8 +18,12 @@
#include "llvm/ADT/Twine.h"
#include "llvm/Config/config.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/Errc.h"
#include "llvm/Support/Signals.h"
+#include "llvm/Support/Mutex.h"
+#include "llvm/Support/MutexGuard.h"
#include "llvm/Support/Threading.h"
+#include "llvm/Support/WindowsError.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <cstdlib>
@@ -37,17 +41,20 @@ using namespace llvm;
static fatal_error_handler_t ErrorHandler = nullptr;
static void *ErrorHandlerUserData = nullptr;
+static sys::Mutex ErrorHandlerMutex;
+
void llvm::install_fatal_error_handler(fatal_error_handler_t handler,
void *user_data) {
- assert(!llvm_is_multithreaded() &&
- "Cannot register error handlers after starting multithreaded mode!\n");
+ llvm::MutexGuard Lock(ErrorHandlerMutex);
assert(!ErrorHandler && "Error handler already registered!\n");
ErrorHandler = handler;
ErrorHandlerUserData = user_data;
}
void llvm::remove_fatal_error_handler() {
+ llvm::MutexGuard Lock(ErrorHandlerMutex);
ErrorHandler = nullptr;
+ ErrorHandlerUserData = nullptr;
}
void llvm::report_fatal_error(const char *Reason, bool GenCrashDiag) {
@@ -63,8 +70,18 @@ void llvm::report_fatal_error(StringRef Reason, bool GenCrashDiag) {
}
void llvm::report_fatal_error(const Twine &Reason, bool GenCrashDiag) {
- if (ErrorHandler) {
- ErrorHandler(ErrorHandlerUserData, Reason.str(), GenCrashDiag);
+ llvm::fatal_error_handler_t handler = nullptr;
+ void* handlerData = nullptr;
+ {
+ // Only acquire the mutex while reading the handler, so as not to invoke a
+ // user-supplied callback under a lock.
+ llvm::MutexGuard Lock(ErrorHandlerMutex);
+ handler = ErrorHandler;
+ handlerData = ErrorHandlerUserData;
+ }
+
+ if (handler) {
+ handler(handlerData, Reason.str(), GenCrashDiag);
} else {
// Blast the result out to stderr. We don't try hard to make sure this
// succeeds (e.g. handling EINTR) and we can't use errs() here because
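
Note the pattern in the new report_fatal_error body: the handler pointer is copied out while the mutex is held and invoked only after the lock is released, so a user callback that itself reports an error cannot deadlock against the registry lock. A minimal sketch:

#include <cstdio>
#include <mutex>

using Handler = void (*)(const char *);
static std::mutex M;
static Handler H = nullptr;

static void install(Handler Fn) {
  std::lock_guard<std::mutex> L(M);
  H = Fn;
}

static void report(const char *Msg) {
  Handler Local;
  {
    std::lock_guard<std::mutex> L(M); // hold the lock only to read
    Local = H;
  }
  if (Local)
    Local(Msg); // runs unlocked: safe even if it calls report() again
}

int main() {
  install(+[](const char *Msg) { std::fprintf(stderr, "fatal: %s\n", Msg); });
  report("boom");
}
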
@@ -119,3 +136,70 @@ void LLVMInstallFatalErrorHandler(LLVMFatalErrorHandler Handler) {
void LLVMResetFatalErrorHandler() {
remove_fatal_error_handler();
}
+
+#ifdef LLVM_ON_WIN32
+
+#include <winerror.h>
+
+// I'd rather not double the line count of the following.
+#define MAP_ERR_TO_COND(x, y) \
+ case x: \
+ return make_error_code(errc::y)
+
+std::error_code llvm::mapWindowsError(unsigned EV) {
+ switch (EV) {
+ MAP_ERR_TO_COND(ERROR_ACCESS_DENIED, permission_denied);
+ MAP_ERR_TO_COND(ERROR_ALREADY_EXISTS, file_exists);
+ MAP_ERR_TO_COND(ERROR_BAD_UNIT, no_such_device);
+ MAP_ERR_TO_COND(ERROR_BUFFER_OVERFLOW, filename_too_long);
+ MAP_ERR_TO_COND(ERROR_BUSY, device_or_resource_busy);
+ MAP_ERR_TO_COND(ERROR_BUSY_DRIVE, device_or_resource_busy);
+ MAP_ERR_TO_COND(ERROR_CANNOT_MAKE, permission_denied);
+ MAP_ERR_TO_COND(ERROR_CANTOPEN, io_error);
+ MAP_ERR_TO_COND(ERROR_CANTREAD, io_error);
+ MAP_ERR_TO_COND(ERROR_CANTWRITE, io_error);
+ MAP_ERR_TO_COND(ERROR_CURRENT_DIRECTORY, permission_denied);
+ MAP_ERR_TO_COND(ERROR_DEV_NOT_EXIST, no_such_device);
+ MAP_ERR_TO_COND(ERROR_DEVICE_IN_USE, device_or_resource_busy);
+ MAP_ERR_TO_COND(ERROR_DIR_NOT_EMPTY, directory_not_empty);
+ MAP_ERR_TO_COND(ERROR_DIRECTORY, invalid_argument);
+ MAP_ERR_TO_COND(ERROR_DISK_FULL, no_space_on_device);
+ MAP_ERR_TO_COND(ERROR_FILE_EXISTS, file_exists);
+ MAP_ERR_TO_COND(ERROR_FILE_NOT_FOUND, no_such_file_or_directory);
+ MAP_ERR_TO_COND(ERROR_HANDLE_DISK_FULL, no_space_on_device);
+ MAP_ERR_TO_COND(ERROR_INVALID_ACCESS, permission_denied);
+ MAP_ERR_TO_COND(ERROR_INVALID_DRIVE, no_such_device);
+ MAP_ERR_TO_COND(ERROR_INVALID_FUNCTION, function_not_supported);
+ MAP_ERR_TO_COND(ERROR_INVALID_HANDLE, invalid_argument);
+ MAP_ERR_TO_COND(ERROR_INVALID_NAME, invalid_argument);
+ MAP_ERR_TO_COND(ERROR_LOCK_VIOLATION, no_lock_available);
+ MAP_ERR_TO_COND(ERROR_LOCKED, no_lock_available);
+ MAP_ERR_TO_COND(ERROR_NEGATIVE_SEEK, invalid_argument);
+ MAP_ERR_TO_COND(ERROR_NOACCESS, permission_denied);
+ MAP_ERR_TO_COND(ERROR_NOT_ENOUGH_MEMORY, not_enough_memory);
+ MAP_ERR_TO_COND(ERROR_NOT_READY, resource_unavailable_try_again);
+ MAP_ERR_TO_COND(ERROR_OPEN_FAILED, io_error);
+ MAP_ERR_TO_COND(ERROR_OPEN_FILES, device_or_resource_busy);
+ MAP_ERR_TO_COND(ERROR_OUTOFMEMORY, not_enough_memory);
+ MAP_ERR_TO_COND(ERROR_PATH_NOT_FOUND, no_such_file_or_directory);
+ MAP_ERR_TO_COND(ERROR_BAD_NETPATH, no_such_file_or_directory);
+ MAP_ERR_TO_COND(ERROR_READ_FAULT, io_error);
+ MAP_ERR_TO_COND(ERROR_RETRY, resource_unavailable_try_again);
+ MAP_ERR_TO_COND(ERROR_SEEK, io_error);
+ MAP_ERR_TO_COND(ERROR_SHARING_VIOLATION, permission_denied);
+ MAP_ERR_TO_COND(ERROR_TOO_MANY_OPEN_FILES, too_many_files_open);
+ MAP_ERR_TO_COND(ERROR_WRITE_FAULT, io_error);
+ MAP_ERR_TO_COND(ERROR_WRITE_PROTECT, permission_denied);
+ MAP_ERR_TO_COND(WSAEACCES, permission_denied);
+ MAP_ERR_TO_COND(WSAEBADF, bad_file_descriptor);
+ MAP_ERR_TO_COND(WSAEFAULT, bad_address);
+ MAP_ERR_TO_COND(WSAEINTR, interrupted);
+ MAP_ERR_TO_COND(WSAEINVAL, invalid_argument);
+ MAP_ERR_TO_COND(WSAEMFILE, too_many_files_open);
+ MAP_ERR_TO_COND(WSAENAMETOOLONG, filename_too_long);
+ default:
+ return std::error_code(EV, std::system_category());
+ }
+}
+
+#endif
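
With this mapping, raw Win32 error numbers become std::error_code values that compare portably against std::errc conditions. A self-contained sketch of one row; the local constant mirrors the Win32 value of ERROR_FILE_NOT_FOUND, and the real function covers the whole table:

#include <cstdio>
#include <system_error>

static std::error_code mapWindowsError(unsigned EV) {
  const unsigned FileNotFound = 2; // Win32 ERROR_FILE_NOT_FOUND
  if (EV == FileNotFound)
    return std::make_error_code(std::errc::no_such_file_or_directory);
  return std::error_code(EV, std::system_category()); // pass through
}

int main() {
  std::error_code EC = mapWindowsError(2);
  // The payoff: category-aware comparison against portable conditions.
  std::printf("%d\n", EC == std::errc::no_such_file_or_directory); // 1
}
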
diff --git a/lib/Support/FileOutputBuffer.cpp b/lib/Support/FileOutputBuffer.cpp
index 49311c2..2e740ca 100644
--- a/lib/Support/FileOutputBuffer.cpp
+++ b/lib/Support/FileOutputBuffer.cpp
@@ -11,10 +11,11 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/Support/Errc.h"
#include "llvm/Support/FileOutputBuffer.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Support/system_error.h"
+#include <system_error>
using llvm::sys::fs::mapped_file_region;
@@ -30,13 +31,13 @@ FileOutputBuffer::~FileOutputBuffer() {
sys::fs::remove(Twine(TempPath));
}
-error_code FileOutputBuffer::create(StringRef FilePath,
- size_t Size,
- std::unique_ptr<FileOutputBuffer> &Result,
- unsigned Flags) {
+std::error_code
+FileOutputBuffer::create(StringRef FilePath, size_t Size,
+ std::unique_ptr<FileOutputBuffer> &Result,
+ unsigned Flags) {
// If file already exists, it must be a regular file (to be mappable).
sys::fs::file_status Stat;
- error_code EC = sys::fs::status(FilePath, Stat);
+ std::error_code EC = sys::fs::status(FilePath, Stat);
switch (Stat.type()) {
case sys::fs::file_type::file_not_found:
// If file does not exist, we'll create one.
@@ -81,16 +82,16 @@ error_code FileOutputBuffer::create(StringRef FilePath,
if (Result)
MappedFile.release();
- return error_code::success();
+ return std::error_code();
}
-error_code FileOutputBuffer::commit(int64_t NewSmallerSize) {
+std::error_code FileOutputBuffer::commit(int64_t NewSmallerSize) {
// Unmap buffer, letting OS flush dirty pages to file on disk.
Region.reset(nullptr);
// If requested, resize file as part of commit.
if ( NewSmallerSize != -1 ) {
- error_code EC = sys::fs::resize_file(Twine(TempPath), NewSmallerSize);
+ std::error_code EC = sys::fs::resize_file(Twine(TempPath), NewSmallerSize);
if (EC)
return EC;
}
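
The contract visible in create/commit (and in the TempPath cleanup in the destructor shown earlier in this hunk): output is staged in a uniquely named temporary file and only moved over the destination on commit, so readers never observe a half-written result. A stdio sketch of that two-phase pattern with hypothetical paths; the real code randomizes the temp name and writes through a memory mapping:

#include <cstdio>
#include <string>

int main() {
  std::string Final = "out.bin", Temp = Final + ".tmp";
  if (FILE *F = std::fopen(Temp.c_str(), "wb")) {
    std::fputs("staged contents", F);
    std::fclose(F);
    if (std::rename(Temp.c_str(), Final.c_str()) != 0) { // "commit"
      std::perror("commit failed");
      std::remove(Temp.c_str()); // destructor path: discard the staging file
      return 1;
    }
  }
}
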
diff --git a/lib/Support/FileUtilities.cpp b/lib/Support/FileUtilities.cpp
index b2dc47d..8a23491 100644
--- a/lib/Support/FileUtilities.cpp
+++ b/lib/Support/FileUtilities.cpp
@@ -17,10 +17,10 @@
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Support/system_error.h"
#include <cctype>
#include <cstdlib>
#include <cstring>
+#include <system_error>
using namespace llvm;
static bool isSignedChar(char C) {
@@ -176,18 +176,21 @@ int llvm::DiffFilesWithTolerance(StringRef NameA,
std::string *Error) {
// Now it's safe to mmap the files into memory because both files
// have a non-zero size.
- std::unique_ptr<MemoryBuffer> F1;
- if (error_code ec = MemoryBuffer::getFile(NameA, F1)) {
+ ErrorOr<std::unique_ptr<MemoryBuffer>> F1OrErr = MemoryBuffer::getFile(NameA);
+ if (std::error_code EC = F1OrErr.getError()) {
if (Error)
- *Error = ec.message();
+ *Error = EC.message();
return 2;
}
- std::unique_ptr<MemoryBuffer> F2;
- if (error_code ec = MemoryBuffer::getFile(NameB, F2)) {
+ std::unique_ptr<MemoryBuffer> F1 = std::move(F1OrErr.get());
+
+ ErrorOr<std::unique_ptr<MemoryBuffer>> F2OrErr = MemoryBuffer::getFile(NameB);
+ if (std::error_code EC = F2OrErr.getError()) {
if (Error)
- *Error = ec.message();
+ *Error = EC.message();
return 2;
}
+ std::unique_ptr<MemoryBuffer> F2 = std::move(F2OrErr.get());
// Okay, now that we opened the files, scan them for the first difference.
const char *File1Start = F1->getBufferStart();
diff --git a/lib/Support/GraphWriter.cpp b/lib/Support/GraphWriter.cpp
index f5b2943..e68ee43 100644
--- a/lib/Support/GraphWriter.cpp
+++ b/lib/Support/GraphWriter.cpp
@@ -68,7 +68,7 @@ StringRef llvm::DOT::getColorString(unsigned ColorNumber) {
std::string llvm::createGraphFilename(const Twine &Name, int &FD) {
FD = -1;
SmallString<128> Filename;
- error_code EC = sys::fs::createTemporaryFile(Name, "dot", FD, Filename);
+ std::error_code EC = sys::fs::createTemporaryFile(Name, "dot", FD, Filename);
if (EC) {
errs() << "Error: " << EC.message() << "\n";
return "";
@@ -78,148 +78,165 @@ std::string llvm::createGraphFilename(const Twine &Name, int &FD) {
return Filename.str();
}
-// Execute the graph viewer. Return true if successful.
-static bool LLVM_ATTRIBUTE_UNUSED
-ExecGraphViewer(StringRef ExecPath, std::vector<const char*> &args,
- StringRef Filename, bool wait, std::string &ErrMsg) {
+// Execute the graph viewer. Return true if there were errors.
+static bool ExecGraphViewer(StringRef ExecPath, std::vector<const char *> &args,
+ StringRef Filename, bool wait,
+ std::string &ErrMsg) {
+ assert(args.back() == nullptr);
if (wait) {
- if (sys::ExecuteAndWait(ExecPath, &args[0],nullptr,nullptr,0,0,&ErrMsg)) {
+ if (sys::ExecuteAndWait(ExecPath, args.data(), nullptr, nullptr, 0, 0,
+ &ErrMsg)) {
errs() << "Error: " << ErrMsg << "\n";
- return false;
+ return true;
}
sys::fs::remove(Filename);
errs() << " done. \n";
- }
- else {
- sys::ExecuteNoWait(ExecPath, &args[0],nullptr,nullptr,0,&ErrMsg);
+ } else {
+ sys::ExecuteNoWait(ExecPath, args.data(), nullptr, nullptr, 0, &ErrMsg);
errs() << "Remember to erase graph file: " << Filename.str() << "\n";
}
- return true;
+ return false;
+}
+
+struct GraphSession {
+ std::string LogBuffer;
+ bool TryFindProgram(StringRef Names, std::string &ProgramPath) {
+ raw_string_ostream Log(LogBuffer);
+ SmallVector<StringRef, 8> parts;
+ Names.split(parts, "|");
+ for (auto Name : parts) {
+ ProgramPath = sys::FindProgramByName(Name);
+ if (!ProgramPath.empty())
+ return true;
+ Log << " Tried '" << Name << "'\n";
+ }
+ return false;
+ }
+};
+
+static const char *getProgramName(GraphProgram::Name program) {
+ switch (program) {
+ case GraphProgram::DOT:
+ return "dot";
+ case GraphProgram::FDP:
+ return "fdp";
+ case GraphProgram::NEATO:
+ return "neato";
+ case GraphProgram::TWOPI:
+ return "twopi";
+ case GraphProgram::CIRCO:
+ return "circo";
+ }
+ llvm_unreachable("bad kind");
}
-void llvm::DisplayGraph(StringRef FilenameRef, bool wait,
+bool llvm::DisplayGraph(StringRef FilenameRef, bool wait,
GraphProgram::Name program) {
std::string Filename = FilenameRef;
wait &= !ViewBackground;
std::string ErrMsg;
-#if HAVE_GRAPHVIZ
- std::string Graphviz(LLVM_PATH_GRAPHVIZ);
-
- std::vector<const char*> args;
- args.push_back(Graphviz.c_str());
- args.push_back(Filename.c_str());
- args.push_back(nullptr);
-
- errs() << "Running 'Graphviz' program... ";
- if (!ExecGraphViewer(Graphviz, args, Filename, wait, ErrMsg))
- return;
-
-#elif HAVE_XDOT
- std::vector<const char*> args;
- args.push_back(LLVM_PATH_XDOT);
- args.push_back(Filename.c_str());
-
- switch (program) {
- case GraphProgram::DOT: args.push_back("-f"); args.push_back("dot"); break;
- case GraphProgram::FDP: args.push_back("-f"); args.push_back("fdp"); break;
- case GraphProgram::NEATO: args.push_back("-f"); args.push_back("neato");break;
- case GraphProgram::TWOPI: args.push_back("-f"); args.push_back("twopi");break;
- case GraphProgram::CIRCO: args.push_back("-f"); args.push_back("circo");break;
+ std::string ViewerPath;
+ GraphSession S;
+
+ // Graphviz
+ if (S.TryFindProgram("Graphviz", ViewerPath)) {
+ std::vector<const char *> args;
+ args.push_back(ViewerPath.c_str());
+ args.push_back(Filename.c_str());
+ args.push_back(nullptr);
+
+ errs() << "Running 'Graphviz' program... ";
+ return ExecGraphViewer(ViewerPath, args, Filename, wait, ErrMsg);
}
- args.push_back(0);
+ // xdot
+ if (S.TryFindProgram("xdot|xdot.py", ViewerPath)) {
+ std::vector<const char *> args;
+ args.push_back(ViewerPath.c_str());
+ args.push_back(Filename.c_str());
- errs() << "Running 'xdot.py' program... ";
- if (!ExecGraphViewer(LLVM_PATH_XDOT, args, Filename, wait, ErrMsg))
- return;
+ args.push_back("-f");
+ args.push_back(getProgramName(program));
-#elif (HAVE_GV && (HAVE_DOT || HAVE_FDP || HAVE_NEATO || \
- HAVE_TWOPI || HAVE_CIRCO))
- std::string PSFilename = Filename + ".ps";
- std::string prog;
+ args.push_back(nullptr);
- // Set default grapher
-#if HAVE_CIRCO
- prog = LLVM_PATH_CIRCO;
-#endif
-#if HAVE_TWOPI
- prog = LLVM_PATH_TWOPI;
-#endif
-#if HAVE_NEATO
- prog = LLVM_PATH_NEATO;
-#endif
-#if HAVE_FDP
- prog = LLVM_PATH_FDP;
-#endif
-#if HAVE_DOT
- prog = LLVM_PATH_DOT;
-#endif
+ errs() << "Running 'xdot.py' program... ";
+ return ExecGraphViewer(ViewerPath, args, Filename, wait, ErrMsg);
+ }
- // Find which program the user wants
-#if HAVE_DOT
- if (program == GraphProgram::DOT)
- prog = LLVM_PATH_DOT;
-#endif
-#if (HAVE_FDP)
- if (program == GraphProgram::FDP)
- prog = LLVM_PATH_FDP;
-#endif
-#if (HAVE_NEATO)
- if (program == GraphProgram::NEATO)
- prog = LLVM_PATH_NEATO;
-#endif
-#if (HAVE_TWOPI)
- if (program == GraphProgram::TWOPI)
- prog = LLVM_PATH_TWOPI;
-#endif
-#if (HAVE_CIRCO)
- if (program == GraphProgram::CIRCO)
- prog = LLVM_PATH_CIRCO;
+ enum PSViewerKind { PSV_None, PSV_OSXOpen, PSV_XDGOpen, PSV_Ghostview };
+ PSViewerKind PSViewer = PSV_None;
+#ifdef __APPLE__
+ if (!PSViewer && S.TryFindProgram("open", ViewerPath))
+ PSViewer = PSV_OSXOpen;
#endif
+ if (!PSViewer && S.TryFindProgram("gv", ViewerPath))
+ PSViewer = PSV_Ghostview;
+ if (!PSViewer && S.TryFindProgram("xdg-open", ViewerPath))
+ PSViewer = PSV_XDGOpen;
+
+ // PostScript graph generator + PostScript viewer
+ std::string GeneratorPath;
+ if (PSViewer &&
+ (S.TryFindProgram(getProgramName(program), GeneratorPath) ||
+ S.TryFindProgram("dot|fdp|neato|twopi|circo", GeneratorPath))) {
+ std::string PSFilename = Filename + ".ps";
+
+ std::vector<const char *> args;
+ args.push_back(GeneratorPath.c_str());
+ args.push_back("-Tps");
+ args.push_back("-Nfontname=Courier");
+ args.push_back("-Gsize=7.5,10");
+ args.push_back(Filename.c_str());
+ args.push_back("-o");
+ args.push_back(PSFilename.c_str());
+ args.push_back(nullptr);
+
+ errs() << "Running '" << GeneratorPath << "' program... ";
+
+ if (ExecGraphViewer(GeneratorPath, args, Filename, wait, ErrMsg))
+ return true;
+
+ args.clear();
+ args.push_back(ViewerPath.c_str());
+ switch (PSViewer) {
+ case PSV_OSXOpen:
+ args.push_back("-W");
+ args.push_back(PSFilename.c_str());
+ break;
+ case PSV_XDGOpen:
+ wait = false;
+ args.push_back(PSFilename.c_str());
+ break;
+ case PSV_Ghostview:
+ args.push_back("--spartan");
+ args.push_back(PSFilename.c_str());
+ break;
+ case PSV_None:
+ llvm_unreachable("Invalid viewer");
+ }
+ args.push_back(nullptr);
- std::vector<const char*> args;
- args.push_back(prog.c_str());
- args.push_back("-Tps");
- args.push_back("-Nfontname=Courier");
- args.push_back("-Gsize=7.5,10");
- args.push_back(Filename.c_str());
- args.push_back("-o");
- args.push_back(PSFilename.c_str());
- args.push_back(0);
-
- errs() << "Running '" << prog << "' program... ";
-
- if (!ExecGraphViewer(prog, args, Filename, wait, ErrMsg))
- return;
-
- std::string gv(LLVM_PATH_GV);
- args.clear();
- args.push_back(gv.c_str());
- args.push_back(PSFilename.c_str());
- args.push_back("--spartan");
- args.push_back(0);
-
- ErrMsg.clear();
- if (!ExecGraphViewer(gv, args, PSFilename, wait, ErrMsg))
- return;
-
-#elif HAVE_DOTTY
- std::string dotty(LLVM_PATH_DOTTY);
+ ErrMsg.clear();
+ return ExecGraphViewer(ViewerPath, args, PSFilename, wait, ErrMsg);
+ }
- std::vector<const char*> args;
- args.push_back(dotty.c_str());
- args.push_back(Filename.c_str());
- args.push_back(0);
+ // dotty
+ if (S.TryFindProgram("dotty", ViewerPath)) {
+ std::vector<const char *> args;
+ args.push_back(ViewerPath.c_str());
+ args.push_back(Filename.c_str());
+ args.push_back(nullptr);
// Dotty spawns another app and doesn't wait until it returns
-#if defined (__MINGW32__) || defined (_WINDOWS)
- wait = false;
-#endif
- errs() << "Running 'dotty' program... ";
- if (!ExecGraphViewer(dotty, args, Filename, wait, ErrMsg))
- return;
-#else
- (void)Filename;
- (void)ErrMsg;
+#ifdef LLVM_ON_WIN32
+ wait = false;
#endif
+ errs() << "Running 'dotty' program... ";
+ return ExecGraphViewer(ViewerPath, args, Filename, wait, ErrMsg);
+ }
+
+ errs() << "Error: Couldn't find a usable graph viewer program:\n";
+ errs() << S.LogBuffer << "\n";
+ return true;
}
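
The compile-time HAVE_GRAPHVIZ/HAVE_XDOT/... selection is gone: viewers are probed on PATH at runtime, and TryFindProgram takes a '|'-separated candidate list, logging each miss so the final error can list everything that was tried. A sketch of that probe with a stubbed FindProgramByName:

#include <cstdio>
#include <sstream>
#include <string>

// Stub for sys::FindProgramByName: pretend only "xdot.py" is on PATH.
static std::string findProgram(const std::string &Name) {
  return Name == "xdot.py" ? "/usr/bin/xdot.py" : std::string();
}

static bool tryFind(const std::string &Names, std::string &Path,
                    std::string &Log) {
  std::stringstream SS(Names);
  for (std::string Name; std::getline(SS, Name, '|');) {
    Path = findProgram(Name);
    if (!Path.empty())
      return true;
    Log += "  Tried '" + Name + "'\n"; // remember misses for the error
  }
  return false;
}

int main() {
  std::string Path, Log;
  if (tryFind("xdot|xdot.py", Path, Log))
    std::printf("viewer: %s\n", Path.c_str());
  else
    std::printf("no usable viewer:\n%s", Log.c_str());
}
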
diff --git a/lib/Support/Host.cpp b/lib/Support/Host.cpp
index fd0472e..e2dd6d5 100644
--- a/lib/Support/Host.cpp
+++ b/lib/Support/Host.cpp
@@ -7,7 +7,7 @@
//
//===----------------------------------------------------------------------===//
//
-// This header file implements the operating system Host concept.
+// This file implements the operating system Host concept.
//
//===----------------------------------------------------------------------===//
@@ -570,6 +570,8 @@ StringRef sys::getHostCPUName() {
.Case("A2", "a2")
.Case("POWER6", "pwr6")
.Case("POWER7", "pwr7")
+ .Case("POWER8", "pwr8")
+ .Case("POWER8E", "pwr8")
.Default(generic);
}
#elif defined(__linux__) && defined(__arm__)
@@ -744,7 +746,7 @@ bool sys::getHostCPUFeatures(StringMap<bool> &Features) {
.Default("");
#if defined(__aarch64__)
- // We need to check crypto seperately since we need all of the crypto
+ // We need to check crypto separately since we need all of the crypto
// extensions to enable the subtarget feature
if (CPUFeatures[I] == "aes")
crypto |= CAP_AES;
diff --git a/lib/Support/LockFileManager.cpp b/lib/Support/LockFileManager.cpp
index 9b4bfbe..3f224e0 100644
--- a/lib/Support/LockFileManager.cpp
+++ b/lib/Support/LockFileManager.cpp
@@ -9,6 +9,7 @@
#include "llvm/Support/LockFileManager.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/Errc.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Path.h"
@@ -32,11 +33,13 @@ Optional<std::pair<std::string, int> >
LockFileManager::readLockFile(StringRef LockFileName) {
// Read the owning host and PID out of the lock file. If it appears that the
// owning process is dead, the lock file is invalid.
- std::unique_ptr<MemoryBuffer> MB;
- if (MemoryBuffer::getFile(LockFileName, MB)) {
+ ErrorOr<std::unique_ptr<MemoryBuffer>> MBOrErr =
+ MemoryBuffer::getFile(LockFileName);
+ if (!MBOrErr) {
sys::fs::remove(LockFileName);
return None;
}
+ std::unique_ptr<MemoryBuffer> MB = std::move(MBOrErr.get());
StringRef Hostname;
StringRef PIDStr;
@@ -71,7 +74,7 @@ bool LockFileManager::processStillExecuting(StringRef Hostname, int PID) {
LockFileManager::LockFileManager(StringRef FileName)
{
this->FileName = FileName;
- if (error_code EC = sys::fs::make_absolute(this->FileName)) {
+ if (std::error_code EC = sys::fs::make_absolute(this->FileName)) {
Error = EC;
return;
}
@@ -87,10 +90,8 @@ LockFileManager::LockFileManager(StringRef FileName)
UniqueLockFileName = LockFileName;
UniqueLockFileName += "-%%%%%%%%";
int UniqueLockFileID;
- if (error_code EC
- = sys::fs::createUniqueFile(UniqueLockFileName.str(),
- UniqueLockFileID,
- UniqueLockFileName)) {
+ if (std::error_code EC = sys::fs::createUniqueFile(
+ UniqueLockFileName.str(), UniqueLockFileID, UniqueLockFileName)) {
Error = EC;
return;
}
@@ -122,9 +123,9 @@ LockFileManager::LockFileManager(StringRef FileName)
while (1) {
// Create a link from the lock file name. If this succeeds, we're done.
- error_code EC =
+ std::error_code EC =
sys::fs::create_link(UniqueLockFileName.str(), LockFileName.str());
- if (EC == errc::success)
+ if (!EC)
return;
if (EC != errc::file_exists) {
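
Two idioms in this hunk are worth spelling out: success is the default-constructed std::error_code, so EC == errc::success becomes !EC, while named failures such as errc::file_exists still compare directly, because error_code-to-error_condition comparison is routed through the category. A short demonstration:

#include <cstdio>
#include <system_error>

int main() {
  std::error_code OK; // default-constructed == success
  std::error_code Exists =
      std::make_error_code(std::errc::file_exists); // stand-in for EEXIST

  std::printf("!OK        -> %d\n", !OK);                // 1
  std::printf("is EEXIST  -> %d\n",
              Exists == std::errc::file_exists);         // 1
  std::printf("is ENOTDIR -> %d\n",
              Exists == std::errc::not_a_directory);     // 0
}
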
diff --git a/lib/Support/Makefile b/lib/Support/Makefile
index 4a2185d..39426aa 100644
--- a/lib/Support/Makefile
+++ b/lib/Support/Makefile
@@ -17,3 +17,7 @@ include $(LEVEL)/Makefile.common
CompileCommonOpts := $(filter-out -pedantic,$(CompileCommonOpts))
CompileCommonOpts := $(filter-out -Wno-long-long,$(CompileCommonOpts))
+
+ifdef LLVM_VERSION_INFO
+CompileCommonOpts += -DLLVM_VERSION_INFO='"$(LLVM_VERSION_INFO)"'
+endif
diff --git a/lib/Support/ManagedStatic.cpp b/lib/Support/ManagedStatic.cpp
index 6a1c2a5..b8fb284 100644
--- a/lib/Support/ManagedStatic.cpp
+++ b/lib/Support/ManagedStatic.cpp
@@ -14,16 +14,26 @@
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Config/config.h"
#include "llvm/Support/Atomic.h"
+#include "llvm/Support/Mutex.h"
+#include "llvm/Support/MutexGuard.h"
#include <cassert>
using namespace llvm;
static const ManagedStaticBase *StaticList = nullptr;
+static sys::Mutex& getManagedStaticMutex() {
+ // We need to use a function local static here, since this can get called
+ // during a static constructor and we need to guarantee that it's initialized
+ // correctly.
+ static sys::Mutex ManagedStaticMutex;
+ return ManagedStaticMutex;
+}
+
void ManagedStaticBase::RegisterManagedStatic(void *(*Creator)(),
void (*Deleter)(void*)) const {
assert(Creator);
if (llvm_is_multithreaded()) {
- llvm_acquire_global_lock();
+ MutexGuard Lock(getManagedStaticMutex());
if (!Ptr) {
void* tmp = Creator();
@@ -43,8 +53,6 @@ void ManagedStaticBase::RegisterManagedStatic(void *(*Creator)(),
Next = StaticList;
StaticList = this;
}
-
- llvm_release_global_lock();
} else {
assert(!Ptr && !DeleterFn && !Next &&
"Partially initialized ManagedStatic!?");
@@ -75,8 +83,8 @@ void ManagedStaticBase::destroy() const {
/// llvm_shutdown - Deallocate and destroy all ManagedStatic variables.
void llvm::llvm_shutdown() {
+ MutexGuard Lock(getManagedStaticMutex());
+
while (StaticList)
StaticList->destroy();
-
- if (llvm_is_multithreaded()) llvm_stop_multithreaded();
}
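
The comment above gives the reason for the function-local static: it is constructed on first use, so the mutex is usable even when RegisterManagedStatic runs from another translation unit's static constructor, where a namespace-scope mutex might not have been constructed yet. The same pattern with std::mutex:

#include <cstdio>
#include <mutex>

static std::mutex &getRegistryMutex() {
  static std::mutex M; // constructed (thread-safely) on first use
  return M;
}

struct RegisterAtStartup {
  RegisterAtStartup() {
    std::lock_guard<std::mutex> L(getRegistryMutex()); // safe before main()
    std::puts("registered during static construction");
  }
} R;

int main() { std::puts("main"); }
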
diff --git a/lib/Support/MemoryBuffer.cpp b/lib/Support/MemoryBuffer.cpp
index 629d885..5f4b7da 100644
--- a/lib/Support/MemoryBuffer.cpp
+++ b/lib/Support/MemoryBuffer.cpp
@@ -14,19 +14,20 @@
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/Config/config.h"
+#include "llvm/Support/Errc.h"
#include "llvm/Support/Errno.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/Process.h"
#include "llvm/Support/Program.h"
-#include "llvm/Support/system_error.h"
#include <cassert>
#include <cerrno>
#include <cstdio>
#include <cstring>
#include <new>
#include <sys/types.h>
+#include <system_error>
#if !defined(_MSC_VER) && !defined(__MINGW32__)
#include <unistd.h>
#else
@@ -151,17 +152,11 @@ MemoryBuffer *MemoryBuffer::getNewMemBuffer(size_t Size, StringRef BufferName) {
return SB;
}
-
-/// getFileOrSTDIN - Open the specified file as a MemoryBuffer, or open stdin
-/// if the Filename is "-". If an error occurs, this returns null and fills
-/// in *ErrStr with a reason. If stdin is empty, this API (unlike getSTDIN)
-/// returns an empty buffer.
-error_code MemoryBuffer::getFileOrSTDIN(StringRef Filename,
- std::unique_ptr<MemoryBuffer> &Result,
- int64_t FileSize) {
+ErrorOr<std::unique_ptr<MemoryBuffer>>
+MemoryBuffer::getFileOrSTDIN(StringRef Filename, int64_t FileSize) {
if (Filename == "-")
- return getSTDIN(Result);
- return getFile(Filename, Result, FileSize);
+ return getSTDIN();
+ return getFile(Filename, FileSize);
}
@@ -190,7 +185,7 @@ class MemoryBufferMMapFile : public MemoryBuffer {
public:
MemoryBufferMMapFile(bool RequiresNullTerminator, int FD, uint64_t Len,
- uint64_t Offset, error_code EC)
+ uint64_t Offset, std::error_code EC)
: MFR(FD, false, sys::fs::mapped_file_region::readonly,
getLegalMapSize(Len, Offset), getLegalMapOffset(Offset), EC) {
if (!EC) {
@@ -210,9 +205,8 @@ public:
};
}
-static error_code getMemoryBufferForStream(int FD,
- StringRef BufferName,
- std::unique_ptr<MemoryBuffer> &Result) {
+static ErrorOr<std::unique_ptr<MemoryBuffer>>
+getMemoryBufferForStream(int FD, StringRef BufferName) {
const ssize_t ChunkSize = 4096*4;
SmallString<ChunkSize> Buffer;
ssize_t ReadBytes;
@@ -222,52 +216,48 @@ static error_code getMemoryBufferForStream(int FD,
ReadBytes = read(FD, Buffer.end(), ChunkSize);
if (ReadBytes == -1) {
if (errno == EINTR) continue;
- return error_code(errno, posix_category());
+ return std::error_code(errno, std::generic_category());
}
Buffer.set_size(Buffer.size() + ReadBytes);
} while (ReadBytes != 0);
- Result.reset(MemoryBuffer::getMemBufferCopy(Buffer, BufferName));
- return error_code::success();
+ std::unique_ptr<MemoryBuffer> Ret(
+ MemoryBuffer::getMemBufferCopy(Buffer, BufferName));
+ return std::move(Ret);
}
-static error_code getFileAux(const char *Filename,
- std::unique_ptr<MemoryBuffer> &Result,
- int64_t FileSize,
- bool RequiresNullTerminator,
- bool IsVolatileSize);
-
-error_code MemoryBuffer::getFile(Twine Filename,
- std::unique_ptr<MemoryBuffer> &Result,
- int64_t FileSize,
- bool RequiresNullTerminator,
- bool IsVolatileSize) {
+static ErrorOr<std::unique_ptr<MemoryBuffer>>
+getFileAux(const char *Filename, int64_t FileSize, bool RequiresNullTerminator,
+ bool IsVolatileSize);
+
+ErrorOr<std::unique_ptr<MemoryBuffer>>
+MemoryBuffer::getFile(Twine Filename, int64_t FileSize,
+ bool RequiresNullTerminator, bool IsVolatileSize) {
// Ensure the path is null terminated.
SmallString<256> PathBuf;
StringRef NullTerminatedName = Filename.toNullTerminatedStringRef(PathBuf);
- return getFileAux(NullTerminatedName.data(), Result, FileSize,
- RequiresNullTerminator, IsVolatileSize);
+ return getFileAux(NullTerminatedName.data(), FileSize, RequiresNullTerminator,
+ IsVolatileSize);
}
-static error_code getOpenFileImpl(int FD, const char *Filename,
- std::unique_ptr<MemoryBuffer> &Result,
- uint64_t FileSize, uint64_t MapSize,
- int64_t Offset, bool RequiresNullTerminator,
- bool IsVolatileSize);
+static ErrorOr<std::unique_ptr<MemoryBuffer>>
+getOpenFileImpl(int FD, const char *Filename, uint64_t FileSize,
+ uint64_t MapSize, int64_t Offset, bool RequiresNullTerminator,
+ bool IsVolatileSize);
-static error_code getFileAux(const char *Filename,
- std::unique_ptr<MemoryBuffer> &Result, int64_t FileSize,
- bool RequiresNullTerminator,
- bool IsVolatileSize) {
+static ErrorOr<std::unique_ptr<MemoryBuffer>>
+getFileAux(const char *Filename, int64_t FileSize, bool RequiresNullTerminator,
+ bool IsVolatileSize) {
int FD;
- error_code EC = sys::fs::openFileForRead(Filename, FD);
+ std::error_code EC = sys::fs::openFileForRead(Filename, FD);
if (EC)
return EC;
- error_code ret = getOpenFileImpl(FD, Filename, Result, FileSize, FileSize, 0,
- RequiresNullTerminator, IsVolatileSize);
+ ErrorOr<std::unique_ptr<MemoryBuffer>> Ret =
+ getOpenFileImpl(FD, Filename, FileSize, FileSize, 0,
+ RequiresNullTerminator, IsVolatileSize);
close(FD);
- return ret;
+ return Ret;
}
static bool shouldUseMmap(int FD,
@@ -318,11 +308,10 @@ static bool shouldUseMmap(int FD,
return true;
}
-static error_code getOpenFileImpl(int FD, const char *Filename,
- std::unique_ptr<MemoryBuffer> &Result,
- uint64_t FileSize, uint64_t MapSize,
- int64_t Offset, bool RequiresNullTerminator,
- bool IsVolatileSize) {
+static ErrorOr<std::unique_ptr<MemoryBuffer>>
+getOpenFileImpl(int FD, const char *Filename, uint64_t FileSize,
+ uint64_t MapSize, int64_t Offset, bool RequiresNullTerminator,
+ bool IsVolatileSize) {
static int PageSize = sys::process::get_self()->page_size();
// Default is to map the full file.
@@ -331,7 +320,7 @@ static error_code getOpenFileImpl(int FD, const char *Filename,
// file descriptor is cheaper than stat on a random path.
if (FileSize == uint64_t(-1)) {
sys::fs::file_status Status;
- error_code EC = sys::fs::status(FD, Status);
+ std::error_code EC = sys::fs::status(FD, Status);
if (EC)
return EC;
@@ -341,7 +330,7 @@ static error_code getOpenFileImpl(int FD, const char *Filename,
sys::fs::file_type Type = Status.type();
if (Type != sys::fs::file_type::regular_file &&
Type != sys::fs::file_type::block_file)
- return getMemoryBufferForStream(FD, Filename, Result);
+ return getMemoryBufferForStream(FD, Filename);
FileSize = Status.getSize();
}
@@ -350,11 +339,12 @@ static error_code getOpenFileImpl(int FD, const char *Filename,
if (shouldUseMmap(FD, FileSize, MapSize, Offset, RequiresNullTerminator,
PageSize, IsVolatileSize)) {
- error_code EC;
- Result.reset(new (NamedBufferAlloc(Filename)) MemoryBufferMMapFile(
- RequiresNullTerminator, FD, MapSize, Offset, EC));
+ std::error_code EC;
+ std::unique_ptr<MemoryBuffer> Result(
+ new (NamedBufferAlloc(Filename))
+ MemoryBufferMMapFile(RequiresNullTerminator, FD, MapSize, Offset, EC));
if (!EC)
- return error_code::success();
+ return std::move(Result);
}
MemoryBuffer *Buf = MemoryBuffer::getNewUninitMemBuffer(MapSize, Filename);
@@ -370,7 +360,7 @@ static error_code getOpenFileImpl(int FD, const char *Filename,
size_t BytesLeft = MapSize;
#ifndef HAVE_PREAD
if (lseek(FD, Offset, SEEK_SET) == -1)
- return error_code(errno, posix_category());
+ return std::error_code(errno, std::generic_category());
#endif
while (BytesLeft) {
@@ -383,7 +373,7 @@ static error_code getOpenFileImpl(int FD, const char *Filename,
if (errno == EINTR)
continue;
// Error while reading.
- return error_code(errno, posix_category());
+ return std::error_code(errno, std::generic_category());
}
if (NumRead == 0) {
memset(BufPtr, 0, BytesLeft); // zero-initialize rest of the buffer.
@@ -393,37 +383,29 @@ static error_code getOpenFileImpl(int FD, const char *Filename,
BufPtr += NumRead;
}
- Result.swap(SB);
- return error_code::success();
+ return std::move(SB);
}
-error_code MemoryBuffer::getOpenFile(int FD, const char *Filename,
- std::unique_ptr<MemoryBuffer> &Result,
- uint64_t FileSize,
- bool RequiresNullTerminator,
- bool IsVolatileSize) {
- return getOpenFileImpl(FD, Filename, Result, FileSize, FileSize, 0,
+ErrorOr<std::unique_ptr<MemoryBuffer>>
+MemoryBuffer::getOpenFile(int FD, const char *Filename, uint64_t FileSize,
+ bool RequiresNullTerminator, bool IsVolatileSize) {
+ return getOpenFileImpl(FD, Filename, FileSize, FileSize, 0,
RequiresNullTerminator, IsVolatileSize);
}
-error_code MemoryBuffer::getOpenFileSlice(int FD, const char *Filename,
- std::unique_ptr<MemoryBuffer> &Result,
- uint64_t MapSize, int64_t Offset,
- bool IsVolatileSize) {
- return getOpenFileImpl(FD, Filename, Result, -1, MapSize, Offset, false,
+ErrorOr<std::unique_ptr<MemoryBuffer>>
+MemoryBuffer::getOpenFileSlice(int FD, const char *Filename, uint64_t MapSize,
+ int64_t Offset, bool IsVolatileSize) {
+ return getOpenFileImpl(FD, Filename, -1, MapSize, Offset, false,
IsVolatileSize);
}
-//===----------------------------------------------------------------------===//
-// MemoryBuffer::getSTDIN implementation.
-//===----------------------------------------------------------------------===//
-
-error_code MemoryBuffer::getSTDIN(std::unique_ptr<MemoryBuffer> &Result) {
+ErrorOr<std::unique_ptr<MemoryBuffer>> MemoryBuffer::getSTDIN() {
// Read in all of the data from stdin; we cannot mmap stdin.
//
// FIXME: That isn't necessarily true, we should try to mmap stdin and
// fallback if it fails.
sys::ChangeStdinToBinary();
- return getMemoryBufferForStream(0, "<stdin>", Result);
+ return getMemoryBufferForStream(0, "<stdin>");
}
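
One subtlety in the new return style: return std::move(SB) and return std::move(Result) are required because the local's type differs from the function's return type, and pre-C++20 (including LLVM's C++11 baseline) the conversion to ErrorOr treats a named local as an lvalue, which would select unique_ptr's nonexistent copy constructor. A minimal reproduction with a Result stand-in:

#include <memory>
#include <system_error>
#include <utility>

template <typename T> struct Result {
  Result(T V) : Value(std::move(V)) {}
  Result(std::error_code E) : EC(E) {}
  T Value;
  std::error_code EC;
};

static Result<std::unique_ptr<int>> make() {
  std::unique_ptr<int> SB(new int(42));
  // return SB;          // ill-formed pre-C++20: tries to copy unique_ptr
  return std::move(SB);  // explicit move enables the converting ctor
}

int main() { return *make().Value == 42 ? 0 : 1; }
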
diff --git a/lib/Support/Path.cpp b/lib/Support/Path.cpp
index b8d676f..d5a0ec5 100644
--- a/lib/Support/Path.cpp
+++ b/lib/Support/Path.cpp
@@ -11,6 +11,7 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/Support/Errc.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/ErrorHandling.h"
@@ -164,12 +165,12 @@ enum FSEntity {
};
// Implemented in Unix/Path.inc and Windows/Path.inc.
-static error_code TempDir(SmallVectorImpl<char> &result);
+static std::error_code TempDir(SmallVectorImpl<char> &result);
-static error_code createUniqueEntity(const Twine &Model, int &ResultFD,
- SmallVectorImpl<char> &ResultPath,
- bool MakeAbsolute, unsigned Mode,
- FSEntity Type) {
+static std::error_code createUniqueEntity(const Twine &Model, int &ResultFD,
+ SmallVectorImpl<char> &ResultPath,
+ bool MakeAbsolute, unsigned Mode,
+ FSEntity Type) {
SmallString<128> ModelStorage;
Model.toVector(ModelStorage);
@@ -177,7 +178,7 @@ static error_code createUniqueEntity(const Twine &Model, int &ResultFD,
// Make model absolute by prepending a temp directory if it's not already.
if (!sys::path::is_absolute(Twine(ModelStorage))) {
SmallString<128> TDir;
- if (error_code EC = TempDir(TDir))
+ if (std::error_code EC = TempDir(TDir))
return EC;
sys::path::append(TDir, Twine(ModelStorage));
ModelStorage.swap(TDir);
@@ -201,7 +202,7 @@ retry_random_path:
// Try to open + create the file.
switch (Type) {
case FS_File: {
- if (error_code EC =
+ if (std::error_code EC =
sys::fs::openFileForWrite(Twine(ResultPath.begin()), ResultFD,
sys::fs::F_RW | sys::fs::F_Excl, Mode)) {
if (EC == errc::file_exists)
@@ -209,26 +210,27 @@ retry_random_path:
return EC;
}
- return error_code::success();
+ return std::error_code();
}
case FS_Name: {
bool Exists;
- error_code EC = sys::fs::exists(ResultPath.begin(), Exists);
+ std::error_code EC = sys::fs::exists(ResultPath.begin(), Exists);
if (EC)
return EC;
if (Exists)
goto retry_random_path;
- return error_code::success();
+ return std::error_code();
}
case FS_Dir: {
- if (error_code EC = sys::fs::create_directory(ResultPath.begin(), false)) {
+ if (std::error_code EC =
+ sys::fs::create_directory(ResultPath.begin(), false)) {
if (EC == errc::file_exists)
goto retry_random_path;
return EC;
}
- return error_code::success();
+ return std::error_code();
}
}
llvm_unreachable("Invalid Type");
@@ -705,29 +707,30 @@ bool is_relative(const Twine &path) {
namespace fs {
-error_code getUniqueID(const Twine Path, UniqueID &Result) {
+std::error_code getUniqueID(const Twine Path, UniqueID &Result) {
file_status Status;
- error_code EC = status(Path, Status);
+ std::error_code EC = status(Path, Status);
if (EC)
return EC;
Result = Status.getUniqueID();
- return error_code::success();
+ return std::error_code();
}
-error_code createUniqueFile(const Twine &Model, int &ResultFd,
- SmallVectorImpl<char> &ResultPath, unsigned Mode) {
+std::error_code createUniqueFile(const Twine &Model, int &ResultFd,
+ SmallVectorImpl<char> &ResultPath,
+ unsigned Mode) {
return createUniqueEntity(Model, ResultFd, ResultPath, false, Mode, FS_File);
}
-error_code createUniqueFile(const Twine &Model,
- SmallVectorImpl<char> &ResultPath) {
+std::error_code createUniqueFile(const Twine &Model,
+ SmallVectorImpl<char> &ResultPath) {
int Dummy;
return createUniqueEntity(Model, Dummy, ResultPath, false, 0, FS_Name);
}
-static error_code createTemporaryFile(const Twine &Model, int &ResultFD,
- llvm::SmallVectorImpl<char> &ResultPath,
- FSEntity Type) {
+static std::error_code
+createTemporaryFile(const Twine &Model, int &ResultFD,
+ llvm::SmallVectorImpl<char> &ResultPath, FSEntity Type) {
SmallString<128> Storage;
StringRef P = Model.toNullTerminatedStringRef(Storage);
assert(P.find_first_of(separators) == StringRef::npos &&
@@ -737,24 +740,22 @@ static error_code createTemporaryFile(const Twine &Model, int &ResultFD,
true, owner_read | owner_write, Type);
}
-static error_code
+static std::error_code
createTemporaryFile(const Twine &Prefix, StringRef Suffix, int &ResultFD,
- llvm::SmallVectorImpl<char> &ResultPath,
- FSEntity Type) {
+ llvm::SmallVectorImpl<char> &ResultPath, FSEntity Type) {
const char *Middle = Suffix.empty() ? "-%%%%%%" : "-%%%%%%.";
return createTemporaryFile(Prefix + Middle + Suffix, ResultFD, ResultPath,
Type);
}
-
-error_code createTemporaryFile(const Twine &Prefix, StringRef Suffix,
- int &ResultFD,
- SmallVectorImpl<char> &ResultPath) {
+std::error_code createTemporaryFile(const Twine &Prefix, StringRef Suffix,
+ int &ResultFD,
+ SmallVectorImpl<char> &ResultPath) {
return createTemporaryFile(Prefix, Suffix, ResultFD, ResultPath, FS_File);
}
-error_code createTemporaryFile(const Twine &Prefix, StringRef Suffix,
- SmallVectorImpl<char> &ResultPath) {
+std::error_code createTemporaryFile(const Twine &Prefix, StringRef Suffix,
+ SmallVectorImpl<char> &ResultPath) {
int Dummy;
return createTemporaryFile(Prefix, Suffix, Dummy, ResultPath, FS_Name);
}
@@ -762,14 +763,14 @@ error_code createTemporaryFile(const Twine &Prefix, StringRef Suffix,
// This is a mkdtemp with a different pattern. We use createUniqueEntity mostly
// for consistency. We should try using mkdtemp.
-error_code createUniqueDirectory(const Twine &Prefix,
- SmallVectorImpl<char> &ResultPath) {
+std::error_code createUniqueDirectory(const Twine &Prefix,
+ SmallVectorImpl<char> &ResultPath) {
int Dummy;
return createUniqueEntity(Prefix + "-%%%%%%", Dummy, ResultPath,
true, 0, FS_Dir);
}
-error_code make_absolute(SmallVectorImpl<char> &path) {
+std::error_code make_absolute(SmallVectorImpl<char> &path) {
StringRef p(path.data(), path.size());
bool rootDirectory = path::has_root_directory(p),
@@ -781,11 +782,12 @@ error_code make_absolute(SmallVectorImpl<char> &path) {
// Already absolute.
if (rootName && rootDirectory)
- return error_code::success();
+ return std::error_code();
// All of the following conditions will need the current directory.
SmallString<128> current_dir;
- if (error_code ec = current_path(current_dir)) return ec;
+ if (std::error_code ec = current_path(current_dir))
+ return ec;
// Relative path. Prepend the current directory.
if (!rootName && !rootDirectory) {
@@ -793,7 +795,7 @@ error_code make_absolute(SmallVectorImpl<char> &path) {
path::append(current_dir, p);
// Set path to the result.
path.swap(current_dir);
- return error_code::success();
+ return std::error_code();
}
if (!rootName && rootDirectory) {
@@ -802,7 +804,7 @@ error_code make_absolute(SmallVectorImpl<char> &path) {
path::append(curDirRootName, p);
// Set path to the result.
path.swap(curDirRootName);
- return error_code::success();
+ return std::error_code();
}
if (rootName && !rootDirectory) {
@@ -814,19 +816,19 @@ error_code make_absolute(SmallVectorImpl<char> &path) {
SmallString<128> res;
path::append(res, pRootName, bRootDirectory, bRelativePath, pRelativePath);
path.swap(res);
- return error_code::success();
+ return std::error_code();
}
llvm_unreachable("All rootName and rootDirectory combinations should have "
"occurred above!");
}
-error_code create_directories(const Twine &Path, bool IgnoreExisting) {
+std::error_code create_directories(const Twine &Path, bool IgnoreExisting) {
SmallString<128> PathStorage;
StringRef P = Path.toStringRef(PathStorage);
// Be optimistic and try to create the directory
- error_code EC = create_directory(P, IgnoreExisting);
+ std::error_code EC = create_directory(P, IgnoreExisting);
// If we succeeded, or had any error other than the parent not existing, just
// return it.
if (EC != errc::no_such_file_or_directory)
@@ -844,6 +846,40 @@ error_code create_directories(const Twine &Path, bool IgnoreExisting) {
return create_directory(P, IgnoreExisting);
}
+std::error_code copy_file(const Twine &From, const Twine &To) {
+ int ReadFD, WriteFD;
+ if (std::error_code EC = openFileForRead(From, ReadFD))
+ return EC;
+ if (std::error_code EC = openFileForWrite(To, WriteFD, F_None)) {
+ close(ReadFD);
+ return EC;
+ }
+
+ const size_t BufSize = 4096;
+ char *Buf = new char[BufSize];
+ int BytesRead = 0, BytesWritten = 0;
+ for (;;) {
+ BytesRead = read(ReadFD, Buf, BufSize);
+ if (BytesRead <= 0)
+ break;
+ while (BytesRead) {
+ BytesWritten = write(WriteFD, Buf, BytesRead);
+ if (BytesWritten < 0)
+ break;
+ BytesRead -= BytesWritten;
+ }
+ if (BytesWritten < 0)
+ break;
+ }
+ close(ReadFD);
+ close(WriteFD);
+ delete[] Buf;
+
+ if (BytesRead < 0 || BytesWritten < 0)
+ return std::error_code(errno, std::generic_category());
+ return std::error_code();
+}
+
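Note on the copy loop just added: when write() reports a short count, the inner loop resubmits from the start of Buf with the reduced count rather than advancing past the bytes already written, so a partial write would duplicate data. The conventional write-all shape advances the pointer; a minimal sketch under POSIX (illustration only, not the patch's code):

    #include <unistd.h>   // write, ssize_t
    #include <cstddef>    // size_t

    // Write all Len bytes of Buf, resuming after partial writes.
    static bool writeAll(int FD, const char *Buf, size_t Len) {
      while (Len) {
        ssize_t N = ::write(FD, Buf, Len);
        if (N < 0)
          return false;   // caller inspects errno
        Buf += N;
        Len -= static_cast<size_t>(N);
      }
      return true;
    }
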
bool exists(file_status status) {
return status_known(status) && status.type() != file_type::file_not_found;
}
@@ -856,24 +892,24 @@ bool is_directory(file_status status) {
return status.type() == file_type::directory_file;
}
-error_code is_directory(const Twine &path, bool &result) {
+std::error_code is_directory(const Twine &path, bool &result) {
file_status st;
- if (error_code ec = status(path, st))
+ if (std::error_code ec = status(path, st))
return ec;
result = is_directory(st);
- return error_code::success();
+ return std::error_code();
}
bool is_regular_file(file_status status) {
return status.type() == file_type::regular_file;
}
-error_code is_regular_file(const Twine &path, bool &result) {
+std::error_code is_regular_file(const Twine &path, bool &result) {
file_status st;
- if (error_code ec = status(path, st))
+ if (std::error_code ec = status(path, st))
return ec;
result = is_regular_file(st);
- return error_code::success();
+ return std::error_code();
}
bool is_other(file_status status) {
@@ -890,26 +926,8 @@ void directory_entry::replace_filename(const Twine &filename, file_status st) {
Status = st;
}
-error_code has_magic(const Twine &path, const Twine &magic, bool &result) {
- SmallString<32> MagicStorage;
- StringRef Magic = magic.toStringRef(MagicStorage);
- SmallString<32> Buffer;
-
- if (error_code ec = get_magic(path, Magic.size(), Buffer)) {
- if (ec == errc::value_too_large) {
- // Magic.size() > file_size(Path).
- result = false;
- return error_code::success();
- }
- return ec;
- }
-
- result = Magic == Buffer;
- return error_code::success();
-}
-
/// @brief Identify the file type from the given magic bytes.
- file_magic identify_magic(StringRef Magic) {
+file_magic identify_magic(StringRef Magic) {
if (Magic.size() < 4)
return file_magic::unknown;
switch ((unsigned char)Magic[0]) {
@@ -1040,17 +1058,21 @@ error_code has_magic(const Twine &path, const Twine &magic, bool &result) {
return file_magic::unknown;
}
-error_code identify_magic(const Twine &path, file_magic &result) {
- SmallString<32> Magic;
- error_code ec = get_magic(path, Magic.capacity(), Magic);
- if (ec && ec != errc::value_too_large)
- return ec;
+std::error_code identify_magic(const Twine &Path, file_magic &Result) {
+ int FD;
+ if (std::error_code EC = openFileForRead(Path, FD))
+ return EC;
+
+ char Buffer[32];
+ int Length = read(FD, Buffer, sizeof(Buffer));
+ if (close(FD) != 0 || Length < 0)
+ return std::error_code(errno, std::generic_category());
- result = identify_magic(Magic);
- return error_code::success();
+ Result = identify_magic(StringRef(Buffer, Length));
+ return std::error_code();
}
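
The rewritten identify_magic no longer goes through the removed get_magic(): it opens the file, sniffs the first 32 bytes, and classifies them with the StringRef overload above. A usage sketch (the path and error handling are illustrative):

    sys::fs::file_magic Magic;
    if (std::error_code EC = sys::fs::identify_magic("/usr/bin/ls", Magic))
      return EC;
    if (Magic == sys::fs::file_magic::elf_executable) {
      // treat as an ELF executable
    }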
-error_code directory_entry::status(file_status &result) const {
+std::error_code directory_entry::status(file_status &result) const {
return fs::status(Path, result);
}
diff --git a/lib/Support/Process.cpp b/lib/Support/Process.cpp
index 0380ed9..0d42e0e 100644
--- a/lib/Support/Process.cpp
+++ b/lib/Support/Process.cpp
@@ -7,13 +7,16 @@
//
//===----------------------------------------------------------------------===//
//
-// This header file implements the operating system Process concept.
+// This file implements the operating system Process concept.
//
//===----------------------------------------------------------------------===//
+#include "llvm/ADT/StringExtras.h"
#include "llvm/Config/config.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Process.h"
+#include "llvm/Support/Program.h"
using namespace llvm;
using namespace sys;
@@ -66,6 +69,33 @@ TimeValue self_process::get_wall_time() const {
return getElapsedWallTime();
}
+Optional<std::string> Process::FindInEnvPath(const std::string& EnvName,
+ const std::string& FileName)
+{
+ Optional<std::string> FoundPath;
+ Optional<std::string> OptPath = Process::GetEnv(EnvName);
+ if (!OptPath.hasValue())
+ return FoundPath;
+
+ const char EnvPathSeparatorStr[] = {EnvPathSeparator, '\0'};
+ SmallVector<StringRef, 8> Dirs;
+ SplitString(OptPath.getValue(), Dirs, EnvPathSeparatorStr);
+
+ for (const auto &Dir : Dirs) {
+ if (Dir.empty())
+ continue;
+
+ SmallString<128> FilePath(Dir);
+ path::append(FilePath, FileName);
+ if (fs::exists(Twine(FilePath))) {
+ FoundPath = FilePath.str();
+ break;
+ }
+ }
+
+ return FoundPath;
+}
+
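FindInEnvPath splits the named environment variable on the platform's path separator and returns the first directory that actually contains FileName. A usage sketch (the names are illustrative):

    Optional<std::string> Clang =
        sys::Process::FindInEnvPath("PATH", "clang");
    if (Clang.hasValue())
      outs() << "found: " << Clang.getValue() << "\n";
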
#define COLOR(FGBG, CODE, BOLD) "\033[0;" BOLD FGBG CODE "m"
diff --git a/lib/Support/Program.cpp b/lib/Support/Program.cpp
index 83f2ec4..b84b82b 100644
--- a/lib/Support/Program.cpp
+++ b/lib/Support/Program.cpp
@@ -7,13 +7,13 @@
//
//===----------------------------------------------------------------------===//
//
-// This header file implements the operating system Program concept.
+// This file implements the operating system Program concept.
//
//===----------------------------------------------------------------------===//
#include "llvm/Support/Program.h"
#include "llvm/Config/config.h"
-#include "llvm/Support/system_error.h"
+#include <system_error>
using namespace llvm;
using namespace sys;
@@ -34,7 +34,8 @@ int sys::ExecuteAndWait(StringRef Program, const char **args, const char **envp,
if (Execute(PI, Program, args, envp, redirects, memoryLimit, ErrMsg)) {
if (ExecutionFailed)
*ExecutionFailed = false;
- ProcessInfo Result = Wait(PI, secondsToWait, true, ErrMsg);
+ ProcessInfo Result = Wait(
+ PI, secondsToWait, /*WaitUntilTerminates=*/secondsToWait == 0, ErrMsg);
return Result.ReturnCode;
}
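
The Wait call above previously always blocked until the child terminated; it now does so only when secondsToWait is 0, and otherwise treats the argument as a deadline (on the Unix implementation the child is killed once the timeout fires). From the caller's side (Prog/Args are placeholders):

    // Block until the child exits:
    sys::ExecuteAndWait(Prog, Args, nullptr, nullptr, /*secondsToWait=*/0);

    // Give the child at most five seconds:
    sys::ExecuteAndWait(Prog, Args, nullptr, nullptr, /*secondsToWait=*/5);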
diff --git a/lib/Support/RandomNumberGenerator.cpp b/lib/Support/RandomNumberGenerator.cpp
new file mode 100644
index 0000000..c50e7cb
--- /dev/null
+++ b/lib/Support/RandomNumberGenerator.cpp
@@ -0,0 +1,61 @@
+//===-- RandomNumberGenerator.cpp - Implement RNG class -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements random number generation (RNG).
+// The current implementation is NOT cryptographically secure as it uses
+// the C++11 <random> facilities.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "rng"
+#include "llvm/Support/RandomNumberGenerator.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+// Tracking BUG: 19665
+// http://llvm.org/bugs/show_bug.cgi?id=19665
+//
+// Do not change to cl::opt<uint64_t> since this silently breaks argument parsing.
+static cl::opt<unsigned long long>
+Seed("rng-seed", cl::value_desc("seed"),
+ cl::desc("Seed for the random number generator"), cl::init(0));
+
+RandomNumberGenerator::RandomNumberGenerator(StringRef Salt) {
+ DEBUG(
+ if (Seed == 0)
+ errs() << "Warning! Using unseeded random number generator.\n"
+ );
+
+ // Combine seed and salt using std::seed_seq.
+ // Entropy: Seed-low, Seed-high, Salt...
+ std::vector<uint32_t> Data;
+ Data.reserve(2 + Salt.size()/4 + 1);
+ Data.push_back(Seed);
+ Data.push_back(Seed >> 32);
+
+ uint32_t Pack = 0;
+ for (size_t I = 0; I < Salt.size(); ++I) {
+ Pack <<= 8;
+ Pack += Salt[I];
+
+ if (I%4 == 3)
+ Data.push_back(Pack);
+ }
+ Data.push_back(Pack);
+
+ std::seed_seq SeedSeq(Data.begin(), Data.end());
+ Generator.seed(SeedSeq);
+}
+
+uint64_t RandomNumberGenerator::next(uint64_t Max) {
+ std::uniform_int_distribution<uint64_t> distribution(0, Max - 1);
+ return distribution(Generator);
+}
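
The salt string is meant to decouple the streams seen by different users of the generator (e.g. one per pass), while -rng-seed keeps runs reproducible. Usage sketch:

    RandomNumberGenerator RNG("my-pass");  // salt; seed comes from -rng-seed
    uint64_t Roll = RNG.next(6);           // uniformly distributed over [0, 5]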
diff --git a/lib/Support/ScaledNumber.cpp b/lib/Support/ScaledNumber.cpp
new file mode 100644
index 0000000..3fe027b
--- /dev/null
+++ b/lib/Support/ScaledNumber.cpp
@@ -0,0 +1,319 @@
+//==- lib/Support/ScaledNumber.cpp - Support for scaled numbers -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Implementation of some scaled number algorithms.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/ScaledNumber.h"
+
+#include "llvm/ADT/APFloat.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+using namespace llvm::ScaledNumbers;
+
+std::pair<uint64_t, int16_t> ScaledNumbers::multiply64(uint64_t LHS,
+ uint64_t RHS) {
+ // Separate into two 32-bit digits (U.L).
+ auto getU = [](uint64_t N) { return N >> 32; };
+ auto getL = [](uint64_t N) { return N & UINT32_MAX; };
+ uint64_t UL = getU(LHS), LL = getL(LHS), UR = getU(RHS), LR = getL(RHS);
+
+ // Compute cross products.
+ uint64_t P1 = UL * UR, P2 = UL * LR, P3 = LL * UR, P4 = LL * LR;
+
+ // Sum into two 64-bit digits.
+ uint64_t Upper = P1, Lower = P4;
+ auto addWithCarry = [&](uint64_t N) {
+ uint64_t NewLower = Lower + (getL(N) << 32);
+ Upper += getU(N) + (NewLower < Lower);
+ Lower = NewLower;
+ };
+ addWithCarry(P2);
+ addWithCarry(P3);
+
+ // Check whether the upper digit is empty.
+ if (!Upper)
+ return std::make_pair(Lower, 0);
+
+ // Shift as little as possible to maximize precision.
+ unsigned LeadingZeros = countLeadingZeros(Upper);
+ int Shift = 64 - LeadingZeros;
+ if (LeadingZeros)
+ Upper = Upper << LeadingZeros | Lower >> Shift;
+ return getRounded(Upper, Shift,
+ Shift && (Lower & UINT64_C(1) << (Shift - 1)));
+}
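
A worked example traced through the code above: multiplying 2^32 by 2^32 leaves only P1 nonzero, so Upper == 1 and Lower == 0; normalization shifts the 63 leading zeros out of Upper, and the exact product 2^64 comes back as a digits/scale pair.

    std::pair<uint64_t, int16_t> R =
        ScaledNumbers::multiply64(UINT64_C(1) << 32, UINT64_C(1) << 32);
    assert(R.first == UINT64_C(1) << 63 && R.second == 1); // 2^63 * 2^1 == 2^64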
+
+static uint64_t getHalf(uint64_t N) { return (N >> 1) + (N & 1); }
+
+std::pair<uint32_t, int16_t> ScaledNumbers::divide32(uint32_t Dividend,
+ uint32_t Divisor) {
+ assert(Dividend && "expected non-zero dividend");
+ assert(Divisor && "expected non-zero divisor");
+
+ // Use 64-bit math and canonicalize the dividend to gain precision.
+ uint64_t Dividend64 = Dividend;
+ int Shift = 0;
+ if (int Zeros = countLeadingZeros(Dividend64)) {
+ Shift -= Zeros;
+ Dividend64 <<= Zeros;
+ }
+ uint64_t Quotient = Dividend64 / Divisor;
+ uint64_t Remainder = Dividend64 % Divisor;
+
+ // If Quotient needs to be shifted, leave the rounding to getAdjusted().
+ if (Quotient > UINT32_MAX)
+ return getAdjusted<uint32_t>(Quotient, Shift);
+
+ // Round based on the value of the next bit.
+ return getRounded<uint32_t>(Quotient, Shift, Remainder >= getHalf(Divisor));
+}
+
+std::pair<uint64_t, int16_t> ScaledNumbers::divide64(uint64_t Dividend,
+ uint64_t Divisor) {
+ assert(Dividend && "expected non-zero dividend");
+ assert(Divisor && "expected non-zero divisor");
+
+ // Minimize size of divisor.
+ int Shift = 0;
+ if (int Zeros = countTrailingZeros(Divisor)) {
+ Shift -= Zeros;
+ Divisor >>= Zeros;
+ }
+
+ // Check for powers of two.
+ if (Divisor == 1)
+ return std::make_pair(Dividend, Shift);
+
+ // Maximize size of dividend.
+ if (int Zeros = countLeadingZeros(Dividend)) {
+ Shift -= Zeros;
+ Dividend <<= Zeros;
+ }
+
+ // Start with the result of a divide.
+ uint64_t Quotient = Dividend / Divisor;
+ Dividend %= Divisor;
+
+ // Continue building the quotient with long division.
+ while (!(Quotient >> 63) && Dividend) {
+ // Shift Dividend and check for overflow.
+ bool IsOverflow = Dividend >> 63;
+ Dividend <<= 1;
+ --Shift;
+
+ // Get the next bit of Quotient.
+ Quotient <<= 1;
+ if (IsOverflow || Divisor <= Dividend) {
+ Quotient |= 1;
+ Dividend -= Divisor;
+ }
+ }
+
+ return getRounded(Quotient, Shift, Dividend >= getHalf(Divisor));
+}
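
The divisor-canonicalization step makes power-of-two divisions exact and immediate: the trailing zeros move into the scale and the function returns before any long division runs.

    // 1 / 2: Divisor's single trailing zero becomes Shift == -1, Divisor == 1,
    // so the early return yields 1 * 2^-1 == 0.5 exactly.
    std::pair<uint64_t, int16_t> Q = ScaledNumbers::divide64(1, 2);
    assert(Q.first == 1 && Q.second == -1);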
+
+int ScaledNumbers::compareImpl(uint64_t L, uint64_t R, int ScaleDiff) {
+ assert(ScaleDiff >= 0 && "wrong argument order");
+ assert(ScaleDiff < 64 && "numbers too far apart");
+
+ uint64_t L_adjusted = L >> ScaleDiff;
+ if (L_adjusted < R)
+ return -1;
+ if (L_adjusted > R)
+ return 1;
+
+ return L > L_adjusted << ScaleDiff ? 1 : 0;
+}
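
Here L is the operand with the smaller scale and ScaleDiff is how much larger R's scale is; the final check recovers bits lost to the right shift. For example, comparing 9 * 2^0 against 1 * 2^3:

    // 9 >> 3 == 1 ties with R, but 9 > (1 << 3) == 8, so the result is 1.
    assert(ScaledNumbers::compareImpl(9, 1, 3) == 1);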
+
+static void appendDigit(std::string &Str, unsigned D) {
+ assert(D < 10);
+ Str += '0' + D % 10;
+}
+
+static void appendNumber(std::string &Str, uint64_t N) {
+ while (N) {
+ appendDigit(Str, N % 10);
+ N /= 10;
+ }
+}
+
+static bool doesRoundUp(char Digit) {
+ switch (Digit) {
+ case '5':
+ case '6':
+ case '7':
+ case '8':
+ case '9':
+ return true;
+ default:
+ return false;
+ }
+}
+
+static std::string toStringAPFloat(uint64_t D, int E, unsigned Precision) {
+ assert(E >= ScaledNumbers::MinScale);
+ assert(E <= ScaledNumbers::MaxScale);
+
+ // Find a new E, but don't let it increase past MaxScale.
+ int LeadingZeros = ScaledNumberBase::countLeadingZeros64(D);
+ int NewE = std::min(ScaledNumbers::MaxScale, E + 63 - LeadingZeros);
+ int Shift = 63 - (NewE - E);
+ assert(Shift <= LeadingZeros);
+ assert(Shift == LeadingZeros || NewE == ScaledNumbers::MaxScale);
+ D <<= Shift;
+ E = NewE;
+
+ // Check for a denormal.
+ unsigned AdjustedE = E + 16383;
+ if (!(D >> 63)) {
+ assert(E == ScaledNumbers::MaxScale);
+ AdjustedE = 0;
+ }
+
+ // Build the float and print it.
+ uint64_t RawBits[2] = {D, AdjustedE};
+ APFloat Float(APFloat::x87DoubleExtended, APInt(80, RawBits));
+ SmallVector<char, 24> Chars;
+ Float.toString(Chars, Precision, 0);
+ return std::string(Chars.begin(), Chars.end());
+}
+
+static std::string stripTrailingZeros(const std::string &Float) {
+ size_t NonZero = Float.find_last_not_of('0');
+ assert(NonZero != std::string::npos && "no . in floating point string");
+
+ if (Float[NonZero] == '.')
+ ++NonZero;
+
+ return Float.substr(0, NonZero + 1);
+}
+
+std::string ScaledNumberBase::toString(uint64_t D, int16_t E, int Width,
+ unsigned Precision) {
+ if (!D)
+ return "0.0";
+
+ // Canonicalize exponent and digits.
+ uint64_t Above0 = 0;
+ uint64_t Below0 = 0;
+ uint64_t Extra = 0;
+ int ExtraShift = 0;
+ if (E == 0) {
+ Above0 = D;
+ } else if (E > 0) {
+ if (int Shift = std::min(int16_t(countLeadingZeros64(D)), E)) {
+ D <<= Shift;
+ E -= Shift;
+
+ if (!E)
+ Above0 = D;
+ }
+ } else if (E > -64) {
+ Above0 = D >> -E;
+ Below0 = D << (64 + E);
+ } else if (E > -120) {
+ Below0 = D >> (-E - 64);
+ Extra = D << (128 + E);
+ ExtraShift = -64 - E;
+ }
+
+ // Fall back on APFloat for very small and very large numbers.
+ if (!Above0 && !Below0)
+ return toStringAPFloat(D, E, Precision);
+
+ // Append the digits before the decimal.
+ std::string Str;
+ size_t DigitsOut = 0;
+ if (Above0) {
+ appendNumber(Str, Above0);
+ DigitsOut = Str.size();
+ } else
+ appendDigit(Str, 0);
+ std::reverse(Str.begin(), Str.end());
+
+ // Return early if there's nothing after the decimal.
+ if (!Below0)
+ return Str + ".0";
+
+ // Append the decimal and beyond.
+ Str += '.';
+ uint64_t Error = UINT64_C(1) << (64 - Width);
+
+ // We need to shift Below0 to the right to make space for calculating
+ // digits. Save the precision we're losing in Extra.
+ Extra = (Below0 & 0xf) << 56 | (Extra >> 8);
+ Below0 >>= 4;
+ size_t SinceDot = 0;
+ size_t AfterDot = Str.size();
+ do {
+ if (ExtraShift) {
+ --ExtraShift;
+ Error *= 5;
+ } else
+ Error *= 10;
+
+ Below0 *= 10;
+ Extra *= 10;
+ Below0 += (Extra >> 60);
+ Extra = Extra & (UINT64_MAX >> 4);
+ appendDigit(Str, Below0 >> 60);
+ Below0 = Below0 & (UINT64_MAX >> 4);
+ if (DigitsOut || Str.back() != '0')
+ ++DigitsOut;
+ ++SinceDot;
+ } while (Error && (Below0 << 4 | Extra >> 60) >= Error / 2 &&
+ (!Precision || DigitsOut <= Precision || SinceDot < 2));
+
+ // Return early for maximum precision.
+ if (!Precision || DigitsOut <= Precision)
+ return stripTrailingZeros(Str);
+
+ // Find where to truncate.
+ size_t Truncate =
+ std::max(Str.size() - (DigitsOut - Precision), AfterDot + 1);
+
+ // Check if there's anything to truncate.
+ if (Truncate >= Str.size())
+ return stripTrailingZeros(Str);
+
+ bool Carry = doesRoundUp(Str[Truncate]);
+ if (!Carry)
+ return stripTrailingZeros(Str.substr(0, Truncate));
+
+ // Round with the first truncated digit.
+ for (std::string::reverse_iterator I(Str.begin() + Truncate), E = Str.rend();
+ I != E; ++I) {
+ if (*I == '.')
+ continue;
+ if (*I == '9') {
+ *I = '0';
+ continue;
+ }
+
+ ++*I;
+ Carry = false;
+ break;
+ }
+
+ // Add "1" in front if we still need to carry.
+ return stripTrailingZeros(std::string(Carry, '1') + Str.substr(0, Truncate));
+}
+
+raw_ostream &ScaledNumberBase::print(raw_ostream &OS, uint64_t D, int16_t E,
+ int Width, unsigned Precision) {
+ return OS << toString(D, E, Width, Precision);
+}
+
+void ScaledNumberBase::dump(uint64_t D, int16_t E, int Width) {
+ print(dbgs(), D, E, Width, 0) << "[" << Width << ":" << D << "*2^" << E
+ << "]";
+}
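
Tracing toString on a small input helps: for D == 1, E == -1 the E > -64 branch gives Above0 == 0 and Below0 == 2^63, the integer part prints as "0", and a single pass of the digit loop emits the 5.

    // Width == 64 makes the initial Error 1; Precision 0 means "no cap".
    assert(ScaledNumberBase::toString(1, -1, 64, 0) == "0.5");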
diff --git a/lib/Support/SourceMgr.cpp b/lib/Support/SourceMgr.cpp
index acd75fb..003cb56 100644
--- a/lib/Support/SourceMgr.cpp
+++ b/lib/Support/SourceMgr.cpp
@@ -20,14 +20,14 @@
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Support/system_error.h"
+#include <system_error>
using namespace llvm;
static const size_t TabStop = 8;
namespace {
struct LineNoCacheTy {
- int LastQueryBufferID;
+ unsigned LastQueryBufferID;
const char *LastQuery;
unsigned LineNoOfQuery;
};
@@ -49,48 +49,44 @@ SourceMgr::~SourceMgr() {
}
}
-/// AddIncludeFile - Search for a file with the specified name in the current
-/// directory or in one of the IncludeDirs. If no file is found, this returns
-/// ~0, otherwise it returns the buffer ID of the stacked file.
-size_t SourceMgr::AddIncludeFile(const std::string &Filename,
- SMLoc IncludeLoc,
- std::string &IncludedFile) {
- std::unique_ptr<MemoryBuffer> NewBuf;
+unsigned SourceMgr::AddIncludeFile(const std::string &Filename,
+ SMLoc IncludeLoc,
+ std::string &IncludedFile) {
IncludedFile = Filename;
- MemoryBuffer::getFile(IncludedFile.c_str(), NewBuf);
+ ErrorOr<std::unique_ptr<MemoryBuffer>> NewBufOrErr =
+ MemoryBuffer::getFile(IncludedFile.c_str());
// If the file didn't exist directly, see if it's in an include path.
- for (unsigned i = 0, e = IncludeDirectories.size(); i != e && !NewBuf; ++i) {
- IncludedFile = IncludeDirectories[i] + sys::path::get_separator().data() + Filename;
- MemoryBuffer::getFile(IncludedFile.c_str(), NewBuf);
+ for (unsigned i = 0, e = IncludeDirectories.size(); i != e && !NewBufOrErr;
+ ++i) {
+ IncludedFile =
+ IncludeDirectories[i] + sys::path::get_separator().data() + Filename;
+ NewBufOrErr = MemoryBuffer::getFile(IncludedFile.c_str());
}
- if (!NewBuf) return ~0U;
+ if (!NewBufOrErr)
+ return 0;
- return AddNewSourceBuffer(NewBuf.release(), IncludeLoc);
+ return AddNewSourceBuffer(NewBufOrErr.get().release(), IncludeLoc);
}
-
-/// FindBufferContainingLoc - Return the ID of the buffer containing the
-/// specified location, returning -1 if not found.
-int SourceMgr::FindBufferContainingLoc(SMLoc Loc) const {
+unsigned SourceMgr::FindBufferContainingLoc(SMLoc Loc) const {
for (unsigned i = 0, e = Buffers.size(); i != e; ++i)
if (Loc.getPointer() >= Buffers[i].Buffer->getBufferStart() &&
// Use <= here so that a pointer to the null at the end of the buffer
// is included as part of the buffer.
Loc.getPointer() <= Buffers[i].Buffer->getBufferEnd())
- return i;
- return -1;
+ return i + 1;
+ return 0;
}
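
Buffer IDs are now 1-based unsigned values with 0 as the single not-found/failure sentinel, replacing the old mix of -1 and ~0U. Callers test the ID directly (SM and Loc are placeholders):

    unsigned BufID = SM.FindBufferContainingLoc(Loc);
    if (!BufID)
      report_fatal_error("no buffer contains this location");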
-/// getLineAndColumn - Find the line and column number for the specified
-/// location in the specified file. This is not a fast method.
std::pair<unsigned, unsigned>
-SourceMgr::getLineAndColumn(SMLoc Loc, int BufferID) const {
- if (BufferID == -1) BufferID = FindBufferContainingLoc(Loc);
- assert(BufferID != -1 && "Invalid Location!");
+SourceMgr::getLineAndColumn(SMLoc Loc, unsigned BufferID) const {
+ if (!BufferID)
+ BufferID = FindBufferContainingLoc(Loc);
+ assert(BufferID && "Invalid Location!");
- MemoryBuffer *Buff = getBufferInfo(BufferID).Buffer;
+ const MemoryBuffer *Buff = getMemoryBuffer(BufferID);
// Count the number of \n's between the start of the file and the specified
// location.
@@ -132,8 +128,8 @@ SourceMgr::getLineAndColumn(SMLoc Loc, int BufferID) const {
void SourceMgr::PrintIncludeStack(SMLoc IncludeLoc, raw_ostream &OS) const {
if (IncludeLoc == SMLoc()) return; // Top of stack.
- int CurBuf = FindBufferContainingLoc(IncludeLoc);
- assert(CurBuf != -1 && "Invalid or unspecified location!");
+ unsigned CurBuf = FindBufferContainingLoc(IncludeLoc);
+ assert(CurBuf && "Invalid or unspecified location!");
PrintIncludeStack(getBufferInfo(CurBuf).IncludeLoc, OS);
@@ -143,11 +139,6 @@ void SourceMgr::PrintIncludeStack(SMLoc IncludeLoc, raw_ostream &OS) const {
}
-/// GetMessage - Return an SMDiagnostic at the specified location with the
-/// specified string.
-///
-/// @param Type - If non-null, the kind of message (e.g., "error") which is
-/// prefixed to the message.
SMDiagnostic SourceMgr::GetMessage(SMLoc Loc, SourceMgr::DiagKind Kind,
const Twine &Msg,
ArrayRef<SMRange> Ranges,
@@ -161,10 +152,10 @@ SMDiagnostic SourceMgr::GetMessage(SMLoc Loc, SourceMgr::DiagKind Kind,
std::string LineStr;
if (Loc.isValid()) {
- int CurBuf = FindBufferContainingLoc(Loc);
- assert(CurBuf != -1 && "Invalid or unspecified location!");
+ unsigned CurBuf = FindBufferContainingLoc(Loc);
+ assert(CurBuf && "Invalid or unspecified location!");
- MemoryBuffer *CurMB = getBufferInfo(CurBuf).Buffer;
+ const MemoryBuffer *CurMB = getMemoryBuffer(CurBuf);
BufferID = CurMB->getBufferIdentifier();
// Scan backward to find the start of the line.
@@ -211,27 +202,30 @@ SMDiagnostic SourceMgr::GetMessage(SMLoc Loc, SourceMgr::DiagKind Kind,
LineStr, ColRanges, FixIts);
}
-void SourceMgr::PrintMessage(raw_ostream &OS, SMLoc Loc,
- SourceMgr::DiagKind Kind,
- const Twine &Msg, ArrayRef<SMRange> Ranges,
- ArrayRef<SMFixIt> FixIts, bool ShowColors) const {
- SMDiagnostic Diagnostic = GetMessage(Loc, Kind, Msg, Ranges, FixIts);
-
+void SourceMgr::PrintMessage(raw_ostream &OS, const SMDiagnostic &Diagnostic,
+ bool ShowColors) const {
// Report the message with the diagnostic handler if present.
if (DiagHandler) {
DiagHandler(Diagnostic, DiagContext);
return;
}
- if (Loc != SMLoc()) {
- int CurBuf = FindBufferContainingLoc(Loc);
- assert(CurBuf != -1 && "Invalid or unspecified location!");
+ if (Diagnostic.getLoc().isValid()) {
+ unsigned CurBuf = FindBufferContainingLoc(Diagnostic.getLoc());
+ assert(CurBuf && "Invalid or unspecified location!");
PrintIncludeStack(getBufferInfo(CurBuf).IncludeLoc, OS);
}
Diagnostic.print(nullptr, OS, ShowColors);
}
+void SourceMgr::PrintMessage(raw_ostream &OS, SMLoc Loc,
+ SourceMgr::DiagKind Kind,
+ const Twine &Msg, ArrayRef<SMRange> Ranges,
+ ArrayRef<SMFixIt> FixIts, bool ShowColors) const {
+ PrintMessage(OS, GetMessage(Loc, Kind, Msg, Ranges, FixIts), ShowColors);
+}
+
void SourceMgr::PrintMessage(SMLoc Loc, SourceMgr::DiagKind Kind,
const Twine &Msg, ArrayRef<SMRange> Ranges,
ArrayRef<SMFixIt> FixIts, bool ShowColors) const {
diff --git a/lib/Transforms/Utils/SpecialCaseList.cpp b/lib/Support/SpecialCaseList.cpp
index 2c6fcd1..21e43c5 100644
--- a/lib/Transforms/Utils/SpecialCaseList.cpp
+++ b/lib/Support/SpecialCaseList.cpp
@@ -14,20 +14,16 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Transforms/Utils/SpecialCaseList.h"
+#include "llvm/Support/SpecialCaseList.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringSet.h"
-#include "llvm/IR/DerivedTypes.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/GlobalVariable.h"
-#include "llvm/IR/Module.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Regex.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Support/system_error.h"
#include <string>
+#include <system_error>
#include <utility>
namespace llvm {
@@ -38,10 +34,12 @@ namespace llvm {
/// reason for doing so is efficiency; StringSet is much faster at matching
/// literal strings than Regex.
struct SpecialCaseList::Entry {
- StringSet<> Strings;
- Regex *RegEx;
+ Entry() {}
+ Entry(Entry &&Other)
+ : Strings(std::move(Other.Strings)), RegEx(std::move(Other.RegEx)) {}
- Entry() : RegEx(nullptr) {}
+ StringSet<> Strings;
+ std::unique_ptr<Regex> RegEx;
bool match(StringRef Query) const {
return Strings.count(Query) || (RegEx && RegEx->match(Query));
@@ -54,12 +52,13 @@ SpecialCaseList *SpecialCaseList::create(
const StringRef Path, std::string &Error) {
if (Path.empty())
return new SpecialCaseList();
- std::unique_ptr<MemoryBuffer> File;
- if (error_code EC = MemoryBuffer::getFile(Path, File)) {
+ ErrorOr<std::unique_ptr<MemoryBuffer>> FileOrErr =
+ MemoryBuffer::getFile(Path);
+ if (std::error_code EC = FileOrErr.getError()) {
Error = (Twine("Can't open file '") + Path + "': " + EC.message()).str();
return nullptr;
}
- return create(File.get(), Error);
+ return create(FileOrErr.get().get(), Error);
}
SpecialCaseList *SpecialCaseList::create(
@@ -150,66 +149,16 @@ bool SpecialCaseList::parse(const MemoryBuffer *MB, std::string &Error) {
for (StringMap<std::string>::const_iterator II = I->second.begin(),
IE = I->second.end();
II != IE; ++II) {
- Entries[I->getKey()][II->getKey()].RegEx = new Regex(II->getValue());
+ Entries[I->getKey()][II->getKey()].RegEx.reset(new Regex(II->getValue()));
}
}
return true;
}
-SpecialCaseList::~SpecialCaseList() {
- for (StringMap<StringMap<Entry> >::iterator I = Entries.begin(),
- E = Entries.end();
- I != E; ++I) {
- for (StringMap<Entry>::const_iterator II = I->second.begin(),
- IE = I->second.end();
- II != IE; ++II) {
- delete II->second.RegEx;
- }
- }
-}
-
-bool SpecialCaseList::isIn(const Function& F, const StringRef Category) const {
- return isIn(*F.getParent(), Category) ||
- inSectionCategory("fun", F.getName(), Category);
-}
-
-static StringRef GetGlobalTypeString(const GlobalValue &G) {
- // Types of GlobalVariables are always pointer types.
- Type *GType = G.getType()->getElementType();
- // For now we support blacklisting struct types only.
- if (StructType *SGType = dyn_cast<StructType>(GType)) {
- if (!SGType->isLiteral())
- return SGType->getName();
- }
- return "<unknown type>";
-}
-
-bool SpecialCaseList::isIn(const GlobalVariable &G,
- const StringRef Category) const {
- return isIn(*G.getParent(), Category) ||
- inSectionCategory("global", G.getName(), Category) ||
- inSectionCategory("type", GetGlobalTypeString(G), Category);
-}
-
-bool SpecialCaseList::isIn(const GlobalAlias &GA,
- const StringRef Category) const {
- if (isIn(*GA.getParent(), Category))
- return true;
-
- if (isa<FunctionType>(GA.getType()->getElementType()))
- return inSectionCategory("fun", GA.getName(), Category);
-
- return inSectionCategory("global", GA.getName(), Category) ||
- inSectionCategory("type", GetGlobalTypeString(GA), Category);
-}
-
-bool SpecialCaseList::isIn(const Module &M, const StringRef Category) const {
- return inSectionCategory("src", M.getModuleIdentifier(), Category);
-}
+SpecialCaseList::~SpecialCaseList() {}
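
Holding the Regex in a std::unique_ptr is what lets the destructor shrink to the empty body above: every Regex dies with its Entry when the maps are destroyed. The explicit move constructor keeps Entry movable into StringMap on compilers of the era that did not generate defaulted moves; the pattern in generic form (a sketch):

    struct Owner {
      std::unique_ptr<Regex> P;  // freed by the implicit destructor
      Owner() {}
      Owner(Owner &&Other) : P(std::move(Other.P)) {}  // transfer, never copy
    };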
-bool SpecialCaseList::inSectionCategory(const StringRef Section,
- const StringRef Query,
- const StringRef Category) const {
+bool SpecialCaseList::inSection(const StringRef Section, const StringRef Query,
+ const StringRef Category) const {
StringMap<StringMap<Entry> >::const_iterator I = Entries.find(Section);
if (I == Entries.end()) return false;
StringMap<Entry>::const_iterator II = I->second.find(Category);
diff --git a/lib/Support/StringMap.cpp b/lib/Support/StringMap.cpp
index 72a6d82..ddb7349 100644
--- a/lib/Support/StringMap.cpp
+++ b/lib/Support/StringMap.cpp
@@ -181,7 +181,7 @@ StringMapEntryBase *StringMapImpl::RemoveKey(StringRef Key) {
/// RehashTable - Grow the table, redistributing values into the buckets with
/// the appropriate mod-of-hashtable-size.
-void StringMapImpl::RehashTable() {
+unsigned StringMapImpl::RehashTable(unsigned BucketNo) {
unsigned NewSize;
unsigned *HashTable = (unsigned *)(TheTable + NumBuckets + 1);
@@ -193,9 +193,10 @@ void StringMapImpl::RehashTable() {
} else if (NumBuckets-(NumItems+NumTombstones) <= NumBuckets/8) {
NewSize = NumBuckets;
} else {
- return;
+ return BucketNo;
}
+ unsigned NewBucketNo = BucketNo;
// Allocate one extra bucket which will always be non-empty. This allows the
// iterators to stop at end.
StringMapEntryBase **NewTableArray =
@@ -215,6 +216,8 @@ void StringMapImpl::RehashTable() {
if (!NewTableArray[NewBucket]) {
NewTableArray[FullHash & (NewSize-1)] = Bucket;
NewHashArray[FullHash & (NewSize-1)] = FullHash;
+ if (I == BucketNo)
+ NewBucketNo = NewBucket;
continue;
}
@@ -227,6 +230,8 @@ void StringMapImpl::RehashTable() {
// Finally found a slot. Fill it in.
NewTableArray[NewBucket] = Bucket;
NewHashArray[NewBucket] = FullHash;
+ if (I == BucketNo)
+ NewBucketNo = NewBucket;
}
}
@@ -235,4 +240,5 @@ void StringMapImpl::RehashTable() {
TheTable = NewTableArray;
NumBuckets = NewSize;
NumTombstones = 0;
+ return NewBucketNo;
}
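
RehashTable now threads a bucket index through the rehash: the caller passes the bucket it was probing and receives that bucket's position in the grown table, so an in-progress insertion survives the resize. Caller-side shape (the helper names here are hypothetical):

    unsigned BucketNo = LookupBucketFor(Key);  // probe the current table
    if (ShouldGrow())
      BucketNo = RehashTable(BucketNo);        // index remapped to the new table
    // BucketNo still names the right slot for the pending insert.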
diff --git a/lib/Support/StringPool.cpp b/lib/Support/StringPool.cpp
index ff607cf..76faabc 100644
--- a/lib/Support/StringPool.cpp
+++ b/lib/Support/StringPool.cpp
@@ -27,7 +27,7 @@ PooledStringPtr StringPool::intern(StringRef Key) {
if (I != InternTable.end())
return PooledStringPtr(&*I);
- entry_t *S = entry_t::Create(Key.begin(), Key.end());
+ entry_t *S = entry_t::Create(Key);
S->getValue().Pool = this;
InternTable.insert(S);
diff --git a/lib/Support/TargetRegistry.cpp b/lib/Support/TargetRegistry.cpp
index a008831..f691883 100644
--- a/lib/Support/TargetRegistry.cpp
+++ b/lib/Support/TargetRegistry.cpp
@@ -116,17 +116,6 @@ void TargetRegistry::RegisterTarget(Target &T,
T.HasJIT = HasJIT;
}
-const Target *TargetRegistry::getClosestTargetForJIT(std::string &Error) {
- const Target *TheTarget = lookupTarget(sys::getDefaultTargetTriple(), Error);
-
- if (TheTarget && !TheTarget->hasJIT()) {
- Error = "No JIT compatible target available for this host";
- return nullptr;
- }
-
- return TheTarget;
-}
-
static int TargetArraySortFn(const std::pair<StringRef, const Target *> *LHS,
const std::pair<StringRef, const Target *> *RHS) {
return LHS->first.compare(RHS->first);
diff --git a/lib/Support/Threading.cpp b/lib/Support/Threading.cpp
index 1acfa79..ca7f3f6 100644
--- a/lib/Support/Threading.cpp
+++ b/lib/Support/Threading.cpp
@@ -7,7 +7,8 @@
//
//===----------------------------------------------------------------------===//
//
-// This file implements llvm_start_multithreaded() and friends.
+// This file defines helper functions for running LLVM in a multi-threaded
+// environment.
//
//===----------------------------------------------------------------------===//
@@ -19,50 +20,14 @@
using namespace llvm;
-static bool multithreaded_mode = false;
-
-static sys::Mutex* global_lock = nullptr;
-
-bool llvm::llvm_start_multithreaded() {
+bool llvm::llvm_is_multithreaded() {
#if LLVM_ENABLE_THREADS != 0
- assert(!multithreaded_mode && "Already multithreaded!");
- multithreaded_mode = true;
- global_lock = new sys::Mutex(true);
-
- // We fence here to ensure that all initialization is complete BEFORE we
- // return from llvm_start_multithreaded().
- sys::MemoryFence();
return true;
#else
return false;
#endif
}
-void llvm::llvm_stop_multithreaded() {
-#if LLVM_ENABLE_THREADS != 0
- assert(multithreaded_mode && "Not currently multithreaded!");
-
- // We fence here to insure that all threaded operations are complete BEFORE we
- // return from llvm_stop_multithreaded().
- sys::MemoryFence();
-
- multithreaded_mode = false;
- delete global_lock;
-#endif
-}
-
-bool llvm::llvm_is_multithreaded() {
- return multithreaded_mode;
-}
-
-void llvm::llvm_acquire_global_lock() {
- if (multithreaded_mode) global_lock->acquire();
-}
-
-void llvm::llvm_release_global_lock() {
- if (multithreaded_mode) global_lock->release();
-}
-
#if LLVM_ENABLE_THREADS != 0 && defined(HAVE_PTHREAD_H)
#include <pthread.h>
diff --git a/lib/Support/TimeValue.cpp b/lib/Support/TimeValue.cpp
index bd8af17..4a70797 100644
--- a/lib/Support/TimeValue.cpp
+++ b/lib/Support/TimeValue.cpp
@@ -53,7 +53,7 @@ TimeValue::normalize( void ) {
}
-/// Include the platform specific portion of TimeValue class
+/// Include the platform-specific portion of the TimeValue class.
#ifdef LLVM_ON_UNIX
#include "Unix/TimeValue.inc"
#endif
diff --git a/lib/Support/Timer.cpp b/lib/Support/Timer.cpp
index 61465ae..210bda7 100644
--- a/lib/Support/Timer.cpp
+++ b/lib/Support/Timer.cpp
@@ -19,6 +19,7 @@
#include "llvm/Support/Format.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/Mutex.h"
+#include "llvm/Support/MutexGuard.h"
#include "llvm/Support/Process.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -84,14 +85,13 @@ static TimerGroup *getDefaultTimerGroup() {
sys::MemoryFence();
if (tmp) return tmp;
- llvm_acquire_global_lock();
+ sys::SmartScopedLock<true> Lock(*TimerLock);
tmp = DefaultTimerGroup;
if (!tmp) {
tmp = new TimerGroup("Miscellaneous Ungrouped Timers");
sys::MemoryFence();
DefaultTimerGroup = tmp;
}
- llvm_release_global_lock();
return tmp;
}
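
With llvm_acquire_global_lock/llvm_release_global_lock gone (see the Threading.cpp hunk below), the lazy initialization above switches to an RAII guard on the ManagedStatic TimerLock while keeping the double-checked load and fences. The shape, in general form (sketch):

    {
      sys::SmartScopedLock<true> Lock(*TimerLock);  // released on scope exit
      if (!DefaultTimerGroup) {
        // construct, fence, then publish the pointer
      }
    }  // no explicit release call left to forget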
diff --git a/lib/Support/Triple.cpp b/lib/Support/Triple.cpp
index b3d48fb..b74ee13 100644
--- a/lib/Support/Triple.cpp
+++ b/lib/Support/Triple.cpp
@@ -50,6 +50,7 @@ const char *Triple::getArchTypeName(ArchType Kind) {
case amdil: return "amdil";
case spir: return "spir";
case spir64: return "spir64";
+ case kalimba: return "kalimba";
}
llvm_unreachable("Invalid ArchType!");
@@ -101,6 +102,7 @@ const char *Triple::getArchTypePrefix(ArchType Kind) {
case amdil: return "amdil";
case spir: return "spir";
case spir64: return "spir";
+ case kalimba: return "kalimba";
}
}
@@ -115,7 +117,9 @@ const char *Triple::getVendorTypeName(VendorType Kind) {
case BGQ: return "bgq";
case Freescale: return "fsl";
case IBM: return "ibm";
+ case ImaginationTechnologies: return "img";
case NVIDIA: return "nvidia";
+ case CSR: return "csr";
}
llvm_unreachable("Invalid VendorType!");
@@ -207,6 +211,7 @@ Triple::ArchType Triple::getArchTypeForLLVMName(StringRef Name) {
.Case("amdil", amdil)
.Case("spir", spir)
.Case("spir64", spir64)
+ .Case("kalimba", kalimba)
.Default(UnknownArch);
}
@@ -280,6 +285,7 @@ static Triple::ArchType parseArch(StringRef ArchName) {
.Case("amdil", Triple::amdil)
.Case("spir", Triple::spir)
.Case("spir64", Triple::spir64)
+ .Case("kalimba", Triple::kalimba)
.Default(Triple::UnknownArch);
}
@@ -292,7 +298,9 @@ static Triple::VendorType parseVendor(StringRef VendorName) {
.Case("bgq", Triple::BGQ)
.Case("fsl", Triple::Freescale)
.Case("ibm", Triple::IBM)
+ .Case("img", Triple::ImaginationTechnologies)
.Case("nvidia", Triple::NVIDIA)
+ .Case("csr", Triple::CSR)
.Default(Triple::UnknownVendor);
}
@@ -737,9 +745,8 @@ void Triple::setObjectFormat(ObjectFormatType Kind) {
if (Environment == UnknownEnvironment)
return setEnvironmentName(getObjectFormatTypeName(Kind));
- Twine Env = getEnvironmentTypeName(Environment) + Twine("-") +
- getObjectFormatTypeName(Kind);
- setEnvironmentName(Env.str());
+ setEnvironmentName((getEnvironmentTypeName(Environment) + Twine("-") +
+ getObjectFormatTypeName(Kind)).str());
}
void Triple::setArchName(StringRef Str) {
@@ -799,6 +806,7 @@ static unsigned getArchPointerBitWidth(llvm::Triple::ArchType Arch) {
case llvm::Triple::x86:
case llvm::Triple::xcore:
case llvm::Triple::spir:
+ case llvm::Triple::kalimba:
return 32;
case llvm::Triple::arm64:
@@ -850,6 +858,7 @@ Triple Triple::get32BitArchVariant() const {
case Triple::arm:
case Triple::armeb:
case Triple::hexagon:
+ case Triple::kalimba:
case Triple::le32:
case Triple::mips:
case Triple::mipsel:
@@ -884,6 +893,7 @@ Triple Triple::get64BitArchVariant() const {
case Triple::arm:
case Triple::armeb:
case Triple::hexagon:
+ case Triple::kalimba:
case Triple::le32:
case Triple::msp430:
case Triple::r600:
diff --git a/lib/Support/Unix/Memory.inc b/lib/Support/Unix/Memory.inc
index 23b49b7..c9d89a8 100644
--- a/lib/Support/Unix/Memory.inc
+++ b/lib/Support/Unix/Memory.inc
@@ -83,8 +83,8 @@ MemoryBlock
Memory::allocateMappedMemory(size_t NumBytes,
const MemoryBlock *const NearBlock,
unsigned PFlags,
- error_code &EC) {
- EC = error_code::success();
+ std::error_code &EC) {
+ EC = std::error_code();
if (NumBytes == 0)
return MemoryBlock();
@@ -95,7 +95,7 @@ Memory::allocateMappedMemory(size_t NumBytes,
#ifdef NEED_DEV_ZERO_FOR_MMAP
static int zero_fd = open("/dev/zero", O_RDWR);
if (zero_fd == -1) {
- EC = error_code(errno, system_category());
+ EC = std::error_code(errno, std::generic_category());
return MemoryBlock();
}
fd = zero_fd;
@@ -123,7 +123,7 @@ Memory::allocateMappedMemory(size_t NumBytes,
if (NearBlock) //Try again without a near hint
return allocateMappedMemory(NumBytes, nullptr, PFlags, EC);
- EC = error_code(errno, system_category());
+ EC = std::error_code(errno, std::generic_category());
return MemoryBlock();
}
@@ -137,38 +137,38 @@ Memory::allocateMappedMemory(size_t NumBytes,
return Result;
}
-error_code
+std::error_code
Memory::releaseMappedMemory(MemoryBlock &M) {
if (M.Address == nullptr || M.Size == 0)
- return error_code::success();
+ return std::error_code();
if (0 != ::munmap(M.Address, M.Size))
- return error_code(errno, system_category());
+ return std::error_code(errno, std::generic_category());
M.Address = nullptr;
M.Size = 0;
- return error_code::success();
+ return std::error_code();
}
-error_code
+std::error_code
Memory::protectMappedMemory(const MemoryBlock &M, unsigned Flags) {
if (M.Address == nullptr || M.Size == 0)
- return error_code::success();
+ return std::error_code();
if (!Flags)
- return error_code(EINVAL, generic_category());
+ return std::error_code(EINVAL, std::generic_category());
int Protect = getPosixProtectionFlags(Flags);
int Result = ::mprotect(M.Address, M.Size, Protect);
if (Result != 0)
- return error_code(errno, system_category());
+ return std::error_code(errno, std::generic_category());
if (Flags & MF_EXEC)
Memory::InvalidateInstructionCache(M.Address, M.Size);
- return error_code::success();
+ return std::error_code();
}
/// AllocateRWX - Allocate a slab of memory with read/write/execute
diff --git a/lib/Support/Unix/Path.inc b/lib/Support/Unix/Path.inc
index 519a016..623547a 100644
--- a/lib/Support/Unix/Path.inc
+++ b/lib/Support/Unix/Path.inc
@@ -87,7 +87,7 @@ namespace {
};
}
-static error_code TempDir(SmallVectorImpl<char> &result) {
+static std::error_code TempDir(SmallVectorImpl<char> &result) {
// FIXME: Don't use TMPDIR if program is SUID or SGID enabled.
const char *dir = nullptr;
(dir = std::getenv("TMPDIR")) || (dir = std::getenv("TMP")) ||
@@ -100,7 +100,7 @@ static error_code TempDir(SmallVectorImpl<char> &result) {
result.clear();
StringRef d(dir);
result.append(d.begin(), d.end());
- return error_code::success();
+ return std::error_code();
}
namespace llvm {
@@ -225,7 +225,7 @@ UniqueID file_status::getUniqueID() const {
return UniqueID(fs_st_dev, fs_st_ino);
}
-error_code current_path(SmallVectorImpl<char> &result) {
+std::error_code current_path(SmallVectorImpl<char> &result) {
result.clear();
const char *pwd = ::getenv("PWD");
@@ -235,7 +235,7 @@ error_code current_path(SmallVectorImpl<char> &result) {
!llvm::sys::fs::status(".", DotStatus) &&
PWDStatus.getUniqueID() == DotStatus.getUniqueID()) {
result.append(pwd, pwd + strlen(pwd));
- return error_code::success();
+ return std::error_code();
}
#ifdef MAXPATHLEN
@@ -248,8 +248,8 @@ error_code current_path(SmallVectorImpl<char> &result) {
while (true) {
if (::getcwd(result.data(), result.capacity()) == nullptr) {
// See if there was a real error.
- if (errno != errc::not_enough_memory)
- return error_code(errno, system_category());
+ if (errno != ENOMEM)
+ return std::error_code(errno, std::generic_category());
// Otherwise there just wasn't enough space.
result.reserve(result.capacity() * 2);
} else
@@ -257,22 +257,22 @@ error_code current_path(SmallVectorImpl<char> &result) {
}
result.set_size(strlen(result.data()));
- return error_code::success();
+ return std::error_code();
}
-error_code create_directory(const Twine &path, bool IgnoreExisting) {
+std::error_code create_directory(const Twine &path, bool IgnoreExisting) {
SmallString<128> path_storage;
StringRef p = path.toNullTerminatedStringRef(path_storage);
if (::mkdir(p.begin(), S_IRWXU | S_IRWXG) == -1) {
- if (errno != errc::file_exists || !IgnoreExisting)
- return error_code(errno, system_category());
+ if (errno != EEXIST || !IgnoreExisting)
+ return std::error_code(errno, std::generic_category());
}
- return error_code::success();
+ return std::error_code();
}
-error_code normalize_separators(SmallVectorImpl<char> &Path) {
+std::error_code normalize_separators(SmallVectorImpl<char> &Path) {
for (auto PI = Path.begin(), PE = Path.end(); PI < PE; ++PI) {
if (*PI == '\\') {
auto PN = PI + 1;
@@ -282,12 +282,12 @@ error_code normalize_separators(SmallVectorImpl<char> &Path) {
*PI = '/';
}
}
- return error_code::success();
+ return std::error_code();
}
// Note that we are using symbolic link because hard links are not supported by
// all filesystems (SMB doesn't).
-error_code create_link(const Twine &to, const Twine &from) {
+std::error_code create_link(const Twine &to, const Twine &from) {
// Get arguments.
SmallString<128> from_storage;
SmallString<128> to_storage;
@@ -295,20 +295,20 @@ error_code create_link(const Twine &to, const Twine &from) {
StringRef t = to.toNullTerminatedStringRef(to_storage);
if (::symlink(t.begin(), f.begin()) == -1)
- return error_code(errno, system_category());
+ return std::error_code(errno, std::generic_category());
- return error_code::success();
+ return std::error_code();
}
-error_code remove(const Twine &path, bool IgnoreNonExisting) {
+std::error_code remove(const Twine &path, bool IgnoreNonExisting) {
SmallString<128> path_storage;
StringRef p = path.toNullTerminatedStringRef(path_storage);
struct stat buf;
if (lstat(p.begin(), &buf) != 0) {
- if (errno != errc::no_such_file_or_directory || !IgnoreNonExisting)
- return error_code(errno, system_category());
- return error_code::success();
+ if (errno != ENOENT || !IgnoreNonExisting)
+ return std::error_code(errno, std::generic_category());
+ return std::error_code();
}
// Note: this check catches strange situations. In all cases, LLVM should
@@ -320,14 +320,14 @@ error_code remove(const Twine &path, bool IgnoreNonExisting) {
return make_error_code(errc::operation_not_permitted);
if (::remove(p.begin()) == -1) {
- if (errno != errc::no_such_file_or_directory || !IgnoreNonExisting)
- return error_code(errno, system_category());
+ if (errno != ENOENT || !IgnoreNonExisting)
+ return std::error_code(errno, std::generic_category());
}
- return error_code::success();
+ return std::error_code();
}
-error_code rename(const Twine &from, const Twine &to) {
+std::error_code rename(const Twine &from, const Twine &to) {
// Get arguments.
SmallString<128> from_storage;
SmallString<128> to_storage;
@@ -335,33 +335,33 @@ error_code rename(const Twine &from, const Twine &to) {
StringRef t = to.toNullTerminatedStringRef(to_storage);
if (::rename(f.begin(), t.begin()) == -1)
- return error_code(errno, system_category());
+ return std::error_code(errno, std::generic_category());
- return error_code::success();
+ return std::error_code();
}
-error_code resize_file(const Twine &path, uint64_t size) {
+std::error_code resize_file(const Twine &path, uint64_t size) {
SmallString<128> path_storage;
StringRef p = path.toNullTerminatedStringRef(path_storage);
if (::truncate(p.begin(), size) == -1)
- return error_code(errno, system_category());
+ return std::error_code(errno, std::generic_category());
- return error_code::success();
+ return std::error_code();
}
-error_code exists(const Twine &path, bool &result) {
+std::error_code exists(const Twine &path, bool &result) {
SmallString<128> path_storage;
StringRef p = path.toNullTerminatedStringRef(path_storage);
if (::access(p.begin(), F_OK) == -1) {
- if (errno != errc::no_such_file_or_directory)
- return error_code(errno, system_category());
+ if (errno != ENOENT)
+ return std::error_code(errno, std::generic_category());
result = false;
} else
result = true;
- return error_code::success();
+ return std::error_code();
}
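
A pattern repeated throughout this file: errno is now compared against raw POSIX macros (EEXIST, ENOENT, ENOMEM) because std::errc enumerators, unlike the old llvm::errc values, do not compare against a plain int; the errno is then wrapped once for the caller. In general form (sketch):

    if (::unlink(P) == -1) {                  // any failing syscall
      if (errno != ENOENT)                    // tolerate "already gone"
        return std::error_code(errno, std::generic_category());
    }
    return std::error_code();                 // default-constructed == success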
bool can_write(const Twine &Path) {
@@ -390,18 +390,20 @@ bool equivalent(file_status A, file_status B) {
A.fs_st_ino == B.fs_st_ino;
}
-error_code equivalent(const Twine &A, const Twine &B, bool &result) {
+std::error_code equivalent(const Twine &A, const Twine &B, bool &result) {
file_status fsA, fsB;
- if (error_code ec = status(A, fsA)) return ec;
- if (error_code ec = status(B, fsB)) return ec;
+ if (std::error_code ec = status(A, fsA))
+ return ec;
+ if (std::error_code ec = status(B, fsB))
+ return ec;
result = equivalent(fsA, fsB);
- return error_code::success();
+ return std::error_code();
}
-static error_code fillStatus(int StatRet, const struct stat &Status,
+static std::error_code fillStatus(int StatRet, const struct stat &Status,
file_status &Result) {
if (StatRet != 0) {
- error_code ec(errno, system_category());
+ std::error_code ec(errno, std::generic_category());
if (ec == errc::no_such_file_or_directory)
Result = file_status(file_type::file_not_found);
else
@@ -429,10 +431,10 @@ static error_code fillStatus(int StatRet, const struct stat &Status,
file_status(Type, Perms, Status.st_dev, Status.st_ino, Status.st_mtime,
Status.st_uid, Status.st_gid, Status.st_size);
- return error_code::success();
+ return std::error_code();
}
-error_code status(const Twine &Path, file_status &Result) {
+std::error_code status(const Twine &Path, file_status &Result) {
SmallString<128> PathStorage;
StringRef P = Path.toNullTerminatedStringRef(PathStorage);
@@ -441,36 +443,36 @@ error_code status(const Twine &Path, file_status &Result) {
return fillStatus(StatRet, Status, Result);
}
-error_code status(int FD, file_status &Result) {
+std::error_code status(int FD, file_status &Result) {
struct stat Status;
int StatRet = ::fstat(FD, &Status);
return fillStatus(StatRet, Status, Result);
}
-error_code setLastModificationAndAccessTime(int FD, TimeValue Time) {
+std::error_code setLastModificationAndAccessTime(int FD, TimeValue Time) {
#if defined(HAVE_FUTIMENS)
timespec Times[2];
Times[0].tv_sec = Time.toEpochTime();
Times[0].tv_nsec = 0;
Times[1] = Times[0];
if (::futimens(FD, Times))
- return error_code(errno, system_category());
- return error_code::success();
+ return std::error_code(errno, std::generic_category());
+ return std::error_code();
#elif defined(HAVE_FUTIMES)
timeval Times[2];
Times[0].tv_sec = Time.toEpochTime();
Times[0].tv_usec = 0;
Times[1] = Times[0];
if (::futimes(FD, Times))
- return error_code(errno, system_category());
- return error_code::success();
+ return std::error_code(errno, std::generic_category());
+ return std::error_code();
#else
#warning Missing futimes() and futimens()
- return make_error_code(errc::not_supported);
+ return make_error_code(errc::function_not_supported);
#endif
}
-error_code mapped_file_region::init(int FD, bool CloseFD, uint64_t Offset) {
+std::error_code mapped_file_region::init(int FD, bool CloseFD, uint64_t Offset) {
AutoFD ScopedFD(FD);
if (!CloseFD)
ScopedFD.take();
@@ -478,7 +480,7 @@ error_code mapped_file_region::init(int FD, bool CloseFD, uint64_t Offset) {
// Figure out how large the file is.
struct stat FileInfo;
if (fstat(FD, &FileInfo) == -1)
- return error_code(errno, system_category());
+ return std::error_code(errno, std::generic_category());
uint64_t FileSize = FileInfo.st_size;
if (Size == 0)
@@ -486,7 +488,7 @@ error_code mapped_file_region::init(int FD, bool CloseFD, uint64_t Offset) {
else if (FileSize < Size) {
// We need to grow the file.
if (ftruncate(FD, Size) == -1)
- return error_code(errno, system_category());
+ return std::error_code(errno, std::generic_category());
}
int flags = (Mode == readwrite) ? MAP_SHARED : MAP_PRIVATE;
@@ -496,15 +498,15 @@ error_code mapped_file_region::init(int FD, bool CloseFD, uint64_t Offset) {
#endif
Mapping = ::mmap(nullptr, Size, prot, flags, FD, Offset);
if (Mapping == MAP_FAILED)
- return error_code(errno, system_category());
- return error_code::success();
+ return std::error_code(errno, std::generic_category());
+ return std::error_code();
}
mapped_file_region::mapped_file_region(const Twine &path,
mapmode mode,
uint64_t length,
uint64_t offset,
- error_code &ec)
+ std::error_code &ec)
: Mode(mode)
, Size(length)
, Mapping() {
@@ -519,7 +521,7 @@ mapped_file_region::mapped_file_region(const Twine &path,
int oflags = (mode == readonly) ? O_RDONLY : O_RDWR;
int ofd = ::open(name.begin(), oflags);
if (ofd == -1) {
- ec = error_code(errno, system_category());
+ ec = std::error_code(errno, std::generic_category());
return;
}
@@ -533,7 +535,7 @@ mapped_file_region::mapped_file_region(int fd,
mapmode mode,
uint64_t length,
uint64_t offset,
- error_code &ec)
+ std::error_code &ec)
: Mode(mode)
, Size(length)
, Mapping() {
@@ -583,12 +585,12 @@ int mapped_file_region::alignment() {
return process::get_self()->page_size();
}
-error_code detail::directory_iterator_construct(detail::DirIterState &it,
+std::error_code detail::directory_iterator_construct(detail::DirIterState &it,
StringRef path){
SmallString<128> path_null(path);
DIR *directory = ::opendir(path_null.c_str());
if (!directory)
- return error_code(errno, system_category());
+ return std::error_code(errno, std::generic_category());
it.IterationHandle = reinterpret_cast<intptr_t>(directory);
// Add something for replace_filename to replace.
@@ -597,19 +599,19 @@ error_code detail::directory_iterator_construct(detail::DirIterState &it,
return directory_iterator_increment(it);
}
-error_code detail::directory_iterator_destruct(detail::DirIterState &it) {
+std::error_code detail::directory_iterator_destruct(detail::DirIterState &it) {
if (it.IterationHandle)
::closedir(reinterpret_cast<DIR *>(it.IterationHandle));
it.IterationHandle = 0;
it.CurrentEntry = directory_entry();
- return error_code::success();
+ return std::error_code();
}
-error_code detail::directory_iterator_increment(detail::DirIterState &it) {
+std::error_code detail::directory_iterator_increment(detail::DirIterState &it) {
errno = 0;
dirent *cur_dir = ::readdir(reinterpret_cast<DIR *>(it.IterationHandle));
if (cur_dir == nullptr && errno != 0) {
- return error_code(errno, system_category());
+ return std::error_code(errno, std::generic_category());
} else if (cur_dir != nullptr) {
StringRef name(cur_dir->d_name, NAMLEN(cur_dir));
if ((name.size() == 1 && name[0] == '.') ||
@@ -619,80 +621,20 @@ error_code detail::directory_iterator_increment(detail::DirIterState &it) {
} else
return directory_iterator_destruct(it);
- return error_code::success();
-}
-
-error_code get_magic(const Twine &path, uint32_t len,
- SmallVectorImpl<char> &result) {
- SmallString<128> PathStorage;
- StringRef Path = path.toNullTerminatedStringRef(PathStorage);
- result.set_size(0);
-
- // Open path.
- std::FILE *file = std::fopen(Path.data(), "rb");
- if (!file)
- return error_code(errno, system_category());
-
- // Reserve storage.
- result.reserve(len);
-
- // Read magic!
- size_t size = std::fread(result.data(), 1, len, file);
- if (std::ferror(file) != 0) {
- std::fclose(file);
- return error_code(errno, system_category());
- } else if (size != len) {
- if (std::feof(file) != 0) {
- std::fclose(file);
- result.set_size(size);
- return make_error_code(errc::value_too_large);
- }
- }
- std::fclose(file);
- result.set_size(size);
- return error_code::success();
-}
-
-error_code map_file_pages(const Twine &path, off_t file_offset, size_t size,
- bool map_writable, void *&result) {
- SmallString<128> path_storage;
- StringRef name = path.toNullTerminatedStringRef(path_storage);
- int oflags = map_writable ? O_RDWR : O_RDONLY;
- int ofd = ::open(name.begin(), oflags);
- if ( ofd == -1 )
- return error_code(errno, system_category());
- AutoFD fd(ofd);
- int flags = map_writable ? MAP_SHARED : MAP_PRIVATE;
- int prot = map_writable ? (PROT_READ|PROT_WRITE) : PROT_READ;
-#ifdef MAP_FILE
- flags |= MAP_FILE;
-#endif
- result = ::mmap(nullptr, size, prot, flags, fd, file_offset);
- if (result == MAP_FAILED) {
- return error_code(errno, system_category());
- }
-
- return error_code::success();
-}
-
-error_code unmap_file_pages(void *base, size_t size) {
- if ( ::munmap(base, size) == -1 )
- return error_code(errno, system_category());
-
- return error_code::success();
+ return std::error_code();
}
-error_code openFileForRead(const Twine &Name, int &ResultFD) {
+std::error_code openFileForRead(const Twine &Name, int &ResultFD) {
SmallString<128> Storage;
StringRef P = Name.toNullTerminatedStringRef(Storage);
while ((ResultFD = open(P.begin(), O_RDONLY)) < 0) {
if (errno != EINTR)
- return error_code(errno, system_category());
+ return std::error_code(errno, std::generic_category());
}
- return error_code::success();
+ return std::error_code();
}
-error_code openFileForWrite(const Twine &Name, int &ResultFD,
+std::error_code openFileForWrite(const Twine &Name, int &ResultFD,
sys::fs::OpenFlags Flags, unsigned Mode) {
// Verify that we don't have both "append" and "excl".
assert((!(Flags & sys::fs::F_Excl) || !(Flags & sys::fs::F_Append)) &&
@@ -717,9 +659,9 @@ error_code openFileForWrite(const Twine &Name, int &ResultFD,
StringRef P = Name.toNullTerminatedStringRef(Storage);
while ((ResultFD = open(P.begin(), OpenFlags, Mode)) < 0) {
if (errno != EINTR)
- return error_code(errno, system_category());
+ return std::error_code(errno, std::generic_category());
}
- return error_code::success();
+ return std::error_code();
}
} // end namespace fs
diff --git a/lib/Support/Unix/Process.inc b/lib/Support/Unix/Process.inc
index 8faa638..d2c5dbc 100644
--- a/lib/Support/Unix/Process.inc
+++ b/lib/Support/Unix/Process.inc
@@ -47,7 +47,6 @@
using namespace llvm;
using namespace sys;
-
process::id_type self_process::get_id() {
return getpid();
}
@@ -190,12 +189,13 @@ Optional<std::string> Process::GetEnv(StringRef Name) {
return std::string(Val);
}
-error_code Process::GetArgumentVector(SmallVectorImpl<const char *> &ArgsOut,
- ArrayRef<const char *> ArgsIn,
- SpecificBumpPtrAllocator<char> &) {
+std::error_code
+Process::GetArgumentVector(SmallVectorImpl<const char *> &ArgsOut,
+ ArrayRef<const char *> ArgsIn,
+ SpecificBumpPtrAllocator<char> &) {
ArgsOut.append(ArgsIn.begin(), ArgsIn.end());
- return error_code::success();
+ return std::error_code();
}
bool Process::StandardInIsUserInput() {
diff --git a/lib/Support/Unix/Program.inc b/lib/Support/Unix/Program.inc
index 1225a9c..06a33cd 100644
--- a/lib/Support/Unix/Program.inc
+++ b/lib/Support/Unix/Program.inc
@@ -48,6 +48,7 @@
#endif
namespace llvm {
+
using namespace sys;
ProcessInfo::ProcessInfo() : Pid(0), ReturnCode(0) {}
@@ -349,7 +350,11 @@ ProcessInfo sys::Wait(const ProcessInfo &PI, unsigned SecondsToWait,
// Parent process: Wait for the child process to terminate.
int status;
ProcessInfo WaitResult;
- WaitResult.Pid = waitpid(ChildPid, &status, WaitPidOptions);
+
+ do {
+ WaitResult.Pid = waitpid(ChildPid, &status, WaitPidOptions);
+ } while (WaitUntilTerminates && WaitResult.Pid == -1 && errno == EINTR);
+
if (WaitResult.Pid != PI.Pid) {
if (WaitResult.Pid == 0) {
// Non-blocking wait.
@@ -425,14 +430,14 @@ ProcessInfo sys::Wait(const ProcessInfo &PI, unsigned SecondsToWait,
return WaitResult;
}
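
The do/while added above is the standard EINTR retry idiom: a signal delivered to the parent makes waitpid return -1 with errno == EINTR, and an indefinite wait should simply retry. It is deliberately limited to the WaitUntilTerminates case, since the timeout path relies on an alarm signal interrupting waitpid. The bare idiom (sketch):

    pid_t P;
    do {
      P = waitpid(ChildPid, &Status, 0);
    } while (P == -1 && errno == EINTR);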
-error_code sys::ChangeStdinToBinary(){
+std::error_code sys::ChangeStdinToBinary() {
// Do nothing, as Unix doesn't differentiate between text and binary.
- return make_error_code(errc::success);
+ return std::error_code();
}
-error_code sys::ChangeStdoutToBinary(){
+std::error_code sys::ChangeStdoutToBinary() {
// Do nothing, as Unix doesn't differentiate between text and binary.
- return make_error_code(errc::success);
+ return std::error_code();
}
bool llvm::sys::argumentsFitWithinSystemLimits(ArrayRef<const char*> Args) {
diff --git a/lib/Support/Unix/system_error.inc b/lib/Support/Unix/system_error.inc
deleted file mode 100644
index 681e919..0000000
--- a/lib/Support/Unix/system_error.inc
+++ /dev/null
@@ -1,34 +0,0 @@
-//===- llvm/Support/Unix/system_error.inc - Unix error_code ------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file provides the Unix specific implementation of the error_code
-// and error_condition classes.
-//
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-//=== WARNING: Implementation here must contain only generic UNIX code that
-//=== is guaranteed to work on *all* UNIX variants.
-//===----------------------------------------------------------------------===//
-
-using namespace llvm;
-
-std::string
-_system_error_category::message(int ev) const {
- return _do_message::message(ev);
-}
-
-error_condition
-_system_error_category::default_error_condition(int ev) const {
-#ifdef ELAST
- if (ev > ELAST)
- return error_condition(ev, system_category());
-#endif // ELAST
- return error_condition(ev, generic_category());
-}
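The custom category machinery deleted above is superseded by <system_error>: an errno value paired with std::generic_category() already compares equal to the matching std::errc enumerator through error_condition equivalence, which is what checks such as EC != errc::no_such_file_or_directory elsewhere in this patch rely on. A small self-contained illustration using the standard std::errc enumerators:

    #include <cassert>
    #include <cerrno>
    #include <system_error>

    int main() {
      std::error_code EC(ENOENT, std::generic_category());
      // Equality is resolved through the category's default_error_condition.
      assert(EC == std::errc::no_such_file_or_directory);
      // A default-constructed error_code is the success value and tests false.
      assert(!std::error_code());
      return 0;
    }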
diff --git a/lib/Support/Windows/DynamicLibrary.inc b/lib/Support/Windows/DynamicLibrary.inc
index 5d0278f..5ed0b70 100644
--- a/lib/Support/Windows/DynamicLibrary.inc
+++ b/lib/Support/Windows/DynamicLibrary.inc
@@ -85,7 +85,7 @@ DynamicLibrary DynamicLibrary::getPermanentLibrary(const char *filename,
}
SmallVector<wchar_t, MAX_PATH> filenameUnicode;
- if (error_code ec = windows::UTF8ToUTF16(filename, filenameUnicode)) {
+ if (std::error_code ec = windows::UTF8ToUTF16(filename, filenameUnicode)) {
SetLastError(ec.value());
MakeErrMsg(errMsg, std::string(filename) + ": Can't convert to UTF-16: ");
return DynamicLibrary();
diff --git a/lib/Support/Windows/Memory.inc b/lib/Support/Windows/Memory.inc
index ebe7878..ae8371a 100644
--- a/lib/Support/Windows/Memory.inc
+++ b/lib/Support/Windows/Memory.inc
@@ -15,6 +15,7 @@
#include "llvm/Support/DataTypes.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Process.h"
+#include "llvm/Support/WindowsError.h"
// The Windows.h header must be the last one included.
#include "WindowsSupport.h"
@@ -69,8 +70,8 @@ namespace sys {
MemoryBlock Memory::allocateMappedMemory(size_t NumBytes,
const MemoryBlock *const NearBlock,
unsigned Flags,
- error_code &EC) {
- EC = error_code::success();
+ std::error_code &EC) {
+ EC = std::error_code();
if (NumBytes == 0)
return MemoryBlock();
@@ -99,7 +100,7 @@ MemoryBlock Memory::allocateMappedMemory(size_t NumBytes,
// Try again without the NearBlock hint
return allocateMappedMemory(NumBytes, NULL, Flags, EC);
}
- EC = error_code(::GetLastError(), system_category());
+ EC = mapWindowsError(::GetLastError());
return MemoryBlock();
}
@@ -113,34 +114,34 @@ MemoryBlock Memory::allocateMappedMemory(size_t NumBytes,
return Result;
}
-error_code Memory::releaseMappedMemory(MemoryBlock &M) {
+std::error_code Memory::releaseMappedMemory(MemoryBlock &M) {
if (M.Address == 0 || M.Size == 0)
- return error_code::success();
+ return std::error_code();
if (!VirtualFree(M.Address, 0, MEM_RELEASE))
- return error_code(::GetLastError(), system_category());
+ return mapWindowsError(::GetLastError());
M.Address = 0;
M.Size = 0;
- return error_code::success();
+ return std::error_code();
}
-error_code Memory::protectMappedMemory(const MemoryBlock &M,
+std::error_code Memory::protectMappedMemory(const MemoryBlock &M,
unsigned Flags) {
if (M.Address == 0 || M.Size == 0)
- return error_code::success();
+ return std::error_code();
DWORD Protect = getWindowsProtectionFlags(Flags);
DWORD OldFlags;
if (!VirtualProtect(M.Address, M.Size, Protect, &OldFlags))
- return error_code(::GetLastError(), system_category());
+ return mapWindowsError(::GetLastError());
if (Flags & MF_EXEC)
Memory::InvalidateInstructionCache(M.Address, M.Size);
- return error_code::success();
+ return std::error_code();
}
/// InvalidateInstructionCache - Before the JIT can run a block of code
@@ -156,18 +157,18 @@ MemoryBlock Memory::AllocateRWX(size_t NumBytes,
const MemoryBlock *NearBlock,
std::string *ErrMsg) {
MemoryBlock MB;
- error_code EC;
+ std::error_code EC;
MB = allocateMappedMemory(NumBytes, NearBlock,
MF_READ|MF_WRITE|MF_EXEC, EC);
- if (EC != error_code::success() && ErrMsg) {
+ if (EC != std::error_code() && ErrMsg) {
MakeErrMsg(ErrMsg, EC.message());
}
return MB;
}
bool Memory::ReleaseRWX(MemoryBlock &M, std::string *ErrMsg) {
- error_code EC = releaseMappedMemory(M);
- if (EC == error_code::success())
+ std::error_code EC = releaseMappedMemory(M);
+ if (EC == std::error_code())
return false;
MakeErrMsg(ErrMsg, EC.message());
return true;
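The comparisons EC != std::error_code() and EC == std::error_code() above are spelled-out forms of std::error_code's explicit bool conversion, which is true exactly when value() != 0; the shorter equivalent would be:

    #include <system_error>

    bool failed(const std::error_code &EC) {
      return static_cast<bool>(EC); // same as: EC != std::error_code()
    }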
diff --git a/lib/Support/Windows/Path.inc b/lib/Support/Windows/Path.inc
index e59888e..7a1bc04 100644
--- a/lib/Support/Windows/Path.inc
+++ b/lib/Support/Windows/Path.inc
@@ -17,6 +17,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/WindowsError.h"
#include <fcntl.h>
#include <io.h>
#include <sys/stat.h>
@@ -44,7 +45,11 @@ using namespace llvm;
using llvm::sys::windows::UTF8ToUTF16;
using llvm::sys::windows::UTF16ToUTF8;
-static error_code TempDir(SmallVectorImpl<char> &Result) {
+static std::error_code windows_error(DWORD E) {
+ return mapWindowsError(E);
+}
+
+static std::error_code TempDir(SmallVectorImpl<char> &Result) {
SmallVector<wchar_t, 64> Res;
retry_temp_dir:
DWORD Len = ::GetTempPathW(Res.capacity(), Res.begin());
@@ -119,7 +124,7 @@ TimeValue file_status::getLastModificationTime() const {
return Ret;
}
-error_code current_path(SmallVectorImpl<char> &result) {
+std::error_code current_path(SmallVectorImpl<char> &result) {
SmallVector<wchar_t, MAX_PATH> cur_path;
DWORD len = MAX_PATH;
@@ -141,30 +146,30 @@ error_code current_path(SmallVectorImpl<char> &result) {
return UTF16ToUTF8(cur_path.begin(), cur_path.size(), result);
}
-error_code create_directory(const Twine &path, bool IgnoreExisting) {
+std::error_code create_directory(const Twine &path, bool IgnoreExisting) {
SmallString<128> path_storage;
SmallVector<wchar_t, 128> path_utf16;
- if (error_code ec = UTF8ToUTF16(path.toStringRef(path_storage),
- path_utf16))
+ if (std::error_code ec =
+ UTF8ToUTF16(path.toStringRef(path_storage), path_utf16))
return ec;
if (!::CreateDirectoryW(path_utf16.begin(), NULL)) {
- error_code ec = windows_error(::GetLastError());
- if (ec != windows_error::already_exists || !IgnoreExisting)
- return ec;
+ DWORD LastError = ::GetLastError();
+ if (LastError != ERROR_ALREADY_EXISTS || !IgnoreExisting)
+ return windows_error(LastError);
}
- return error_code::success();
+ return std::error_code();
}
-error_code normalize_separators(SmallVectorImpl<char> &Path) {
+std::error_code normalize_separators(SmallVectorImpl<char> &Path) {
(void) Path;
- return error_code::success();
+ return std::error_code();
}
// We can't use symbolic links for windows.
-error_code create_link(const Twine &to, const Twine &from) {
+std::error_code create_link(const Twine &to, const Twine &from) {
// Get arguments.
SmallString<128> from_storage;
SmallString<128> to_storage;
@@ -174,47 +179,49 @@ error_code create_link(const Twine &to, const Twine &from) {
// Convert to utf-16.
SmallVector<wchar_t, 128> wide_from;
SmallVector<wchar_t, 128> wide_to;
- if (error_code ec = UTF8ToUTF16(f, wide_from)) return ec;
- if (error_code ec = UTF8ToUTF16(t, wide_to)) return ec;
+ if (std::error_code ec = UTF8ToUTF16(f, wide_from))
+ return ec;
+ if (std::error_code ec = UTF8ToUTF16(t, wide_to))
+ return ec;
if (!::CreateHardLinkW(wide_from.begin(), wide_to.begin(), NULL))
return windows_error(::GetLastError());
- return error_code::success();
+ return std::error_code();
}
-error_code remove(const Twine &path, bool IgnoreNonExisting) {
+std::error_code remove(const Twine &path, bool IgnoreNonExisting) {
SmallString<128> path_storage;
SmallVector<wchar_t, 128> path_utf16;
file_status ST;
- if (error_code EC = status(path, ST)) {
+ if (std::error_code EC = status(path, ST)) {
if (EC != errc::no_such_file_or_directory || !IgnoreNonExisting)
return EC;
- return error_code::success();
+ return std::error_code();
}
- if (error_code ec = UTF8ToUTF16(path.toStringRef(path_storage),
- path_utf16))
+ if (std::error_code ec =
+ UTF8ToUTF16(path.toStringRef(path_storage), path_utf16))
return ec;
if (ST.type() == file_type::directory_file) {
if (!::RemoveDirectoryW(c_str(path_utf16))) {
- error_code EC = windows_error(::GetLastError());
+ std::error_code EC = windows_error(::GetLastError());
if (EC != errc::no_such_file_or_directory || !IgnoreNonExisting)
return EC;
}
- return error_code::success();
+ return std::error_code();
}
if (!::DeleteFileW(c_str(path_utf16))) {
- error_code EC = windows_error(::GetLastError());
+ std::error_code EC = windows_error(::GetLastError());
if (EC != errc::no_such_file_or_directory || !IgnoreNonExisting)
return EC;
}
- return error_code::success();
+ return std::error_code();
}
-error_code rename(const Twine &from, const Twine &to) {
+std::error_code rename(const Twine &from, const Twine &to) {
// Get arguments.
SmallString<128> from_storage;
SmallString<128> to_storage;
@@ -224,16 +231,18 @@ error_code rename(const Twine &from, const Twine &to) {
// Convert to utf-16.
SmallVector<wchar_t, 128> wide_from;
SmallVector<wchar_t, 128> wide_to;
- if (error_code ec = UTF8ToUTF16(f, wide_from)) return ec;
- if (error_code ec = UTF8ToUTF16(t, wide_to)) return ec;
+ if (std::error_code ec = UTF8ToUTF16(f, wide_from))
+ return ec;
+ if (std::error_code ec = UTF8ToUTF16(t, wide_to))
+ return ec;
- error_code ec = error_code::success();
+ std::error_code ec = std::error_code();
for (int i = 0; i < 2000; i++) {
if (::MoveFileExW(wide_from.begin(), wide_to.begin(),
MOVEFILE_COPY_ALLOWED | MOVEFILE_REPLACE_EXISTING))
- return error_code::success();
- ec = windows_error(::GetLastError());
- if (ec != windows_error::access_denied)
+ return std::error_code();
+ DWORD LastError = ::GetLastError();
+ if (LastError != ERROR_ACCESS_DENIED)
break;
// Retry MoveFile() at ACCESS_DENIED.
// System scanners (eg. indexer) might open the source file when
@@ -244,46 +253,46 @@ error_code rename(const Twine &from, const Twine &to) {
return ec;
}
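The loop above retries MoveFileExW because system scanners and indexers transiently open freshly written files, making the move fail with ERROR_ACCESS_DENIED. A standalone sketch of the idiom (the 1 ms sleep is an assumption; the hunk does not show what delay, if any, the patched code uses between attempts):

    #include <windows.h>

    bool renameWithRetry(const wchar_t *From, const wchar_t *To) {
      for (int i = 0; i < 2000; ++i) {
        if (::MoveFileExW(From, To,
                          MOVEFILE_COPY_ALLOWED | MOVEFILE_REPLACE_EXISTING))
          return true;
        if (::GetLastError() != ERROR_ACCESS_DENIED)
          return false; // non-transient failure; give up immediately
        ::Sleep(1);     // let the other opener release its handle
      }
      return false;
    }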
-error_code resize_file(const Twine &path, uint64_t size) {
+std::error_code resize_file(const Twine &path, uint64_t size) {
SmallString<128> path_storage;
SmallVector<wchar_t, 128> path_utf16;
- if (error_code ec = UTF8ToUTF16(path.toStringRef(path_storage),
- path_utf16))
+ if (std::error_code ec =
+ UTF8ToUTF16(path.toStringRef(path_storage), path_utf16))
return ec;
int fd = ::_wopen(path_utf16.begin(), O_BINARY | _O_RDWR, S_IWRITE);
if (fd == -1)
- return error_code(errno, generic_category());
+ return std::error_code(errno, std::generic_category());
#ifdef HAVE__CHSIZE_S
errno_t error = ::_chsize_s(fd, size);
#else
errno_t error = ::_chsize(fd, size);
#endif
::close(fd);
- return error_code(error, generic_category());
+ return std::error_code(error, std::generic_category());
}
-error_code exists(const Twine &path, bool &result) {
+std::error_code exists(const Twine &path, bool &result) {
SmallString<128> path_storage;
SmallVector<wchar_t, 128> path_utf16;
- if (error_code ec = UTF8ToUTF16(path.toStringRef(path_storage),
- path_utf16))
+ if (std::error_code ec =
+ UTF8ToUTF16(path.toStringRef(path_storage), path_utf16))
return ec;
DWORD attributes = ::GetFileAttributesW(path_utf16.begin());
if (attributes == INVALID_FILE_ATTRIBUTES) {
// See if the file didn't actually exist.
- error_code ec = make_error_code(windows_error(::GetLastError()));
- if (ec != windows_error::file_not_found &&
- ec != windows_error::path_not_found)
- return ec;
+ DWORD LastError = ::GetLastError();
+ if (LastError != ERROR_FILE_NOT_FOUND &&
+ LastError != ERROR_PATH_NOT_FOUND)
+ return windows_error(LastError);
result = false;
} else
result = true;
- return error_code::success();
+ return std::error_code();
}
bool can_write(const Twine &Path) {
@@ -320,12 +329,14 @@ bool equivalent(file_status A, file_status B) {
A.VolumeSerialNumber == B.VolumeSerialNumber;
}
-error_code equivalent(const Twine &A, const Twine &B, bool &result) {
+std::error_code equivalent(const Twine &A, const Twine &B, bool &result) {
file_status fsA, fsB;
- if (error_code ec = status(A, fsA)) return ec;
- if (error_code ec = status(B, fsB)) return ec;
+ if (std::error_code ec = status(A, fsA))
+ return ec;
+ if (std::error_code ec = status(B, fsB))
+ return ec;
result = equivalent(fsA, fsB);
- return error_code::success();
+ return std::error_code();
}
static bool isReservedName(StringRef path) {
@@ -351,7 +362,7 @@ static bool isReservedName(StringRef path) {
return false;
}
-static error_code getStatus(HANDLE FileHandle, file_status &Result) {
+static std::error_code getStatus(HANDLE FileHandle, file_status &Result) {
if (FileHandle == INVALID_HANDLE_VALUE)
goto handle_status_error;
@@ -363,16 +374,16 @@ static error_code getStatus(HANDLE FileHandle, file_status &Result) {
if (Err != NO_ERROR)
return windows_error(Err);
Result = file_status(file_type::type_unknown);
- return error_code::success();
+ return std::error_code();
}
case FILE_TYPE_DISK:
break;
case FILE_TYPE_CHAR:
Result = file_status(file_type::character_file);
- return error_code::success();
+ return std::error_code();
case FILE_TYPE_PIPE:
Result = file_status(file_type::fifo_file);
- return error_code::success();
+ return std::error_code();
}
BY_HANDLE_FILE_INFORMATION Info;
@@ -388,32 +399,32 @@ static error_code getStatus(HANDLE FileHandle, file_status &Result) {
Info.ftLastWriteTime.dwLowDateTime,
Info.dwVolumeSerialNumber, Info.nFileSizeHigh,
Info.nFileSizeLow, Info.nFileIndexHigh, Info.nFileIndexLow);
- return error_code::success();
+ return std::error_code();
}
handle_status_error:
- error_code EC = windows_error(::GetLastError());
- if (EC == windows_error::file_not_found ||
- EC == windows_error::path_not_found)
+ DWORD LastError = ::GetLastError();
+ if (LastError == ERROR_FILE_NOT_FOUND ||
+ LastError == ERROR_PATH_NOT_FOUND)
Result = file_status(file_type::file_not_found);
- else if (EC == windows_error::sharing_violation)
+ else if (LastError == ERROR_SHARING_VIOLATION)
Result = file_status(file_type::type_unknown);
else
Result = file_status(file_type::status_error);
- return EC;
+ return windows_error(LastError);
}
-error_code status(const Twine &path, file_status &result) {
+std::error_code status(const Twine &path, file_status &result) {
SmallString<128> path_storage;
SmallVector<wchar_t, 128> path_utf16;
StringRef path8 = path.toStringRef(path_storage);
if (isReservedName(path8)) {
result = file_status(file_type::character_file);
- return error_code::success();
+ return std::error_code();
}
- if (error_code ec = UTF8ToUTF16(path8, path_utf16))
+ if (std::error_code ec = UTF8ToUTF16(path8, path_utf16))
return ec;
DWORD attr = ::GetFileAttributesW(path_utf16.begin());
@@ -444,12 +455,12 @@ error_code status(const Twine &path, file_status &result) {
return getStatus(h, result);
}
-error_code status(int FD, file_status &Result) {
+std::error_code status(int FD, file_status &Result) {
HANDLE FileHandle = reinterpret_cast<HANDLE>(_get_osfhandle(FD));
return getStatus(FileHandle, Result);
}
-error_code setLastModificationAndAccessTime(int FD, TimeValue Time) {
+std::error_code setLastModificationAndAccessTime(int FD, TimeValue Time) {
ULARGE_INTEGER UI;
UI.QuadPart = Time.toWin32Time();
FILETIME FT;
@@ -458,52 +469,10 @@ error_code setLastModificationAndAccessTime(int FD, TimeValue Time) {
HANDLE FileHandle = reinterpret_cast<HANDLE>(_get_osfhandle(FD));
if (!SetFileTime(FileHandle, NULL, &FT, &FT))
return windows_error(::GetLastError());
- return error_code::success();
-}
-
-error_code get_magic(const Twine &path, uint32_t len,
- SmallVectorImpl<char> &result) {
- SmallString<128> path_storage;
- SmallVector<wchar_t, 128> path_utf16;
- result.set_size(0);
-
- // Convert path to UTF-16.
- if (error_code ec = UTF8ToUTF16(path.toStringRef(path_storage),
- path_utf16))
- return ec;
-
- // Open file.
- HANDLE file = ::CreateFileW(c_str(path_utf16),
- GENERIC_READ,
- FILE_SHARE_READ,
- NULL,
- OPEN_EXISTING,
- FILE_ATTRIBUTE_READONLY,
- NULL);
- if (file == INVALID_HANDLE_VALUE)
- return windows_error(::GetLastError());
-
- // Allocate buffer.
- result.reserve(len);
-
- // Get magic!
- DWORD bytes_read = 0;
- BOOL read_success = ::ReadFile(file, result.data(), len, &bytes_read, NULL);
- error_code ec = windows_error(::GetLastError());
- ::CloseHandle(file);
- if (!read_success || (bytes_read != len)) {
- // Set result size to the number of bytes read if it's valid.
- if (bytes_read <= len)
- result.set_size(bytes_read);
- // ERROR_HANDLE_EOF is mapped to errc::value_too_large.
- return ec;
- }
-
- result.set_size(len);
- return error_code::success();
+ return std::error_code();
}
-error_code mapped_file_region::init(int FD, bool CloseFD, uint64_t Offset) {
+std::error_code mapped_file_region::init(int FD, bool CloseFD, uint64_t Offset) {
FileDescriptor = FD;
// Make sure that the requested size fits within SIZE_T.
if (Size > std::numeric_limits<SIZE_T>::max()) {
@@ -528,7 +497,7 @@ error_code mapped_file_region::init(int FD, bool CloseFD, uint64_t Offset) {
(Offset + Size) & 0xffffffff,
0);
if (FileMappingHandle == NULL) {
- error_code ec = windows_error(GetLastError());
+ std::error_code ec = windows_error(GetLastError());
if (FileDescriptor) {
if (CloseFD)
_close(FileDescriptor);
@@ -549,7 +518,7 @@ error_code mapped_file_region::init(int FD, bool CloseFD, uint64_t Offset) {
Offset & 0xffffffff,
Size);
if (Mapping == NULL) {
- error_code ec = windows_error(GetLastError());
+ std::error_code ec = windows_error(GetLastError());
::CloseHandle(FileMappingHandle);
if (FileDescriptor) {
if (CloseFD)
@@ -563,7 +532,7 @@ error_code mapped_file_region::init(int FD, bool CloseFD, uint64_t Offset) {
MEMORY_BASIC_INFORMATION mbi;
SIZE_T Result = VirtualQuery(Mapping, &mbi, sizeof(mbi));
if (Result == 0) {
- error_code ec = windows_error(GetLastError());
+ std::error_code ec = windows_error(GetLastError());
::UnmapViewOfFile(Mapping);
::CloseHandle(FileMappingHandle);
if (FileDescriptor) {
@@ -584,14 +553,14 @@ error_code mapped_file_region::init(int FD, bool CloseFD, uint64_t Offset) {
_close(FileDescriptor); // Also closes FileHandle.
} else
::CloseHandle(FileHandle);
- return error_code::success();
+ return std::error_code();
}
mapped_file_region::mapped_file_region(const Twine &path,
mapmode mode,
uint64_t length,
uint64_t offset,
- error_code &ec)
+ std::error_code &ec)
: Mode(mode)
, Size(length)
, Mapping()
@@ -636,7 +605,7 @@ mapped_file_region::mapped_file_region(int fd,
mapmode mode,
uint64_t length,
uint64_t offset,
- error_code &ec)
+ std::error_code &ec)
: Mode(mode)
, Size(length)
, Mapping()
@@ -704,12 +673,11 @@ int mapped_file_region::alignment() {
return SysInfo.dwAllocationGranularity;
}
-error_code detail::directory_iterator_construct(detail::DirIterState &it,
+std::error_code detail::directory_iterator_construct(detail::DirIterState &it,
StringRef path){
SmallVector<wchar_t, 128> path_utf16;
- if (error_code ec = UTF8ToUTF16(path,
- path_utf16))
+ if (std::error_code ec = UTF8ToUTF16(path, path_utf16))
return ec;
// Convert path to the format that Windows is happy with.
@@ -733,19 +701,19 @@ error_code detail::directory_iterator_construct(detail::DirIterState &it,
(FilenameLen == 2 && FirstFind.cFileName[0] == L'.' &&
FirstFind.cFileName[1] == L'.'))
if (!::FindNextFileW(FindHandle, &FirstFind)) {
- error_code ec = windows_error(::GetLastError());
+ DWORD LastError = ::GetLastError();
// Check for end.
- if (ec == windows_error::no_more_files)
+ if (LastError == ERROR_NO_MORE_FILES)
return detail::directory_iterator_destruct(it);
- return ec;
+ return windows_error(LastError);
} else
FilenameLen = ::wcslen(FirstFind.cFileName);
// Construct the current directory entry.
SmallString<128> directory_entry_name_utf8;
- if (error_code ec = UTF16ToUTF8(FirstFind.cFileName,
- ::wcslen(FirstFind.cFileName),
- directory_entry_name_utf8))
+ if (std::error_code ec =
+ UTF16ToUTF8(FirstFind.cFileName, ::wcslen(FirstFind.cFileName),
+ directory_entry_name_utf8))
return ec;
it.IterationHandle = intptr_t(FindHandle.take());
@@ -753,26 +721,26 @@ error_code detail::directory_iterator_construct(detail::DirIterState &it,
path::append(directory_entry_path, directory_entry_name_utf8.str());
it.CurrentEntry = directory_entry(directory_entry_path.str());
- return error_code::success();
+ return std::error_code();
}
-error_code detail::directory_iterator_destruct(detail::DirIterState &it) {
+std::error_code detail::directory_iterator_destruct(detail::DirIterState &it) {
if (it.IterationHandle != 0)
// Closes the handle if it's valid.
ScopedFindHandle close(HANDLE(it.IterationHandle));
it.IterationHandle = 0;
it.CurrentEntry = directory_entry();
- return error_code::success();
+ return std::error_code();
}
-error_code detail::directory_iterator_increment(detail::DirIterState &it) {
+std::error_code detail::directory_iterator_increment(detail::DirIterState &it) {
WIN32_FIND_DATAW FindData;
if (!::FindNextFileW(HANDLE(it.IterationHandle), &FindData)) {
- error_code ec = windows_error(::GetLastError());
+ DWORD LastError = ::GetLastError();
// Check for end.
- if (ec == windows_error::no_more_files)
+ if (LastError == ERROR_NO_MORE_FILES)
return detail::directory_iterator_destruct(it);
- return ec;
+ return windows_error(LastError);
}
size_t FilenameLen = ::wcslen(FindData.cFileName);
@@ -782,60 +750,50 @@ error_code detail::directory_iterator_increment(detail::DirIterState &it) {
return directory_iterator_increment(it);
SmallString<128> directory_entry_path_utf8;
- if (error_code ec = UTF16ToUTF8(FindData.cFileName,
- ::wcslen(FindData.cFileName),
- directory_entry_path_utf8))
+ if (std::error_code ec =
+ UTF16ToUTF8(FindData.cFileName, ::wcslen(FindData.cFileName),
+ directory_entry_path_utf8))
return ec;
it.CurrentEntry.replace_filename(Twine(directory_entry_path_utf8));
- return error_code::success();
+ return std::error_code();
}
-error_code map_file_pages(const Twine &path, off_t file_offset, size_t size,
- bool map_writable, void *&result) {
- assert(0 && "NOT IMPLEMENTED");
- return windows_error::invalid_function;
-}
-
-error_code unmap_file_pages(void *base, size_t size) {
- assert(0 && "NOT IMPLEMENTED");
- return windows_error::invalid_function;
-}
-
-error_code openFileForRead(const Twine &Name, int &ResultFD) {
+std::error_code openFileForRead(const Twine &Name, int &ResultFD) {
SmallString<128> PathStorage;
SmallVector<wchar_t, 128> PathUTF16;
- if (error_code EC = UTF8ToUTF16(Name.toStringRef(PathStorage),
- PathUTF16))
+ if (std::error_code EC =
+ UTF8ToUTF16(Name.toStringRef(PathStorage), PathUTF16))
return EC;
HANDLE H = ::CreateFileW(PathUTF16.begin(), GENERIC_READ,
FILE_SHARE_READ | FILE_SHARE_WRITE, NULL,
OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);
if (H == INVALID_HANDLE_VALUE) {
- error_code EC = windows_error(::GetLastError());
+ DWORD LastError = ::GetLastError();
+ std::error_code EC = windows_error(LastError);
// Provide a better error message when trying to open directories.
// This only runs if we failed to open the file, so there are probably
// no performance issues.
- if (EC != windows_error::access_denied)
+ if (LastError != ERROR_ACCESS_DENIED)
return EC;
if (is_directory(Name))
- return error_code(errc::is_a_directory, posix_category());
+ return make_error_code(errc::is_a_directory);
return EC;
}
int FD = ::_open_osfhandle(intptr_t(H), 0);
if (FD == -1) {
::CloseHandle(H);
- return windows_error::invalid_handle;
+ return windows_error(ERROR_INVALID_HANDLE);
}
ResultFD = FD;
- return error_code::success();
+ return std::error_code();
}
-error_code openFileForWrite(const Twine &Name, int &ResultFD,
+std::error_code openFileForWrite(const Twine &Name, int &ResultFD,
sys::fs::OpenFlags Flags, unsigned Mode) {
// Verify that we don't have both "append" and "excl".
assert((!(Flags & sys::fs::F_Excl) || !(Flags & sys::fs::F_Append)) &&
@@ -844,8 +802,8 @@ error_code openFileForWrite(const Twine &Name, int &ResultFD,
SmallString<128> PathStorage;
SmallVector<wchar_t, 128> PathUTF16;
- if (error_code EC = UTF8ToUTF16(Name.toStringRef(PathStorage),
- PathUTF16))
+ if (std::error_code EC =
+ UTF8ToUTF16(Name.toStringRef(PathStorage), PathUTF16))
return EC;
DWORD CreationDisposition;
@@ -865,14 +823,15 @@ error_code openFileForWrite(const Twine &Name, int &ResultFD,
CreationDisposition, FILE_ATTRIBUTE_NORMAL, NULL);
if (H == INVALID_HANDLE_VALUE) {
- error_code EC = windows_error(::GetLastError());
+ DWORD LastError = ::GetLastError();
+ std::error_code EC = windows_error(LastError);
// Provide a better error message when trying to open directories.
// This only runs if we failed to open the file, so there are probably
// no performance issues.
- if (EC != windows_error::access_denied)
+ if (LastError != ERROR_ACCESS_DENIED)
return EC;
if (is_directory(Name))
- return error_code(errc::is_a_directory, posix_category());
+ return make_error_code(errc::is_a_directory);
return EC;
}
@@ -886,11 +845,11 @@ error_code openFileForWrite(const Twine &Name, int &ResultFD,
int FD = ::_open_osfhandle(intptr_t(H), OpenFlags);
if (FD == -1) {
::CloseHandle(H);
- return windows_error::invalid_handle;
+ return windows_error(ERROR_INVALID_HANDLE);
}
ResultFD = FD;
- return error_code::success();
+ return std::error_code();
}
} // end namespace fs
@@ -911,14 +870,14 @@ bool home_directory(SmallVectorImpl<char> &result) {
} // end namespace path
namespace windows {
-llvm::error_code UTF8ToUTF16(llvm::StringRef utf8,
- llvm::SmallVectorImpl<wchar_t> &utf16) {
+std::error_code UTF8ToUTF16(llvm::StringRef utf8,
+ llvm::SmallVectorImpl<wchar_t> &utf16) {
if (!utf8.empty()) {
int len = ::MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, utf8.begin(),
utf8.size(), utf16.begin(), 0);
if (len == 0)
- return llvm::windows_error(::GetLastError());
+ return windows_error(::GetLastError());
utf16.reserve(len + 1);
utf16.set_size(len);
@@ -927,25 +886,25 @@ llvm::error_code UTF8ToUTF16(llvm::StringRef utf8,
utf8.size(), utf16.begin(), utf16.size());
if (len == 0)
- return llvm::windows_error(::GetLastError());
+ return windows_error(::GetLastError());
}
// Make utf16 null terminated.
utf16.push_back(0);
utf16.pop_back();
- return llvm::error_code::success();
+ return std::error_code();
}
-llvm::error_code UTF16ToUTF8(const wchar_t *utf16, size_t utf16_len,
- llvm::SmallVectorImpl<char> &utf8) {
+std::error_code UTF16ToUTF8(const wchar_t *utf16, size_t utf16_len,
+ llvm::SmallVectorImpl<char> &utf8) {
if (utf16_len) {
// Get length.
int len = ::WideCharToMultiByte(CP_UTF8, 0, utf16, utf16_len, utf8.begin(),
0, NULL, NULL);
if (len == 0)
- return llvm::windows_error(::GetLastError());
+ return windows_error(::GetLastError());
utf8.reserve(len);
utf8.set_size(len);
@@ -955,14 +914,14 @@ llvm::error_code UTF16ToUTF8(const wchar_t *utf16, size_t utf16_len,
utf8.size(), NULL, NULL);
if (len == 0)
- return llvm::windows_error(::GetLastError());
+ return windows_error(::GetLastError());
}
// Make utf8 null terminated.
utf8.push_back(0);
utf8.pop_back();
- return llvm::error_code::success();
+ return std::error_code();
}
} // end namespace windows
} // end namespace sys
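Both conversion functions above follow the Win32 two-call protocol: the first MultiByteToWideChar / WideCharToMultiByte call with a zero-sized output buffer only computes the required length, and the second call performs the conversion. A minimal sketch of the same protocol using std::wstring instead of SmallVectorImpl (utf8ToWide is a hypothetical helper):

    #include <windows.h>
    #include <string>

    bool utf8ToWide(const std::string &UTF8, std::wstring &Wide) {
      if (UTF8.empty())
        return true;
      int Len = ::MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS,
                                      UTF8.data(), (int)UTF8.size(),
                                      nullptr, 0); // pass 1: measure
      if (Len == 0)
        return false; // ::GetLastError() holds the reason
      Wide.resize(Len);
      return ::MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS,
                                   UTF8.data(), (int)UTF8.size(),
                                   &Wide[0], Len) != 0; // pass 2: convert
    }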
diff --git a/lib/Support/Windows/Process.inc b/lib/Support/Windows/Process.inc
index c3df801..81aee0e 100644
--- a/lib/Support/Windows/Process.inc
+++ b/lib/Support/Windows/Process.inc
@@ -12,6 +12,8 @@
//===----------------------------------------------------------------------===//
#include "llvm/Support/Allocator.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/WindowsError.h"
#include <malloc.h>
// The Windows.h header must be after LLVM and standard headers.
@@ -47,7 +49,6 @@
using namespace llvm;
using namespace sys;
-
process::id_type self_process::get_id() {
return GetCurrentProcessId();
}
@@ -178,12 +179,16 @@ Optional<std::string> Process::GetEnv(StringRef Name) {
return std::string(Res.data());
}
-error_code
+static std::error_code windows_error(DWORD E) {
+ return mapWindowsError(E);
+}
+
+std::error_code
Process::GetArgumentVector(SmallVectorImpl<const char *> &Args,
ArrayRef<const char *>,
SpecificBumpPtrAllocator<char> &ArgAllocator) {
int NewArgCount;
- error_code ec;
+ std::error_code ec;
wchar_t **UnicodeCommandLine = CommandLineToArgvW(GetCommandLineW(),
&NewArgCount);
@@ -208,7 +213,7 @@ Process::GetArgumentVector(SmallVectorImpl<const char *> &Args,
if (ec)
return ec;
- return error_code::success();
+ return std::error_code();
}
bool Process::StandardInIsUserInput() {
@@ -363,12 +368,12 @@ unsigned Process::GetRandomNumber() {
HCRYPTPROV HCPC;
if (!::CryptAcquireContextW(&HCPC, NULL, NULL, PROV_RSA_FULL,
CRYPT_VERIFYCONTEXT))
- assert(false && "Could not acquire a cryptographic context");
+ report_fatal_error("Could not acquire a cryptographic context");
ScopedCryptContext CryptoProvider(HCPC);
unsigned Ret;
if (!::CryptGenRandom(CryptoProvider, sizeof(Ret),
reinterpret_cast<BYTE *>(&Ret)))
- assert(false && "Could not generate a random number");
+ report_fatal_error("Could not generate a random number");
return Ret;
}
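Replacing assert with report_fatal_error here matters because assert compiles to nothing under NDEBUG: a release build would have continued past a failed CryptAcquireContextW or CryptGenRandom call and handed back uninitialized data as the "random" number. A minimal stand-in showing the unconditional failure path:

    #include <cstdio>
    #include <cstdlib>

    // Unlike assert(), this path survives NDEBUG builds.
    [[noreturn]] static void fatalError(const char *Msg) {
      std::fprintf(stderr, "LLVM ERROR: %s\n", Msg);
      std::abort();
    }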
diff --git a/lib/Support/Windows/Program.inc b/lib/Support/Windows/Program.inc
index 5827c10..b2f71ae 100644
--- a/lib/Support/Windows/Program.inc
+++ b/lib/Support/Windows/Program.inc
@@ -226,7 +226,7 @@ static bool Execute(ProcessInfo &PI, StringRef Program, const char **args,
// an environment block by concatenating them.
for (unsigned i = 0; envp[i]; ++i) {
SmallVector<wchar_t, MAX_PATH> EnvString;
- if (error_code ec = windows::UTF8ToUTF16(envp[i], EnvString)) {
+ if (std::error_code ec = windows::UTF8ToUTF16(envp[i], EnvString)) {
SetLastError(ec.value());
MakeErrMsg(ErrMsg, "Unable to convert environment variable to UTF-16");
return false;
@@ -290,7 +290,7 @@ static bool Execute(ProcessInfo &PI, StringRef Program, const char **args,
fflush(stderr);
SmallVector<wchar_t, MAX_PATH> ProgramUtf16;
- if (error_code ec = windows::UTF8ToUTF16(Program, ProgramUtf16)) {
+ if (std::error_code ec = windows::UTF8ToUTF16(Program, ProgramUtf16)) {
SetLastError(ec.value());
MakeErrMsg(ErrMsg,
std::string("Unable to convert application name to UTF-16"));
@@ -298,7 +298,7 @@ static bool Execute(ProcessInfo &PI, StringRef Program, const char **args,
}
SmallVector<wchar_t, MAX_PATH> CommandUtf16;
- if (error_code ec = windows::UTF8ToUTF16(command.get(), CommandUtf16)) {
+ if (std::error_code ec = windows::UTF8ToUTF16(command.get(), CommandUtf16)) {
SetLastError(ec.value());
MakeErrMsg(ErrMsg,
std::string("Unable to convert command-line to UTF-16"));
@@ -422,18 +422,18 @@ ProcessInfo sys::Wait(const ProcessInfo &PI, unsigned SecondsToWait,
return WaitResult;
}
-error_code sys::ChangeStdinToBinary(){
+std::error_code sys::ChangeStdinToBinary(){
int result = _setmode( _fileno(stdin), _O_BINARY );
if (result == -1)
- return error_code(errno, generic_category());
- return make_error_code(errc::success);
+ return std::error_code(errno, std::generic_category());
+ return std::error_code();
}
-error_code sys::ChangeStdoutToBinary(){
+ std::error_code sys::ChangeStdoutToBinary(){
int result = _setmode( _fileno(stdout), _O_BINARY );
if (result == -1)
- return error_code(errno, generic_category());
- return make_error_code(errc::success);
+ return std::error_code(errno, std::generic_category());
+ return std::error_code();
}
bool llvm::sys::argumentsFitWithinSystemLimits(ArrayRef<const char*> Args) {
diff --git a/lib/Support/Windows/WindowsSupport.h b/lib/Support/Windows/WindowsSupport.h
index 6bef444..f68835b 100644
--- a/lib/Support/Windows/WindowsSupport.h
+++ b/lib/Support/Windows/WindowsSupport.h
@@ -32,7 +32,7 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/Config/config.h" // Get build system configuration settings
#include "llvm/Support/Compiler.h"
-#include "llvm/Support/system_error.h"
+#include <system_error>
#include <windows.h>
#include <wincrypt.h>
#include <cassert>
@@ -163,10 +163,9 @@ c_str(SmallVectorImpl<T> &str) {
namespace sys {
namespace windows {
-error_code UTF8ToUTF16(StringRef utf8,
- SmallVectorImpl<wchar_t> &utf16);
-error_code UTF16ToUTF8(const wchar_t *utf16, size_t utf16_len,
- SmallVectorImpl<char> &utf8);
+std::error_code UTF8ToUTF16(StringRef utf8, SmallVectorImpl<wchar_t> &utf16);
+std::error_code UTF16ToUTF8(const wchar_t *utf16, size_t utf16_len,
+ SmallVectorImpl<char> &utf8);
} // end namespace windows
} // end namespace sys
} // end namespace llvm.
diff --git a/lib/Support/Windows/system_error.inc b/lib/Support/Windows/system_error.inc
deleted file mode 100644
index 37ec81d..0000000
--- a/lib/Support/Windows/system_error.inc
+++ /dev/null
@@ -1,142 +0,0 @@
-//===- llvm/Support/Win32/system_error.inc - Windows error_code --*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file provides the Windows specific implementation of the error_code
-// and error_condition classes.
-//
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-//=== WARNING: Implementation here must contain only generic Windows code that
-//=== is guaranteed to work on *all* Windows variants.
-//===----------------------------------------------------------------------===//
-
-#include <windows.h>
-#include <winerror.h>
-
-using namespace llvm;
-
-std::string
-_system_error_category::message(int ev) const {
- LPVOID lpMsgBuf = 0;
- DWORD retval = ::FormatMessageA(
- FORMAT_MESSAGE_ALLOCATE_BUFFER |
- FORMAT_MESSAGE_FROM_SYSTEM |
- FORMAT_MESSAGE_IGNORE_INSERTS,
- NULL,
- ev,
- MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), // Default language
- (LPSTR) &lpMsgBuf,
- 0,
- NULL);
- if (retval == 0) {
- ::LocalFree(lpMsgBuf);
- return std::string("Unknown error");
- }
-
- std::string str( static_cast<LPCSTR>(lpMsgBuf) );
- ::LocalFree(lpMsgBuf);
-
- while (str.size()
- && (str[str.size()-1] == '\n' || str[str.size()-1] == '\r'))
- str.erase( str.size()-1 );
- if (str.size() && str[str.size()-1] == '.')
- str.erase( str.size()-1 );
- return str;
-}
-
-// I'd rather not double the line count of the following.
-#define MAP_ERR_TO_COND(x, y) case x: return make_error_condition(errc::y)
-
-error_condition
-_system_error_category::default_error_condition(int ev) const {
- switch (ev) {
- MAP_ERR_TO_COND(0, success);
- // Windows system -> posix_errno decode table ---------------------------//
- // see WinError.h comments for descriptions of errors
- MAP_ERR_TO_COND(ERROR_ACCESS_DENIED, permission_denied);
- MAP_ERR_TO_COND(ERROR_ALREADY_EXISTS, file_exists);
- MAP_ERR_TO_COND(ERROR_BAD_UNIT, no_such_device);
- MAP_ERR_TO_COND(ERROR_BUFFER_OVERFLOW, filename_too_long);
- MAP_ERR_TO_COND(ERROR_BUSY, device_or_resource_busy);
- MAP_ERR_TO_COND(ERROR_BUSY_DRIVE, device_or_resource_busy);
- MAP_ERR_TO_COND(ERROR_CANNOT_MAKE, permission_denied);
- MAP_ERR_TO_COND(ERROR_CANTOPEN, io_error);
- MAP_ERR_TO_COND(ERROR_CANTREAD, io_error);
- MAP_ERR_TO_COND(ERROR_CANTWRITE, io_error);
- MAP_ERR_TO_COND(ERROR_CURRENT_DIRECTORY, permission_denied);
- MAP_ERR_TO_COND(ERROR_DEV_NOT_EXIST, no_such_device);
- MAP_ERR_TO_COND(ERROR_DEVICE_IN_USE, device_or_resource_busy);
- MAP_ERR_TO_COND(ERROR_DIR_NOT_EMPTY, directory_not_empty);
- MAP_ERR_TO_COND(ERROR_DIRECTORY, invalid_argument);
- MAP_ERR_TO_COND(ERROR_DISK_FULL, no_space_on_device);
- MAP_ERR_TO_COND(ERROR_FILE_EXISTS, file_exists);
- MAP_ERR_TO_COND(ERROR_FILE_NOT_FOUND, no_such_file_or_directory);
- MAP_ERR_TO_COND(ERROR_HANDLE_DISK_FULL, no_space_on_device);
- MAP_ERR_TO_COND(ERROR_HANDLE_EOF, value_too_large);
- MAP_ERR_TO_COND(ERROR_INVALID_ACCESS, permission_denied);
- MAP_ERR_TO_COND(ERROR_INVALID_DRIVE, no_such_device);
- MAP_ERR_TO_COND(ERROR_INVALID_FUNCTION, function_not_supported);
- MAP_ERR_TO_COND(ERROR_INVALID_HANDLE, invalid_argument);
- MAP_ERR_TO_COND(ERROR_INVALID_NAME, invalid_argument);
- MAP_ERR_TO_COND(ERROR_LOCK_VIOLATION, no_lock_available);
- MAP_ERR_TO_COND(ERROR_LOCKED, no_lock_available);
- MAP_ERR_TO_COND(ERROR_NEGATIVE_SEEK, invalid_argument);
- MAP_ERR_TO_COND(ERROR_NOACCESS, permission_denied);
- MAP_ERR_TO_COND(ERROR_NOT_ENOUGH_MEMORY, not_enough_memory);
- MAP_ERR_TO_COND(ERROR_NOT_READY, resource_unavailable_try_again);
- MAP_ERR_TO_COND(ERROR_NOT_SAME_DEVICE, cross_device_link);
- MAP_ERR_TO_COND(ERROR_OPEN_FAILED, io_error);
- MAP_ERR_TO_COND(ERROR_OPEN_FILES, device_or_resource_busy);
- MAP_ERR_TO_COND(ERROR_OPERATION_ABORTED, operation_canceled);
- MAP_ERR_TO_COND(ERROR_OUTOFMEMORY, not_enough_memory);
- MAP_ERR_TO_COND(ERROR_PATH_NOT_FOUND, no_such_file_or_directory);
- MAP_ERR_TO_COND(ERROR_BAD_NETPATH, no_such_file_or_directory);
- MAP_ERR_TO_COND(ERROR_READ_FAULT, io_error);
- MAP_ERR_TO_COND(ERROR_RETRY, resource_unavailable_try_again);
- MAP_ERR_TO_COND(ERROR_SEEK, io_error);
- MAP_ERR_TO_COND(ERROR_SHARING_VIOLATION, permission_denied);
- MAP_ERR_TO_COND(ERROR_TOO_MANY_OPEN_FILES, too_many_files_open);
- MAP_ERR_TO_COND(ERROR_WRITE_FAULT, io_error);
- MAP_ERR_TO_COND(ERROR_WRITE_PROTECT, permission_denied);
- MAP_ERR_TO_COND(ERROR_SEM_TIMEOUT, timed_out);
- MAP_ERR_TO_COND(WSAEACCES, permission_denied);
- MAP_ERR_TO_COND(WSAEADDRINUSE, address_in_use);
- MAP_ERR_TO_COND(WSAEADDRNOTAVAIL, address_not_available);
- MAP_ERR_TO_COND(WSAEAFNOSUPPORT, address_family_not_supported);
- MAP_ERR_TO_COND(WSAEALREADY, connection_already_in_progress);
- MAP_ERR_TO_COND(WSAEBADF, bad_file_descriptor);
- MAP_ERR_TO_COND(WSAECONNABORTED, connection_aborted);
- MAP_ERR_TO_COND(WSAECONNREFUSED, connection_refused);
- MAP_ERR_TO_COND(WSAECONNRESET, connection_reset);
- MAP_ERR_TO_COND(WSAEDESTADDRREQ, destination_address_required);
- MAP_ERR_TO_COND(WSAEFAULT, bad_address);
- MAP_ERR_TO_COND(WSAEHOSTUNREACH, host_unreachable);
- MAP_ERR_TO_COND(WSAEINPROGRESS, operation_in_progress);
- MAP_ERR_TO_COND(WSAEINTR, interrupted);
- MAP_ERR_TO_COND(WSAEINVAL, invalid_argument);
- MAP_ERR_TO_COND(WSAEISCONN, already_connected);
- MAP_ERR_TO_COND(WSAEMFILE, too_many_files_open);
- MAP_ERR_TO_COND(WSAEMSGSIZE, message_size);
- MAP_ERR_TO_COND(WSAENAMETOOLONG, filename_too_long);
- MAP_ERR_TO_COND(WSAENETDOWN, network_down);
- MAP_ERR_TO_COND(WSAENETRESET, network_reset);
- MAP_ERR_TO_COND(WSAENETUNREACH, network_unreachable);
- MAP_ERR_TO_COND(WSAENOBUFS, no_buffer_space);
- MAP_ERR_TO_COND(WSAENOPROTOOPT, no_protocol_option);
- MAP_ERR_TO_COND(WSAENOTCONN, not_connected);
- MAP_ERR_TO_COND(WSAENOTSOCK, not_a_socket);
- MAP_ERR_TO_COND(WSAEOPNOTSUPP, operation_not_supported);
- MAP_ERR_TO_COND(WSAEPROTONOSUPPORT, protocol_not_supported);
- MAP_ERR_TO_COND(WSAEPROTOTYPE, wrong_protocol_type);
- MAP_ERR_TO_COND(WSAETIMEDOUT, timed_out);
- MAP_ERR_TO_COND(WSAEWOULDBLOCK, operation_would_block);
- default: return error_condition(ev, system_category());
- }
-}
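The deleted table is superseded by mapWindowsError from llvm/Support/WindowsError.h, which turns Win32 error codes into std::error_code values directly. An abbreviated sketch of the shape of that mapping, with two representative entries only; the real function covers far more codes, and the fallback category shown is an assumption:

    #include <system_error>
    typedef unsigned long DWORD; // stand-in for <windows.h>

    std::error_code mapWindowsErrorSketch(DWORD Err) {
      switch (Err) {
      case 0: return std::error_code(); // success
      case 2: // ERROR_FILE_NOT_FOUND
        return std::make_error_code(std::errc::no_such_file_or_directory);
      case 5: // ERROR_ACCESS_DENIED
        return std::make_error_code(std::errc::permission_denied);
      default:
        return std::error_code(Err, std::system_category());
      }
    }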
diff --git a/lib/Support/YAMLTraits.cpp b/lib/Support/YAMLTraits.cpp
index e5f9494..5212624 100644
--- a/lib/Support/YAMLTraits.cpp
+++ b/lib/Support/YAMLTraits.cpp
@@ -7,6 +7,7 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/Support/Errc.h"
#include "llvm/Support/YAMLTraits.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Support/Casting.h"
@@ -56,9 +57,7 @@ Input::Input(StringRef InputContent,
Input::~Input() {
}
-error_code Input::error() {
- return EC;
-}
+std::error_code Input::error() { return EC; }
// Pin the vtables to this file.
void Input::HNode::anchor() {}
@@ -90,8 +89,8 @@ bool Input::setCurrentDocument() {
return false;
}
-void Input::nextDocument() {
- ++DocIterator;
+bool Input::nextDocument() {
+ return ++DocIterator != Strm->end();
}
bool Input::mapTag(StringRef Tag, bool Default) {
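With nextDocument() now reporting whether another document follows, a driver can walk a multi-document YAML stream without a separate end test. A hedged sketch against the llvm::yaml::Input interface (processDocument is a hypothetical callback):

    #include "llvm/Support/YAMLTraits.h"

    void forEachDocument(llvm::yaml::Input &In,
                         void (*processDocument)(llvm::yaml::Input &)) {
      do {
        if (!In.setCurrentDocument())
          break;               // empty stream or parse error
        processDocument(In);
      } while (In.nextDocument()); // false after the last document
    }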
diff --git a/lib/Support/raw_ostream.cpp b/lib/Support/raw_ostream.cpp
index f55838e..f7c213a 100644
--- a/lib/Support/raw_ostream.cpp
+++ b/lib/Support/raw_ostream.cpp
@@ -22,10 +22,10 @@
#include "llvm/Support/Format.h"
#include "llvm/Support/Process.h"
#include "llvm/Support/Program.h"
-#include "llvm/Support/system_error.h"
#include <cctype>
#include <cerrno>
#include <sys/stat.h>
+#include <system_error>
// <fcntl.h> may provide O_BINARY.
#if defined(HAVE_FCNTL_H)
@@ -450,7 +450,7 @@ raw_fd_ostream::raw_fd_ostream(const char *Filename, std::string &ErrorInfo,
return;
}
- error_code EC = sys::fs::openFileForWrite(Filename, FD, Flags);
+ std::error_code EC = sys::fs::openFileForWrite(Filename, FD, Flags);
if (EC) {
ErrorInfo = "Error opening output file '" + std::string(Filename) + "': " +
diff --git a/lib/Support/regcclass.h b/lib/Support/regcclass.h
index 2cea3e4..7fd6604 100644
--- a/lib/Support/regcclass.h
+++ b/lib/Support/regcclass.h
@@ -37,6 +37,9 @@
* @(#)cclass.h 8.3 (Berkeley) 3/20/94
*/
+#ifndef LLVM_SUPPORT_REGCCLASS_H
+#define LLVM_SUPPORT_REGCCLASS_H
+
/* character-class table */
static struct cclass {
const char *name;
@@ -68,3 +71,5 @@ static struct cclass {
""} ,
{ NULL, 0, "" }
};
+
+#endif
diff --git a/lib/Support/regcname.h b/lib/Support/regcname.h
index 3c0bb24..891d255 100644
--- a/lib/Support/regcname.h
+++ b/lib/Support/regcname.h
@@ -35,6 +35,9 @@
* @(#)cname.h 8.3 (Berkeley) 3/20/94
*/
+#ifndef LLVM_SUPPORT_REGCNAME_H
+#define LLVM_SUPPORT_REGCNAME_H
+
/* character-name table */
static struct cname {
const char *name;
@@ -137,3 +140,5 @@ static struct cname {
{ "DEL", '\177' },
{ NULL, 0 }
};
+
+#endif
diff --git a/lib/Support/regex2.h b/lib/Support/regex2.h
index 21659c3..d81bfbc 100644
--- a/lib/Support/regex2.h
+++ b/lib/Support/regex2.h
@@ -35,6 +35,9 @@
* @(#)regex2.h 8.4 (Berkeley) 3/20/94
*/
+#ifndef LLVM_SUPPORT_REGEX2_H
+#define LLVM_SUPPORT_REGEX2_H
+
/*
* internals of regex_t
*/
@@ -155,3 +158,5 @@ struct re_guts {
/* misc utilities */
#define OUT (CHAR_MAX+1) /* a non-character value */
#define ISWORD(c) (isalnum(c&0xff) || (c) == '_')
+
+#endif
diff --git a/lib/Support/regutils.h b/lib/Support/regutils.h
index d0ee100..49a975c 100644
--- a/lib/Support/regutils.h
+++ b/lib/Support/regutils.h
@@ -35,6 +35,9 @@
* @(#)utils.h 8.3 (Berkeley) 3/20/94
*/
+#ifndef LLVM_SUPPORT_REGUTILS_H
+#define LLVM_SUPPORT_REGUTILS_H
+
/* utility definitions */
#define NC (CHAR_MAX - CHAR_MIN + 1)
typedef unsigned char uch;
@@ -51,3 +54,5 @@ typedef unsigned char uch;
#ifdef USEBCOPY
#define memmove(d, s, c) bcopy(s, d, c)
#endif
+
+#endif
diff --git a/lib/Support/system_error.cpp b/lib/Support/system_error.cpp
deleted file mode 100644
index 299f54a..0000000
--- a/lib/Support/system_error.cpp
+++ /dev/null
@@ -1,130 +0,0 @@
-//===---------------------- system_error.cpp ------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This was lifted from libc++ and modified for C++03.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Support/system_error.h"
-#include "llvm/Support/Errno.h"
-#include <cstring>
-#include <string>
-
-namespace llvm {
-
-// class error_category
-
-error_category::error_category() {
-}
-
-error_category::~error_category() {
-}
-
-error_condition
-error_category::default_error_condition(int ev) const {
- return error_condition(ev, *this);
-}
-
-bool
-error_category::equivalent(int code, const error_condition& condition) const {
- return default_error_condition(code) == condition;
-}
-
-bool
-error_category::equivalent(const error_code& code, int condition) const {
- return *this == code.category() && code.value() == condition;
-}
-
-std::string
-_do_message::message(int ev) const {
- return std::string(sys::StrError(ev));
-}
-
-class _generic_error_category : public _do_message {
-public:
- const char* name() const override;
- std::string message(int ev) const override;
-};
-
-const char*
-_generic_error_category::name() const {
- return "generic";
-}
-
-std::string
-_generic_error_category::message(int ev) const {
-#ifdef ELAST
- if (ev > ELAST)
- return std::string("unspecified generic_category error");
-#endif // ELAST
- return _do_message::message(ev);
-}
-
-const error_category&
-generic_category() {
- static _generic_error_category s;
- return s;
-}
-
-class _system_error_category : public _do_message {
-public:
- const char* name() const override;
- std::string message(int ev) const override;
- error_condition default_error_condition(int ev) const override;
-};
-
-const char*
-_system_error_category::name() const {
- return "system";
-}
-
-// std::string _system_error_category::message(int ev) const {
-// Is in Platform/system_error.inc
-
-// error_condition _system_error_category::default_error_condition(int ev) const
-// Is in Platform/system_error.inc
-
-const error_category&
-system_category() {
- static _system_error_category s;
- return s;
-}
-
-const error_category&
-posix_category() {
-#ifdef LLVM_ON_WIN32
- return generic_category();
-#else
- return system_category();
-#endif
-}
-
-// error_condition
-
-std::string
-error_condition::message() const {
- return _cat_->message(_val_);
-}
-
-// error_code
-
-std::string
-error_code::message() const {
- return _cat_->message(_val_);
-}
-
-} // end namespace llvm
-
-// Include the truly platform-specific parts of this class.
-#if defined(LLVM_ON_UNIX)
-#include "Unix/system_error.inc"
-#endif
-#if defined(LLVM_ON_WIN32)
-#include "Windows/system_error.inc"
-#endif
diff --git a/lib/TableGen/Android.mk b/lib/TableGen/Android.mk
index 1f01ef7..0fd94bb 100644
--- a/lib/TableGen/Android.mk
+++ b/lib/TableGen/Android.mk
@@ -4,6 +4,7 @@ libtablegen_SRC_FILES := \
Error.cpp \
Main.cpp \
Record.cpp \
+ SetTheory.cpp \
StringMatcher.cpp \
TableGenBackend.cpp \
TGLexer.cpp \
diff --git a/lib/TableGen/CMakeLists.txt b/lib/TableGen/CMakeLists.txt
index 935d674..fb70218 100644
--- a/lib/TableGen/CMakeLists.txt
+++ b/lib/TableGen/CMakeLists.txt
@@ -2,6 +2,7 @@ add_llvm_library(LLVMTableGen
Error.cpp
Main.cpp
Record.cpp
+ SetTheory.cpp
StringMatcher.cpp
TableGenBackend.cpp
TGLexer.cpp
diff --git a/lib/TableGen/Main.cpp b/lib/TableGen/Main.cpp
index 476026d..e317fbf 100644
--- a/lib/TableGen/Main.cpp
+++ b/lib/TableGen/Main.cpp
@@ -20,12 +20,12 @@
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/ToolOutputFile.h"
-#include "llvm/Support/system_error.h"
#include "llvm/TableGen/Error.h"
#include "llvm/TableGen/Main.h"
#include "llvm/TableGen/Record.h"
#include <algorithm>
#include <cstdio>
+#include <system_error>
using namespace llvm;
namespace {
@@ -81,14 +81,14 @@ int TableGenMain(char *argv0, TableGenMainFn *MainFn) {
RecordKeeper Records;
// Parse the input file.
- std::unique_ptr<MemoryBuffer> File;
- if (error_code ec =
- MemoryBuffer::getFileOrSTDIN(InputFilename, File)) {
- errs() << "Could not open input file '" << InputFilename << "': "
- << ec.message() <<"\n";
+ ErrorOr<std::unique_ptr<MemoryBuffer>> FileOrErr =
+ MemoryBuffer::getFileOrSTDIN(InputFilename);
+ if (std::error_code EC = FileOrErr.getError()) {
+ errs() << "Could not open input file '" << InputFilename
+ << "': " << EC.message() << "\n";
return 1;
}
- MemoryBuffer *F = File.release();
+ MemoryBuffer *F = FileOrErr.get().release();
// Tell SrcMgr about this buffer, which is what TGParser will pick up.
SrcMgr.AddNewSourceBuffer(F, SMLoc());
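The TableGen driver above moves from the out-parameter overload to ErrorOr<T>, which holds either a value or a std::error_code. The consuming pattern in a standalone sketch (loadFile is a hypothetical helper):

    #include "llvm/Support/ErrorOr.h"
    #include "llvm/Support/MemoryBuffer.h"
    #include "llvm/Support/raw_ostream.h"
    #include <memory>

    bool loadFile(llvm::StringRef Path) {
      llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> BufOrErr =
          llvm::MemoryBuffer::getFileOrSTDIN(Path);
      if (std::error_code EC = BufOrErr.getError()) {
        llvm::errs() << Path << ": " << EC.message() << "\n";
        return false;
      }
      std::unique_ptr<llvm::MemoryBuffer> Buf = std::move(BufOrErr.get());
      return Buf->getBufferSize() != 0;
    }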
diff --git a/lib/TableGen/Record.cpp b/lib/TableGen/Record.cpp
index c553a21..f7843dc 100644
--- a/lib/TableGen/Record.cpp
+++ b/lib/TableGen/Record.cpp
@@ -811,20 +811,14 @@ Init *UnOpInit::Fold(Record *CurRec, MultiClass *CurMultiClass) const {
}
case HEAD: {
if (ListInit *LHSl = dyn_cast<ListInit>(LHS)) {
- if (LHSl->getSize() == 0) {
- assert(0 && "Empty list in car");
- return nullptr;
- }
+ assert(LHSl->getSize() != 0 && "Empty list in car");
return LHSl->getElement(0);
}
break;
}
case TAIL: {
if (ListInit *LHSl = dyn_cast<ListInit>(LHS)) {
- if (LHSl->getSize() == 0) {
- assert(0 && "Empty list in cdr");
- return nullptr;
- }
+ assert(LHSl->getSize() != 0 && "Empty list in cdr");
// Note the +1. We can't just pass the result of getValues()
// directly.
ArrayRef<Init *>::iterator begin = LHSl->getValues().begin()+1;
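Note that folding the empty-list check into the assertion is not behavior-preserving in release builds: with NDEBUG defined the old code still returned nullptr, while the new code asserts nothing and goes on to index the empty list. A minimal illustration of the difference:

    #include <cassert>
    #include <cstddef>

    int *headOld(int *Data, size_t Size) {
      if (Size == 0) { assert(0 && "empty"); return nullptr; } // NDEBUG: nullptr
      return &Data[0];
    }
    int *headNew(int *Data, size_t Size) {
      assert(Size != 0 && "empty"); // NDEBUG: compiled out, falls through
      return &Data[0];
    }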
diff --git a/lib/TableGen/SetTheory.cpp b/lib/TableGen/SetTheory.cpp
new file mode 100644
index 0000000..c99c2ba
--- /dev/null
+++ b/lib/TableGen/SetTheory.cpp
@@ -0,0 +1,323 @@
+//===- SetTheory.cpp - Generate ordered sets from DAG expressions ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the SetTheory class that computes ordered sets of
+// Records from DAG expressions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/Format.h"
+#include "llvm/TableGen/Error.h"
+#include "llvm/TableGen/Record.h"
+#include "llvm/TableGen/SetTheory.h"
+
+using namespace llvm;
+
+// Define the standard operators.
+namespace {
+
+typedef SetTheory::RecSet RecSet;
+typedef SetTheory::RecVec RecVec;
+
+// (add a, b, ...) Evaluate and union all arguments.
+struct AddOp : public SetTheory::Operator {
+ void apply(SetTheory &ST, DagInit *Expr, RecSet &Elts,
+ ArrayRef<SMLoc> Loc) override {
+ ST.evaluate(Expr->arg_begin(), Expr->arg_end(), Elts, Loc);
+ }
+};
+
+// (sub Add, Sub, ...) Set difference.
+struct SubOp : public SetTheory::Operator {
+ void apply(SetTheory &ST, DagInit *Expr, RecSet &Elts,
+ ArrayRef<SMLoc> Loc) override {
+ if (Expr->arg_size() < 2)
+ PrintFatalError(Loc, "Set difference needs at least two arguments: " +
+ Expr->getAsString());
+ RecSet Add, Sub;
+ ST.evaluate(*Expr->arg_begin(), Add, Loc);
+ ST.evaluate(Expr->arg_begin() + 1, Expr->arg_end(), Sub, Loc);
+ for (RecSet::iterator I = Add.begin(), E = Add.end(); I != E; ++I)
+ if (!Sub.count(*I))
+ Elts.insert(*I);
+ }
+};
+
+// (and S1, S2) Set intersection.
+struct AndOp : public SetTheory::Operator {
+ void apply(SetTheory &ST, DagInit *Expr, RecSet &Elts,
+ ArrayRef<SMLoc> Loc) override {
+ if (Expr->arg_size() != 2)
+ PrintFatalError(Loc, "Set intersection requires two arguments: " +
+ Expr->getAsString());
+ RecSet S1, S2;
+ ST.evaluate(Expr->arg_begin()[0], S1, Loc);
+ ST.evaluate(Expr->arg_begin()[1], S2, Loc);
+ for (RecSet::iterator I = S1.begin(), E = S1.end(); I != E; ++I)
+ if (S2.count(*I))
+ Elts.insert(*I);
+ }
+};
+
+// SetIntBinOp - Abstract base class for (Op S, N) operators.
+struct SetIntBinOp : public SetTheory::Operator {
+ virtual void apply2(SetTheory &ST, DagInit *Expr, RecSet &Set, int64_t N,
+ RecSet &Elts, ArrayRef<SMLoc> Loc) = 0;
+
+ void apply(SetTheory &ST, DagInit *Expr, RecSet &Elts,
+ ArrayRef<SMLoc> Loc) override {
+ if (Expr->arg_size() != 2)
+ PrintFatalError(Loc, "Operator requires (Op Set, Int) arguments: " +
+ Expr->getAsString());
+ RecSet Set;
+ ST.evaluate(Expr->arg_begin()[0], Set, Loc);
+ IntInit *II = dyn_cast<IntInit>(Expr->arg_begin()[1]);
+ if (!II)
+ PrintFatalError(Loc, "Second argument must be an integer: " +
+ Expr->getAsString());
+ apply2(ST, Expr, Set, II->getValue(), Elts, Loc);
+ }
+};
+
+// (shl S, N) Shift left, remove the first N elements.
+struct ShlOp : public SetIntBinOp {
+ void apply2(SetTheory &ST, DagInit *Expr, RecSet &Set, int64_t N,
+ RecSet &Elts, ArrayRef<SMLoc> Loc) override {
+ if (N < 0)
+ PrintFatalError(Loc, "Positive shift required: " +
+ Expr->getAsString());
+ if (unsigned(N) < Set.size())
+ Elts.insert(Set.begin() + N, Set.end());
+ }
+};
+
+// (trunc S, N) Truncate after the first N elements.
+struct TruncOp : public SetIntBinOp {
+ void apply2(SetTheory &ST, DagInit *Expr, RecSet &Set, int64_t N,
+ RecSet &Elts, ArrayRef<SMLoc> Loc) override {
+ if (N < 0)
+ PrintFatalError(Loc, "Positive length required: " +
+ Expr->getAsString());
+ if (unsigned(N) > Set.size())
+ N = Set.size();
+ Elts.insert(Set.begin(), Set.begin() + N);
+ }
+};
+
+// Left/right rotation.
+struct RotOp : public SetIntBinOp {
+ const bool Reverse;
+
+ RotOp(bool Rev) : Reverse(Rev) {}
+
+ void apply2(SetTheory &ST, DagInit *Expr, RecSet &Set, int64_t N,
+ RecSet &Elts, ArrayRef<SMLoc> Loc) override {
+ if (Reverse)
+ N = -N;
+ // N > 0 -> rotate left, N < 0 -> rotate right.
+ if (Set.empty())
+ return;
+ if (N < 0)
+ N = Set.size() - (-N % Set.size());
+ else
+ N %= Set.size();
+ Elts.insert(Set.begin() + N, Set.end());
+ Elts.insert(Set.begin(), Set.begin() + N);
+ }
+};
+
+// (decimate S, N) Pick every N'th element of S.
+struct DecimateOp : public SetIntBinOp {
+ void apply2(SetTheory &ST, DagInit *Expr, RecSet &Set, int64_t N,
+ RecSet &Elts, ArrayRef<SMLoc> Loc) override {
+ if (N <= 0)
+ PrintFatalError(Loc, "Positive stride required: " +
+ Expr->getAsString());
+ for (unsigned I = 0; I < Set.size(); I += N)
+ Elts.insert(Set[I]);
+ }
+};
+
+// (interleave S1, S2, ...) Interleave elements of the arguments.
+struct InterleaveOp : public SetTheory::Operator {
+ void apply(SetTheory &ST, DagInit *Expr, RecSet &Elts,
+ ArrayRef<SMLoc> Loc) override {
+ // Evaluate the arguments individually.
+ SmallVector<RecSet, 4> Args(Expr->getNumArgs());
+ unsigned MaxSize = 0;
+ for (unsigned i = 0, e = Expr->getNumArgs(); i != e; ++i) {
+ ST.evaluate(Expr->getArg(i), Args[i], Loc);
+ MaxSize = std::max(MaxSize, unsigned(Args[i].size()));
+ }
+ // Interleave arguments into Elts.
+ for (unsigned n = 0; n != MaxSize; ++n)
+ for (unsigned i = 0, e = Expr->getNumArgs(); i != e; ++i)
+ if (n < Args[i].size())
+ Elts.insert(Args[i][n]);
+ }
+};
+
+// (sequence "Format", From, To) Generate a sequence of records by name.
+struct SequenceOp : public SetTheory::Operator {
+ void apply(SetTheory &ST, DagInit *Expr, RecSet &Elts,
+ ArrayRef<SMLoc> Loc) override {
+ int Step = 1;
+ if (Expr->arg_size() > 4)
+ PrintFatalError(Loc, "Bad args to (sequence \"Format\", From, To): " +
+ Expr->getAsString());
+ else if (Expr->arg_size() == 4) {
+ if (IntInit *II = dyn_cast<IntInit>(Expr->arg_begin()[3])) {
+ Step = II->getValue();
+ } else
+ PrintFatalError(Loc, "Stride must be an integer: " +
+ Expr->getAsString());
+ }
+
+ std::string Format;
+ if (StringInit *SI = dyn_cast<StringInit>(Expr->arg_begin()[0]))
+ Format = SI->getValue();
+ else
+ PrintFatalError(Loc, "Format must be a string: " + Expr->getAsString());
+
+ int64_t From, To;
+ if (IntInit *II = dyn_cast<IntInit>(Expr->arg_begin()[1]))
+ From = II->getValue();
+ else
+ PrintFatalError(Loc, "From must be an integer: " + Expr->getAsString());
+ if (From < 0 || From >= (1 << 30))
+ PrintFatalError(Loc, "From out of range");
+
+ if (IntInit *II = dyn_cast<IntInit>(Expr->arg_begin()[2]))
+ To = II->getValue();
+ else
+ PrintFatalError(Loc, "From must be an integer: " + Expr->getAsString());
+ if (To < 0 || To >= (1 << 30))
+ PrintFatalError(Loc, "To out of range");
+
+ RecordKeeper &Records =
+ cast<DefInit>(Expr->getOperator())->getDef()->getRecords();
+
+ Step *= From <= To ? 1 : -1;
+ while (true) {
+ if (Step > 0 && From > To)
+ break;
+ else if (Step < 0 && From < To)
+ break;
+ std::string Name;
+ raw_string_ostream OS(Name);
+ OS << format(Format.c_str(), unsigned(From));
+ Record *Rec = Records.getDef(OS.str());
+ if (!Rec)
+ PrintFatalError(Loc, "No def named '" + Name + "': " +
+ Expr->getAsString());
+ // Try to reevaluate Rec in case it is a set.
+ if (const RecVec *Result = ST.expand(Rec))
+ Elts.insert(Result->begin(), Result->end());
+ else
+ Elts.insert(Rec);
+
+ From += Step;
+ }
+ }
+};
+
+// Expand a Def into a set by evaluating one of its fields.
+struct FieldExpander : public SetTheory::Expander {
+ StringRef FieldName;
+
+ FieldExpander(StringRef fn) : FieldName(fn) {}
+
+ void expand(SetTheory &ST, Record *Def, RecSet &Elts) override {
+ ST.evaluate(Def->getValueInit(FieldName), Elts, Def->getLoc());
+ }
+};
+} // end anonymous namespace
+
+// Pin the vtables to this file.
+void SetTheory::Operator::anchor() {}
+void SetTheory::Expander::anchor() {}
+
+
+SetTheory::SetTheory() {
+ addOperator("add", new AddOp);
+ addOperator("sub", new SubOp);
+ addOperator("and", new AndOp);
+ addOperator("shl", new ShlOp);
+ addOperator("trunc", new TruncOp);
+ addOperator("rotl", new RotOp(false));
+ addOperator("rotr", new RotOp(true));
+ addOperator("decimate", new DecimateOp);
+ addOperator("interleave", new InterleaveOp);
+ addOperator("sequence", new SequenceOp);
+}
+
+void SetTheory::addOperator(StringRef Name, Operator *Op) {
+ Operators[Name] = Op;
+}
+
+void SetTheory::addExpander(StringRef ClassName, Expander *E) {
+ Expanders[ClassName] = E;
+}
+
+void SetTheory::addFieldExpander(StringRef ClassName, StringRef FieldName) {
+ addExpander(ClassName, new FieldExpander(FieldName));
+}
+
+void SetTheory::evaluate(Init *Expr, RecSet &Elts, ArrayRef<SMLoc> Loc) {
+  // A def in a list can be just an element, or it may expand.
+ if (DefInit *Def = dyn_cast<DefInit>(Expr)) {
+ if (const RecVec *Result = expand(Def->getDef()))
+ return Elts.insert(Result->begin(), Result->end());
+ Elts.insert(Def->getDef());
+ return;
+ }
+
+ // Lists simply expand.
+ if (ListInit *LI = dyn_cast<ListInit>(Expr))
+ return evaluate(LI->begin(), LI->end(), Elts, Loc);
+
+ // Anything else must be a DAG.
+ DagInit *DagExpr = dyn_cast<DagInit>(Expr);
+ if (!DagExpr)
+ PrintFatalError(Loc, "Invalid set element: " + Expr->getAsString());
+ DefInit *OpInit = dyn_cast<DefInit>(DagExpr->getOperator());
+ if (!OpInit)
+ PrintFatalError(Loc, "Bad set expression: " + Expr->getAsString());
+ Operator *Op = Operators.lookup(OpInit->getDef()->getName());
+ if (!Op)
+ PrintFatalError(Loc, "Unknown set operator: " + Expr->getAsString());
+ Op->apply(*this, DagExpr, Elts, Loc);
+}
+
+const RecVec *SetTheory::expand(Record *Set) {
+ // Check existing entries for Set and return early.
+ ExpandMap::iterator I = Expansions.find(Set);
+ if (I != Expansions.end())
+ return &I->second;
+
+ // This is the first time we see Set. Find a suitable expander.
+ const std::vector<Record*> &SC = Set->getSuperClasses();
+ for (unsigned i = 0, e = SC.size(); i != e; ++i) {
+ // Skip unnamed superclasses.
+    if (!isa<StringInit>(SC[i]->getNameInit()))
+ continue;
+ if (Expander *Exp = Expanders.lookup(SC[i]->getName())) {
+      // Insert an empty expansion first; this breaks recursive definitions.
+ RecVec &EltVec = Expansions[Set];
+ RecSet Elts;
+ Exp->expand(*this, Set, Elts);
+ EltVec.assign(Elts.begin(), Elts.end());
+ return &EltVec;
+ }
+ }
+
+ // Set is not expandable.
+ return nullptr;
+}
+
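
The SequenceOp added above takes an optional fourth Stride argument and re-signs
it so iteration always walks from From toward To. A minimal standalone sketch of
that loop (hypothetical expandSequence helper, independent of TableGen; not part
of the patch):

    #include <cstdio>
    #include <string>
    #include <vector>

    // expandSequence("R%u", 0, 3)    -> R0, R1, R2, R3
    // expandSequence("R%u", 7, 3, 2) -> R7, R5, R3 (stride re-signed to -2)
    static std::vector<std::string> expandSequence(const char *Fmt, long From,
                                                   long To, long Step = 1) {
      std::vector<std::string> Names;
      Step *= From <= To ? 1 : -1; // walk toward To regardless of stride sign
      while (!((Step > 0 && From > To) || (Step < 0 && From < To))) {
        char Buf[32];
        std::snprintf(Buf, sizeof(Buf), Fmt, unsigned(From));
        Names.push_back(Buf);
        From += Step;
      }
      return Names;
    }
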
diff --git a/lib/TableGen/TGLexer.cpp b/lib/TableGen/TGLexer.cpp
index 1ec2eea..fc1d3ca 100644
--- a/lib/TableGen/TGLexer.cpp
+++ b/lib/TableGen/TGLexer.cpp
@@ -27,9 +27,9 @@
using namespace llvm;
TGLexer::TGLexer(SourceMgr &SM) : SrcMgr(SM) {
- CurBuffer = 0;
- CurBuf = SrcMgr.getMemoryBuffer(CurBuffer);
- CurPtr = CurBuf->getBufferStart();
+ CurBuffer = SrcMgr.getMainFileID();
+ CurBuf = SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer();
+ CurPtr = CurBuf.begin();
TokStart = nullptr;
}
@@ -52,7 +52,7 @@ int TGLexer::getNextChar() {
case 0: {
// A nul character in the stream is either the end of the current buffer or
// a random nul in the file. Disambiguate that here.
- if (CurPtr-1 != CurBuf->getBufferEnd())
+ if (CurPtr-1 != CurBuf.end())
return 0; // Just whitespace.
// If this is the end of an included file, pop the parent file off the
@@ -60,7 +60,7 @@ int TGLexer::getNextChar() {
SMLoc ParentIncludeLoc = SrcMgr.getParentIncludeLoc(CurBuffer);
if (ParentIncludeLoc != SMLoc()) {
CurBuffer = SrcMgr.FindBufferContainingLoc(ParentIncludeLoc);
- CurBuf = SrcMgr.getMemoryBuffer(CurBuffer);
+ CurBuf = SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer();
CurPtr = ParentIncludeLoc.getPointer();
return getNextChar();
}
@@ -187,7 +187,7 @@ tgtok::TokKind TGLexer::LexString() {
while (*CurPtr != '"') {
// If we hit the end of the buffer, report an error.
- if (*CurPtr == 0 && CurPtr == CurBuf->getBufferEnd())
+ if (*CurPtr == 0 && CurPtr == CurBuf.end())
return ReturnError(StrStart, "End of file in string literal");
if (*CurPtr == '\n' || *CurPtr == '\r')
@@ -220,7 +220,7 @@ tgtok::TokKind TGLexer::LexString() {
// If we hit the end of the buffer, report an error.
case '\0':
- if (CurPtr == CurBuf->getBufferEnd())
+ if (CurPtr == CurBuf.end())
return ReturnError(StrStart, "End of file in string literal");
// FALL THROUGH
default:
@@ -304,7 +304,7 @@ bool TGLexer::LexInclude() {
CurBuffer = SrcMgr.AddIncludeFile(Filename, SMLoc::getFromPointer(CurPtr),
IncludedFile);
- if (CurBuffer == -1) {
+ if (!CurBuffer) {
PrintError(getLoc(), "Could not find include file '" + Filename + "'");
return true;
}
@@ -319,8 +319,8 @@ bool TGLexer::LexInclude() {
}
Dependencies.insert(std::make_pair(IncludedFile, getLoc()));
// Save the line number and lex buffer of the includer.
- CurBuf = SrcMgr.getMemoryBuffer(CurBuffer);
- CurPtr = CurBuf->getBufferStart();
+ CurBuf = SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer();
+ CurPtr = CurBuf.begin();
return false;
}
@@ -333,7 +333,7 @@ void TGLexer::SkipBCPLComment() {
return; // Newline is end of comment.
case 0:
// If this is the end of the buffer, end the comment.
- if (CurPtr == CurBuf->getBufferEnd())
+ if (CurPtr == CurBuf.end())
return;
break;
}
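
The TGLexer hunks above replace the cached const MemoryBuffer* with a StringRef,
so end-of-buffer tests become pointer comparisons against CurBuf.end() and the
lexer no longer needs the MemoryBuffer type at all. A hedged sketch of the
migration pattern (hypothetical resetBuffer helper, not part of the patch):

    #include "llvm/ADT/StringRef.h"
    #include "llvm/Support/MemoryBuffer.h"

    // Old: const MemoryBuffer *CurBuf; CurPtr = CurBuf->getBufferStart();
    // New: keep only the [start, end) view that getBuffer() returns.
    static void resetBuffer(llvm::StringRef &CurBuf, const char *&CurPtr,
                            const llvm::MemoryBuffer &MB) {
      CurBuf = MB.getBuffer(); // MemoryBuffer guarantees a '\0' past end()
      CurPtr = CurBuf.begin();
    }
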
diff --git a/lib/TableGen/TGLexer.h b/lib/TableGen/TGLexer.h
index 1e599f8..a2c95ca 100644
--- a/lib/TableGen/TGLexer.h
+++ b/lib/TableGen/TGLexer.h
@@ -14,6 +14,7 @@
#ifndef TGLEXER_H
#define TGLEXER_H
+#include "llvm/ADT/StringRef.h"
#include "llvm/Support/DataTypes.h"
#include "llvm/Support/SMLoc.h"
#include <cassert>
@@ -21,7 +22,6 @@
#include <string>
namespace llvm {
-class MemoryBuffer;
class SourceMgr;
class SMLoc;
class Twine;
@@ -63,7 +63,7 @@ class TGLexer {
SourceMgr &SrcMgr;
const char *CurPtr;
- const MemoryBuffer *CurBuf;
+ StringRef CurBuf;
// Information about the current token.
const char *TokStart;
@@ -73,7 +73,7 @@ class TGLexer {
/// CurBuffer - This is the current buffer index we're lexing from as managed
/// by the SourceMgr object.
- int CurBuffer;
+ unsigned CurBuffer;
public:
typedef std::map<std::string, SMLoc> DependenciesMapTy;
diff --git a/lib/TableGen/TGParser.cpp b/lib/TableGen/TGParser.cpp
index 038e018..0550692 100644
--- a/lib/TableGen/TGParser.cpp
+++ b/lib/TableGen/TGParser.cpp
@@ -360,8 +360,13 @@ bool TGParser::ProcessForeachDefs(Record *CurRec, SMLoc Loc, IterSet &IterVals){
}
if (Records.getDef(IterRec->getNameInitAsString())) {
- Error(Loc, "def already exists: " + IterRec->getNameInitAsString());
- return true;
+      // If this record is anonymous, that's fine; just generate a new name.
+ if (IterRec->isAnonymous())
+ IterRec->setName(GetNewAnonymousName());
+ else {
+ Error(Loc, "def already exists: " + IterRec->getNameInitAsString());
+ return true;
+ }
}
Records.addDef(IterRec);
@@ -782,7 +787,7 @@ Init *TGParser::ParseIDValue(Record *CurRec,
///
/// Operation ::= XOperator ['<' Type '>'] '(' Args ')'
///
-Init *TGParser::ParseOperation(Record *CurRec) {
+Init *TGParser::ParseOperation(Record *CurRec, RecTy *ItemType) {
switch (Lex.getCode()) {
default:
TokError("unknown operation");
@@ -845,7 +850,7 @@ Init *TGParser::ParseOperation(Record *CurRec) {
ListRecTy *LType = dyn_cast<ListRecTy>(LHSt->getType());
StringRecTy *SType = dyn_cast<StringRecTy>(LHSt->getType());
if (!LType && !SType) {
- TokError("expected list or string type argumnet in unary operator");
+ TokError("expected list or string type argument in unary operator");
return nullptr;
}
}
@@ -853,7 +858,7 @@ Init *TGParser::ParseOperation(Record *CurRec) {
if (Code == UnOpInit::HEAD
|| Code == UnOpInit::TAIL) {
if (!LHSl && !LHSt) {
- TokError("expected list type argumnet in unary operator");
+ TokError("expected list type argument in unary operator");
return nullptr;
}
@@ -877,7 +882,7 @@ Init *TGParser::ParseOperation(Record *CurRec) {
assert(LHSt && "expected list type argument in unary operator");
ListRecTy *LType = dyn_cast<ListRecTy>(LHSt->getType());
if (!LType) {
- TokError("expected list type argumnet in unary operator");
+ TokError("expected list type argument in unary operator");
return nullptr;
}
if (Code == UnOpInit::HEAD) {
@@ -1021,8 +1026,9 @@ Init *TGParser::ParseOperation(Record *CurRec) {
}
Lex.Lex(); // eat the ','
- Init *MHS = ParseValue(CurRec);
- if (!MHS) return nullptr;
+ Init *MHS = ParseValue(CurRec, ItemType);
+ if (!MHS)
+ return nullptr;
if (Lex.getCode() != tgtok::comma) {
TokError("expected ',' in ternary operator");
@@ -1030,8 +1036,9 @@ Init *TGParser::ParseOperation(Record *CurRec) {
}
Lex.Lex(); // eat the ','
- Init *RHS = ParseValue(CurRec);
- if (!RHS) return nullptr;
+ Init *RHS = ParseValue(CurRec, ItemType);
+ if (!RHS)
+ return nullptr;
if (Lex.getCode() != tgtok::r_paren) {
TokError("expected ')' in binary operator");
@@ -1441,7 +1448,7 @@ Init *TGParser::ParseSimpleValue(Record *CurRec, RecTy *ItemType,
case tgtok::XIf:
case tgtok::XForEach:
case tgtok::XSubst: { // Value ::= !ternop '(' Value ',' Value ',' Value ')'
- return ParseOperation(CurRec);
+ return ParseOperation(CurRec, ItemType);
}
}
diff --git a/lib/TableGen/TGParser.h b/lib/TableGen/TGParser.h
index 6fd442a..9f4b7e9 100644
--- a/lib/TableGen/TGParser.h
+++ b/lib/TableGen/TGParser.h
@@ -181,7 +181,7 @@ private: // Parser methods.
std::vector<unsigned> ParseRangeList();
bool ParseRangePiece(std::vector<unsigned> &Ranges);
RecTy *ParseType();
- Init *ParseOperation(Record *CurRec);
+ Init *ParseOperation(Record *CurRec, RecTy *ItemType);
RecTy *ParseOperatorType();
Init *ParseObjectName(MultiClass *CurMultiClass);
Record *ParseClassID();
diff --git a/lib/Target/AArch64/AArch64.td b/lib/Target/AArch64/AArch64.td
index 1ad5ac8..e6a27c3 100644
--- a/lib/Target/AArch64/AArch64.td
+++ b/lib/Target/AArch64/AArch64.td
@@ -60,6 +60,7 @@ def AArch64InstrInfo : InstrInfo;
// AArch64 Processors supported.
//
include "AArch64SchedA53.td"
+include "AArch64SchedA57.td"
include "AArch64SchedCyclone.td"
def ProcA53 : SubtargetFeature<"a53", "ARMProcFamily", "CortexA53",
@@ -89,7 +90,7 @@ def : ProcessorModel<"generic", NoSchedModel, [FeatureFPARMv8,
FeatureCRC]>;
def : ProcessorModel<"cortex-a53", CortexA53Model, [ProcA53]>;
-def : ProcessorModel<"cortex-a57", NoSchedModel, [ProcA57]>;
+def : ProcessorModel<"cortex-a57", CortexA57Model, [ProcA57]>;
def : ProcessorModel<"cyclone", CycloneModel, [ProcCyclone]>;
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/AArch64/AArch64AddressTypePromotion.cpp b/lib/Target/AArch64/AArch64AddressTypePromotion.cpp
index 04906f6..ab2c4b7 100644
--- a/lib/Target/AArch64/AArch64AddressTypePromotion.cpp
+++ b/lib/Target/AArch64/AArch64AddressTypePromotion.cpp
@@ -214,8 +214,8 @@ AArch64AddressTypePromotion::shouldConsiderSExt(const Instruction *SExt) const {
if (SExt->getType() != ConsideredSExtType)
return false;
- for (const Use &U : SExt->uses()) {
- if (isa<GetElementPtrInst>(*U))
+ for (const User *U : SExt->users()) {
+ if (isa<GetElementPtrInst>(U))
return true;
}
@@ -267,8 +267,7 @@ AArch64AddressTypePromotion::propagateSignExtension(Instructions &SExtInsts) {
}
// Now try to get through the chain of definitions.
- while (isa<Instruction>(SExt->getOperand(0))) {
- Instruction *Inst = dyn_cast<Instruction>(SExt->getOperand(0));
+ while (auto *Inst = dyn_cast<Instruction>(SExt->getOperand(0))) {
DEBUG(dbgs() << "Try to get through:\n" << *Inst << '\n');
if (!canGetThrough(Inst) || !shouldGetThrough(Inst)) {
// We cannot get through something that is not an Instruction
@@ -285,10 +284,10 @@ AArch64AddressTypePromotion::propagateSignExtension(Instructions &SExtInsts) {
// assertion on the type as all involved sext operation may have not
// been moved yet.
while (!Inst->use_empty()) {
- Value::use_iterator UseIt = Inst->use_begin();
- Instruction *UseInst = dyn_cast<Instruction>(*UseIt);
- assert(UseInst && "Use of sext is not an Instruction!");
- UseInst->setOperand(UseIt->getOperandNo(), SExt);
+ Use &U = *Inst->use_begin();
+ Instruction *User = dyn_cast<Instruction>(U.getUser());
+ assert(User && "User of sext is not an Instruction!");
+ User->setOperand(U.getOperandNo(), SExt);
}
ToRemove.insert(Inst);
SExt->setOperand(0, Inst->getOperand(0));
@@ -385,11 +384,11 @@ void AArch64AddressTypePromotion::mergeSExts(ValueToInsts &ValToSExtendedUses,
if (ToRemove.count(Inst))
continue;
bool inserted = false;
- for (auto Pt : CurPts) {
+ for (auto &Pt : CurPts) {
if (DT.dominates(Inst, Pt)) {
DEBUG(dbgs() << "Replace all uses of:\n" << *Pt << "\nwith:\n"
<< *Inst << '\n');
- (Pt)->replaceAllUsesWith(Inst);
+ Pt->replaceAllUsesWith(Inst);
ToRemove.insert(Pt);
Pt = Inst;
inserted = true;
@@ -436,7 +435,7 @@ void AArch64AddressTypePromotion::analyzeSExtension(Instructions &SExtInsts) {
bool insert = false;
// #1.
- for (const Use &U : SExt->uses()) {
+ for (const User *U : SExt->users()) {
const Instruction *Inst = dyn_cast<GetElementPtrInst>(U);
if (Inst && Inst->getNumOperands() > 2) {
DEBUG(dbgs() << "Interesting use in GetElementPtrInst\n" << *Inst
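
Several hunks above swap `for (const Use &U : SExt->uses())` plus a dereference
for `for (const User *U : SExt->users())`. In the LLVM IR API a Use is an edge
in the def-use graph and a User is the consumer at its far end; users() is the
right range when only the consuming instruction matters, while uses() is needed
when the operand slot does, as in the rewrite loop above. A sketch of both
idioms (hypothetical helpers, assuming the 3.5-era Value API):

    #include "llvm/IR/Instructions.h"
    using namespace llvm;

    // users(): visit each consumer of V, once per use.
    static bool hasGEPUser(const Value *V) {
      for (const User *U : V->users())
        if (isa<GetElementPtrInst>(U))
          return true;
      return false;
    }

    // uses(): visit the edges, so the operand index is available.
    static void retargetUses(Instruction *Old, Value *New) {
      while (!Old->use_empty()) {
        Use &U = *Old->use_begin();
        cast<Instruction>(U.getUser())->setOperand(U.getOperandNo(), New);
      }
    }
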
diff --git a/lib/Target/AArch64/AArch64AsmPrinter.cpp b/lib/Target/AArch64/AArch64AsmPrinter.cpp
index c3ee9bb..cd94e24 100644
--- a/lib/Target/AArch64/AArch64AsmPrinter.cpp
+++ b/lib/Target/AArch64/AArch64AsmPrinter.cpp
@@ -211,7 +211,7 @@ void AArch64AsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNum,
const MachineOperand &MO = MI->getOperand(OpNum);
switch (MO.getType()) {
default:
- assert(0 && "<unknown operand type>");
+ llvm_unreachable("<unknown operand type>");
case MachineOperand::MO_Register: {
unsigned Reg = MO.getReg();
assert(TargetRegisterInfo::isPhysicalRegister(Reg));
diff --git a/lib/Target/AArch64/AArch64BranchRelaxation.cpp b/lib/Target/AArch64/AArch64BranchRelaxation.cpp
index 5209452..484e7e8 100644
--- a/lib/Target/AArch64/AArch64BranchRelaxation.cpp
+++ b/lib/Target/AArch64/AArch64BranchRelaxation.cpp
@@ -291,7 +291,7 @@ static bool isConditionalBranch(unsigned Opc) {
static MachineBasicBlock *getDestBlock(MachineInstr *MI) {
switch (MI->getOpcode()) {
default:
- assert(0 && "unexpected opcode!");
+ llvm_unreachable("unexpected opcode!");
case AArch64::TBZW:
case AArch64::TBNZW:
case AArch64::TBZX:
@@ -309,7 +309,7 @@ static MachineBasicBlock *getDestBlock(MachineInstr *MI) {
static unsigned getOppositeConditionOpcode(unsigned Opc) {
switch (Opc) {
default:
- assert(0 && "unexpected opcode!");
+ llvm_unreachable("unexpected opcode!");
case AArch64::TBNZW: return AArch64::TBZW;
case AArch64::TBNZX: return AArch64::TBZX;
case AArch64::TBZW: return AArch64::TBNZW;
@@ -325,7 +325,7 @@ static unsigned getOppositeConditionOpcode(unsigned Opc) {
static unsigned getBranchDisplacementBits(unsigned Opc) {
switch (Opc) {
default:
- assert(0 && "unexpected opcode!");
+ llvm_unreachable("unexpected opcode!");
case AArch64::TBNZW:
case AArch64::TBZW:
case AArch64::TBNZX:
diff --git a/lib/Target/AArch64/AArch64CallingConvention.td b/lib/Target/AArch64/AArch64CallingConvention.td
index ded2e17..8e8bd3d 100644
--- a/lib/Target/AArch64/AArch64CallingConvention.td
+++ b/lib/Target/AArch64/AArch64CallingConvention.td
@@ -18,9 +18,6 @@ class CCIfAlign<string Align, CCAction A> :
class CCIfBigEndian<CCAction A> :
CCIf<"State.getTarget().getDataLayout()->isBigEndian()", A>;
-class CCIfUnallocated<string Reg, CCAction A> :
- CCIf<"!State.isAllocated(AArch64::" # Reg # ")", A>;
-
//===----------------------------------------------------------------------===//
// ARM AAPCS64 Calling Convention
//===----------------------------------------------------------------------===//
@@ -45,7 +42,7 @@ def CC_AArch64_AAPCS : CallingConv<[
// Handle i1, i8, i16, i32, i64, f32, f64 and v2f64 by passing in registers,
// up to eight each of GPR and FPR.
- CCIfType<[i1, i8, i16], CCIfUnallocated<"X7", CCPromoteToType<i32>>>,
+ CCIfType<[i1, i8, i16], CCPromoteToType<i32>>,
CCIfType<[i32], CCAssignToRegWithShadow<[W0, W1, W2, W3, W4, W5, W6, W7],
[X0, X1, X2, X3, X4, X5, X6, X7]>>,
// i128 is split to two i64s, we can't fit half to register X7.
@@ -120,7 +117,7 @@ def CC_AArch64_DarwinPCS : CallingConv<[
// Handle i1, i8, i16, i32, i64, f32, f64 and v2f64 by passing in registers,
// up to eight each of GPR and FPR.
- CCIfType<[i1, i8, i16], CCIfUnallocated<"X7", CCPromoteToType<i32>>>,
+ CCIfType<[i1, i8, i16], CCPromoteToType<i32>>,
CCIfType<[i32], CCAssignToRegWithShadow<[W0, W1, W2, W3, W4, W5, W6, W7],
[X0, X1, X2, X3, X4, X5, X6, X7]>>,
// i128 is split to two i64s, we can't fit half to register X7.
@@ -143,8 +140,8 @@ def CC_AArch64_DarwinPCS : CallingConv<[
CCAssignToReg<[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
// If more than will fit in registers, pass them on the stack instead.
- CCIfType<[i1, i8], CCAssignToStack<1, 1>>,
- CCIfType<[i16], CCAssignToStack<2, 2>>,
+ CCIf<"ValVT == MVT::i1 || ValVT == MVT::i8", CCAssignToStack<1, 1>>,
+ CCIf<"ValVT == MVT::i16", CCAssignToStack<2, 2>>,
CCIfType<[i32, f32], CCAssignToStack<4, 4>>,
CCIfType<[i64, f64, v1f64, v2f32, v1i64, v2i32, v4i16, v8i8],
CCAssignToStack<8, 8>>,
@@ -172,12 +169,11 @@ def CC_AArch64_DarwinPCS_VarArg : CallingConv<[
// 32bit quantity as undef.
def CC_AArch64_WebKit_JS : CallingConv<[
// Handle i1, i8, i16, i32, and i64 passing in register X0 (W0).
- CCIfType<[i1, i8, i16], CCIfUnallocated<"X0", CCPromoteToType<i32>>>,
+ CCIfType<[i1, i8, i16], CCPromoteToType<i32>>,
CCIfType<[i32], CCAssignToRegWithShadow<[W0], [X0]>>,
CCIfType<[i64], CCAssignToRegWithShadow<[X0], [W0]>>,
// Pass the remaining arguments on the stack instead.
- CCIfType<[i1, i8, i16], CCAssignToStack<4, 4>>,
CCIfType<[i32, f32], CCAssignToStack<4, 4>>,
CCIfType<[i64, f64], CCAssignToStack<8, 8>>
]>;
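
The DarwinPCS change above is subtler than it looks: CCIfType matches on the
current LocVT, and once CCPromoteToType<i32> has fired the LocVT of an
i1/i8/i16 argument is i32, so a later CCIfType<[i1, i8]> row could never match.
Testing ValVT in a raw CCIf string is what keeps Darwin's 1- and 2-byte stack
slots working. Roughly the predicate the generated calling-convention code ends
up applying (a sketch, not the actual TableGen output):

    #include "llvm/CodeGen/ValueTypes.h"
    using namespace llvm;

    // Darwin PCS: the stack-slot size for a small integer argument comes
    // from the original value type, not the i32 it was promoted to.
    static unsigned darwinSmallIntSlotSize(MVT ValVT) {
      if (ValVT == MVT::i1 || ValVT == MVT::i8)
        return 1;
      if (ValVT == MVT::i16)
        return 2;
      return 4; // i32 and f32; wider types get 8-byte slots
    }
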
diff --git a/lib/Target/AArch64/AArch64FastISel.cpp b/lib/Target/AArch64/AArch64FastISel.cpp
index c3b5369..2164d77 100644
--- a/lib/Target/AArch64/AArch64FastISel.cpp
+++ b/lib/Target/AArch64/AArch64FastISel.cpp
@@ -240,21 +240,15 @@ unsigned AArch64FastISel::AArch64MaterializeFP(const ConstantFP *CFP, MVT VT) {
}
unsigned AArch64FastISel::AArch64MaterializeGV(const GlobalValue *GV) {
- // We can't handle thread-local variables quickly yet. Unfortunately we have
- // to peer through any aliases to find out if that rule applies.
- const GlobalValue *TLSGV = GV;
- if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
- TLSGV = GA->getAliasee();
+ // We can't handle thread-local variables quickly yet.
+ if (GV->isThreadLocal())
+ return 0;
// MachO still uses GOT for large code-model accesses, but ELF requires
// movz/movk sequences, which FastISel doesn't handle yet.
if (TM.getCodeModel() != CodeModel::Small && !Subtarget->isTargetMachO())
return 0;
- if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(TLSGV))
- if (GVar->isThreadLocal())
- return 0;
-
unsigned char OpFlags = Subtarget->ClassifyGlobalReference(GV, TM);
EVT DestEVT = TLI.getValueType(GV->getType(), true);
@@ -469,11 +463,18 @@ bool AArch64FastISel::SimplifyAddress(Address &Addr, MVT VT,
break;
}
- // FIXME: If this is a stack pointer and the offset needs to be simplified
- // then put the alloca address into a register, set the base type back to
- // register and continue. This should almost never happen.
+  // If this is a stack pointer and the offset needs to be simplified then put
+ // the alloca address into a register, set the base type back to register and
+ // continue. This should almost never happen.
if (needsLowering && Addr.getKind() == Address::FrameIndexBase) {
- return false;
+ unsigned ResultReg = createResultReg(&AArch64::GPR64RegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
+ ResultReg)
+ .addFrameIndex(Addr.getFI())
+ .addImm(0)
+ .addImm(0);
+ Addr.setKind(Address::RegBase);
+ Addr.setReg(ResultReg);
}
// Since the offset is too large for the load/store instruction get the
@@ -1224,7 +1225,6 @@ bool AArch64FastISel::ProcessCallArgs(
Arg = EmitIntExt(SrcVT, Arg, DestVT, /*isZExt*/ false);
if (Arg == 0)
return false;
- ArgVT = DestVT;
break;
}
case CCValAssign::AExt:
@@ -1235,7 +1235,6 @@ bool AArch64FastISel::ProcessCallArgs(
Arg = EmitIntExt(SrcVT, Arg, DestVT, /*isZExt*/ true);
if (Arg == 0)
return false;
- ArgVT = DestVT;
break;
}
default:
@@ -1254,7 +1253,7 @@ bool AArch64FastISel::ProcessCallArgs(
assert(VA.isMemLoc() && "Assuming store on stack.");
// Need to store on the stack.
- unsigned ArgSize = VA.getLocVT().getSizeInBits() / 8;
+ unsigned ArgSize = (ArgVT.getSizeInBits() + 7) / 8;
unsigned BEAlign = 0;
if (ArgSize < 8 && !Subtarget->isLittleEndian())
@@ -1468,10 +1467,12 @@ bool AArch64FastISel::TryEmitSmallMemCpy(Address Dest, Address Src,
bool RV;
unsigned ResultReg;
RV = EmitLoad(VT, ResultReg, Src);
- assert(RV == true && "Should be able to handle this load.");
+ if (!RV)
+ return false;
+
RV = EmitStore(VT, ResultReg, Dest);
- assert(RV == true && "Should be able to handle this store.");
- (void)RV;
+ if (!RV)
+ return false;
int64_t Size = VT.getSizeInBits() / 8;
Len -= Size;
@@ -1749,6 +1750,17 @@ unsigned AArch64FastISel::Emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt) {
unsigned AArch64FastISel::EmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
bool isZExt) {
assert(DestVT != MVT::i1 && "ZeroExt/SignExt an i1?");
+
+ // FastISel does not have plumbing to deal with extensions where the SrcVT or
+ // DestVT are odd things, so test to make sure that they are both types we can
+ // handle (i1/i8/i16/i32 for SrcVT and i8/i16/i32/i64 for DestVT), otherwise
+ // bail out to SelectionDAG.
+ if (((DestVT != MVT::i8) && (DestVT != MVT::i16) &&
+ (DestVT != MVT::i32) && (DestVT != MVT::i64)) ||
+ ((SrcVT != MVT::i1) && (SrcVT != MVT::i8) &&
+ (SrcVT != MVT::i16) && (SrcVT != MVT::i32)))
+ return 0;
+
unsigned Opc;
unsigned Imm = 0;
@@ -1895,6 +1907,7 @@ bool AArch64FastISel::SelectMul(const Instruction *I) {
case MVT::i32:
ZReg = AArch64::WZR;
Opc = AArch64::MADDWrrr;
+ SrcVT = MVT::i32;
break;
case MVT::i64:
ZReg = AArch64::XZR;
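
The new early-out in EmitIntExt above is a legality filter: both types must be
in the small set FastISel knows how to extend, otherwise returning 0 punts the
instruction back to SelectionDAG. Restated as a standalone predicate
(hypothetical helper, not part of the patch):

    #include "llvm/CodeGen/ValueTypes.h"
    using namespace llvm;

    // FastISel here only widens i1/i8/i16/i32 into i8/i16/i32/i64; vectors,
    // i128, and other odd widths fall back to SelectionDAG.
    static bool isSimpleIntExtPair(MVT SrcVT, MVT DestVT) {
      bool SrcOK = SrcVT == MVT::i1 || SrcVT == MVT::i8 ||
                   SrcVT == MVT::i16 || SrcVT == MVT::i32;
      bool DstOK = DestVT == MVT::i8 || DestVT == MVT::i16 ||
                   DestVT == MVT::i32 || DestVT == MVT::i64;
      return SrcOK && DstOK;
    }
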
diff --git a/lib/Target/AArch64/AArch64FrameLowering.cpp b/lib/Target/AArch64/AArch64FrameLowering.cpp
index deb306a..9c33717 100644
--- a/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -158,7 +158,7 @@ void AArch64FrameLowering::emitCalleeSavedFrameMoves(
MachineFrameInfo *MFI = MF.getFrameInfo();
MachineModuleInfo &MMI = MF.getMMI();
const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
- const AArch64InstrInfo *TII = TM.getInstrInfo();
+ const TargetInstrInfo *TII = MF.getTarget().getInstrInfo();
DebugLoc DL = MBB.findDebugLoc(MBBI);
// Add callee saved registers to move list.
@@ -204,8 +204,9 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF) const {
MachineBasicBlock::iterator MBBI = MBB.begin();
const MachineFrameInfo *MFI = MF.getFrameInfo();
const Function *Fn = MF.getFunction();
- const AArch64RegisterInfo *RegInfo = TM.getRegisterInfo();
- const AArch64InstrInfo *TII = TM.getInstrInfo();
+ const AArch64RegisterInfo *RegInfo = static_cast<const AArch64RegisterInfo *>(
+ MF.getTarget().getRegisterInfo());
+ const TargetInstrInfo *TII = MF.getTarget().getInstrInfo();
MachineModuleInfo &MMI = MF.getMMI();
AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
bool needsFrameMoves = MMI.hasDebugInfo() || Fn->needsUnwindTableEntry();
diff --git a/lib/Target/AArch64/AArch64FrameLowering.h b/lib/Target/AArch64/AArch64FrameLowering.h
index 0e00d16..7686e6f 100644
--- a/lib/Target/AArch64/AArch64FrameLowering.h
+++ b/lib/Target/AArch64/AArch64FrameLowering.h
@@ -18,18 +18,11 @@
namespace llvm {
-class AArch64Subtarget;
-class AArch64TargetMachine;
-
class AArch64FrameLowering : public TargetFrameLowering {
- const AArch64TargetMachine &TM;
-
public:
- explicit AArch64FrameLowering(const AArch64TargetMachine &TM,
- const AArch64Subtarget &STI)
+ explicit AArch64FrameLowering()
: TargetFrameLowering(StackGrowsDown, 16, 0, 16,
- false /*StackRealignable*/),
- TM(TM) {}
+ false /*StackRealignable*/) {}
void emitCalleeSavedFrameMoves(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
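
The FrameLowering changes here and the AArch64InstrInfo changes below follow one
refactoring theme: stop caching an AArch64TargetMachine reference at
construction and instead query target hooks from the MachineFunction at each
use, so the object carries no per-target state. The pattern in isolation (a
sketch against the 3.5-era API; instrInfoFor is a hypothetical name):

    #include "llvm/CodeGen/MachineFunction.h"
    #include "llvm/Target/TargetInstrInfo.h"
    #include "llvm/Target/TargetMachine.h"
    using namespace llvm;

    // Fetch the instruction info for whatever target MF was built for,
    // rather than holding a pointer captured at construction time.
    static const TargetInstrInfo *instrInfoFor(const MachineFunction &MF) {
      return MF.getTarget().getInstrInfo();
    }
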
diff --git a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index 7007ffc..3f49fab 100644
--- a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -153,9 +153,6 @@ public:
SDNode *SelectStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
SDNode *SelectPostStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
- SDNode *SelectSIMDAddSubNarrowing(unsigned IntNo, SDNode *Node);
- SDNode *SelectSIMDXtnNarrowing(unsigned IntNo, SDNode *Node);
-
SDNode *SelectBitfieldExtractOp(SDNode *N);
SDNode *SelectBitfieldInsertOp(SDNode *N);
@@ -596,8 +593,9 @@ bool AArch64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size,
const GlobalValue *GV = GAN->getGlobal();
unsigned Alignment = GV->getAlignment();
const DataLayout *DL = TLI->getDataLayout();
- if (Alignment == 0 && !Subtarget->isTargetDarwin())
- Alignment = DL->getABITypeAlignment(GV->getType()->getElementType());
+ Type *Ty = GV->getType()->getElementType();
+ if (Alignment == 0 && Ty->isSized() && !Subtarget->isTargetDarwin())
+ Alignment = DL->getABITypeAlignment(Ty);
if (Alignment >= Size)
return true;
@@ -2111,7 +2109,7 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
.getVectorElementType()
.getSizeInBits()) {
default:
- assert(0 && "Unexpected vector element type!");
+ llvm_unreachable("Unexpected vector element type!");
case 64:
SubReg = AArch64::dsub;
break;
diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp
index 80d6669..28d0035 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -67,15 +67,15 @@ EnableAArch64SlrGeneration("aarch64-shift-insert-generation", cl::Hidden,
//===----------------------------------------------------------------------===//
// AArch64 Lowering public interface.
//===----------------------------------------------------------------------===//
-static TargetLoweringObjectFile *createTLOF(TargetMachine &TM) {
- if (TM.getSubtarget<AArch64Subtarget>().isTargetDarwin())
+static TargetLoweringObjectFile *createTLOF(const Triple &TT) {
+ if (TT.isOSBinFormatMachO())
return new AArch64_MachoTargetObjectFile();
return new AArch64_ELFTargetObjectFile();
}
-AArch64TargetLowering::AArch64TargetLowering(AArch64TargetMachine &TM)
- : TargetLowering(TM, createTLOF(TM)) {
+AArch64TargetLowering::AArch64TargetLowering(TargetMachine &TM)
+ : TargetLowering(TM, createTLOF(Triple(TM.getTargetTriple()))) {
Subtarget = &TM.getSubtarget<AArch64Subtarget>();
// AArch64 doesn't have comparisons which set GPRs or setcc instructions, so
@@ -627,7 +627,7 @@ MVT AArch64TargetLowering::getScalarShiftAmountTy(EVT LHSTy) const {
unsigned AArch64TargetLowering::getMaximalGlobalOffset() const {
// FIXME: On AArch64, this depends on the type.
- // Basically, the addressable offsets are o to 4095 * Ty.getSizeInBytes().
+  // Basically, the addressable offsets are up to 4095 * Ty.getSizeInBytes(),
// and the offset has to be a multiple of the related size in bytes.
return 4095;
}
@@ -823,8 +823,7 @@ AArch64TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
#ifndef NDEBUG
MI->dump();
#endif
- assert(0 && "Unexpected instruction for custom inserter!");
- break;
+ llvm_unreachable("Unexpected instruction for custom inserter!");
case AArch64::F128CSEL:
return EmitF128CSEL(MI, BB);
@@ -833,7 +832,6 @@ AArch64TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
case TargetOpcode::PATCHPOINT:
return emitPatchPoint(MI, BB);
}
- llvm_unreachable("Unexpected instruction for custom inserter!");
}
//===----------------------------------------------------------------------===//
@@ -1273,7 +1271,7 @@ static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) {
bool ExtraOp = false;
switch (Op.getOpcode()) {
default:
- assert(0 && "Invalid code");
+ llvm_unreachable("Invalid code");
case ISD::ADDC:
Opc = AArch64ISD::ADDS;
break;
@@ -1387,24 +1385,22 @@ static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) {
EVT InVT = Op.getOperand(0).getValueType();
EVT VT = Op.getValueType();
- // FP_TO_XINT conversion from the same type are legal.
- if (VT.getSizeInBits() == InVT.getSizeInBits())
- return Op;
-
- if (InVT == MVT::v2f64 || InVT == MVT::v4f32) {
+ if (VT.getSizeInBits() < InVT.getSizeInBits()) {
SDLoc dl(Op);
SDValue Cv =
DAG.getNode(Op.getOpcode(), dl, InVT.changeVectorElementTypeToInteger(),
Op.getOperand(0));
return DAG.getNode(ISD::TRUNCATE, dl, VT, Cv);
- } else if (InVT == MVT::v2f32) {
+ }
+
+ if (VT.getSizeInBits() > InVT.getSizeInBits()) {
SDLoc dl(Op);
SDValue Ext = DAG.getNode(ISD::FP_EXTEND, dl, MVT::v2f64, Op.getOperand(0));
return DAG.getNode(Op.getOpcode(), dl, VT, Ext);
}
// Type changing conversions are illegal.
- return SDValue();
+ return Op;
}
SDValue AArch64TargetLowering::LowerFP_TO_INT(SDValue Op,
@@ -1440,32 +1436,23 @@ static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
SDValue In = Op.getOperand(0);
EVT InVT = In.getValueType();
- // v2i32 to v2f32 is legal.
- if (VT == MVT::v2f32 && InVT == MVT::v2i32)
- return Op;
-
- // This function only handles v2f64 outputs.
- if (VT == MVT::v2f64) {
- // Extend the input argument to a v2i64 that we can feed into the
- // floating point conversion. Zero or sign extend based on whether
- // we're doing a signed or unsigned float conversion.
- unsigned Opc =
- Op.getOpcode() == ISD::UINT_TO_FP ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND;
- assert(Op.getNumOperands() == 1 && "FP conversions take one argument");
- SDValue Promoted = DAG.getNode(Opc, dl, MVT::v2i64, Op.getOperand(0));
- return DAG.getNode(Op.getOpcode(), dl, Op.getValueType(), Promoted);
+ if (VT.getSizeInBits() < InVT.getSizeInBits()) {
+ MVT CastVT =
+ MVT::getVectorVT(MVT::getFloatingPointVT(InVT.getScalarSizeInBits()),
+ InVT.getVectorNumElements());
+ In = DAG.getNode(Op.getOpcode(), dl, CastVT, In);
+ return DAG.getNode(ISD::FP_ROUND, dl, VT, In, DAG.getIntPtrConstant(0));
}
- // Scalarize v2i64 to v2f32 conversions.
- std::vector<SDValue> BuildVectorOps;
- for (unsigned i = 0; i < VT.getVectorNumElements(); ++i) {
- SDValue Sclr = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, In,
- DAG.getConstant(i, MVT::i64));
- Sclr = DAG.getNode(Op->getOpcode(), dl, MVT::f32, Sclr);
- BuildVectorOps.push_back(Sclr);
+ if (VT.getSizeInBits() > InVT.getSizeInBits()) {
+ unsigned CastOpc =
+ Op.getOpcode() == ISD::SINT_TO_FP ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
+ EVT CastVT = VT.changeVectorElementTypeToInteger();
+ In = DAG.getNode(CastOpc, dl, CastVT, In);
+ return DAG.getNode(Op.getOpcode(), dl, VT, In);
}
- return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, BuildVectorOps);
+ return Op;
}
SDValue AArch64TargetLowering::LowerINT_TO_FP(SDValue Op,
@@ -1516,7 +1503,7 @@ SDValue AArch64TargetLowering::LowerFSINCOS(SDValue Op,
StructType *RetTy = StructType::get(ArgTy, ArgTy, NULL);
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(dl).setChain(DAG.getEntryNode())
- .setCallee(CallingConv::Fast, RetTy, Callee, &Args, 0);
+ .setCallee(CallingConv::Fast, RetTy, Callee, std::move(Args), 0);
std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
return CallResult.first;
@@ -1711,7 +1698,9 @@ SDValue AArch64TargetLowering::LowerFormalArguments(
InVals.push_back(FrameIdxN);
continue;
- } if (VA.isRegLoc()) {
+ }
+
+ if (VA.isRegLoc()) {
// Arguments stored in registers.
EVT RegVT = VA.getLocVT();
@@ -1772,10 +1761,16 @@ SDValue AArch64TargetLowering::LowerFormalArguments(
SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
SDValue ArgValue;
+    // For NON_EXTLOAD, generic code in getLoad asserts that ValVT == MemVT.
ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
+ MVT MemVT = VA.getValVT();
+
switch (VA.getLocInfo()) {
default:
break;
+ case CCValAssign::BCvt:
+ MemVT = VA.getLocVT();
+ break;
case CCValAssign::SExt:
ExtType = ISD::SEXTLOAD;
break;
@@ -1787,10 +1782,9 @@ SDValue AArch64TargetLowering::LowerFormalArguments(
break;
}
- ArgValue = DAG.getExtLoad(ExtType, DL, VA.getValVT(), Chain, FIN,
+ ArgValue = DAG.getExtLoad(ExtType, DL, VA.getLocVT(), Chain, FIN,
MachinePointerInfo::getFixedStack(FI),
- VA.getLocVT(),
- false, false, false, 0);
+                              MemVT, false, false, false, 0);
InVals.push_back(ArgValue);
}
@@ -2339,11 +2333,9 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
// Since we pass i1/i8/i16 as i1/i8/i16 on stack and Arg is already
// promoted to a legal register type i32, we should truncate Arg back to
// i1/i8/i16.
- if (Arg.getValueType().isSimple() &&
- Arg.getValueType().getSimpleVT() == MVT::i32 &&
- (VA.getLocVT() == MVT::i1 || VA.getLocVT() == MVT::i8 ||
- VA.getLocVT() == MVT::i16))
- Arg = DAG.getNode(ISD::TRUNCATE, DL, VA.getLocVT(), Arg);
+ if (VA.getValVT() == MVT::i1 || VA.getValVT() == MVT::i8 ||
+ VA.getValVT() == MVT::i16)
+ Arg = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Arg);
SDValue Store =
DAG.getStore(Chain, DL, Arg, DstAddr, DstInfo, false, false, 0);
@@ -4116,6 +4108,7 @@ static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG) {
// shuffle in combination with VEXTs.
SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
SelectionDAG &DAG) const {
+ assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unknown opcode!");
SDLoc dl(Op);
EVT VT = Op.getValueType();
unsigned NumElts = VT.getVectorNumElements();
@@ -4164,35 +4157,47 @@ SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
SDValue ShuffleSrcs[2] = { DAG.getUNDEF(VT), DAG.getUNDEF(VT) };
int VEXTOffsets[2] = { 0, 0 };
+ int OffsetMultipliers[2] = { 1, 1 };
// This loop extracts the usage patterns of the source vectors
// and prepares appropriate SDValues for a shuffle if possible.
for (unsigned i = 0; i < SourceVecs.size(); ++i) {
- if (SourceVecs[i].getValueType() == VT) {
+ unsigned NumSrcElts = SourceVecs[i].getValueType().getVectorNumElements();
+ SDValue CurSource = SourceVecs[i];
+ if (SourceVecs[i].getValueType().getVectorElementType() !=
+ VT.getVectorElementType()) {
+      // We may hit this case if SourceVecs[i] is AssertSext/AssertZext.
+ // Then bitcast it to the vector which holds asserted element type,
+ // and record the multiplier of element width between SourceVecs and
+ // Build_vector which is needed to extract the correct lanes later.
+ EVT CastVT =
+ EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(),
+ SourceVecs[i].getValueSizeInBits() /
+ VT.getVectorElementType().getSizeInBits());
+
+ CurSource = DAG.getNode(ISD::BITCAST, dl, CastVT, SourceVecs[i]);
+ OffsetMultipliers[i] = CastVT.getVectorNumElements() / NumSrcElts;
+ NumSrcElts *= OffsetMultipliers[i];
+ MaxElts[i] *= OffsetMultipliers[i];
+ MinElts[i] *= OffsetMultipliers[i];
+ }
+
+ if (CurSource.getValueType() == VT) {
// No VEXT necessary
- ShuffleSrcs[i] = SourceVecs[i];
+ ShuffleSrcs[i] = CurSource;
VEXTOffsets[i] = 0;
continue;
- } else if (SourceVecs[i].getValueType().getVectorNumElements() < NumElts) {
+ } else if (NumSrcElts < NumElts) {
// We can pad out the smaller vector for free, so if it's part of a
// shuffle...
- ShuffleSrcs[i] = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, SourceVecs[i],
- DAG.getUNDEF(SourceVecs[i].getValueType()));
+ ShuffleSrcs[i] = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, CurSource,
+ DAG.getUNDEF(CurSource.getValueType()));
continue;
}
- // Don't attempt to extract subvectors from BUILD_VECTOR sources
- // that expand or trunc the original value.
- // TODO: We can try to bitcast and ANY_EXTEND the result but
- // we need to consider the cost of vector ANY_EXTEND, and the
- // legality of all the types.
- if (SourceVecs[i].getValueType().getVectorElementType() !=
- VT.getVectorElementType())
- return SDValue();
-
// Since only 64-bit and 128-bit vectors are legal on ARM and
// we've eliminated the other cases...
- assert(SourceVecs[i].getValueType().getVectorNumElements() == 2 * NumElts &&
+ assert(NumSrcElts == 2 * NumElts &&
"unexpected vector sizes in ReconstructShuffle");
if (MaxElts[i] - MinElts[i] >= NumElts) {
@@ -4203,22 +4208,20 @@ SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
if (MinElts[i] >= NumElts) {
// The extraction can just take the second half
VEXTOffsets[i] = NumElts;
- ShuffleSrcs[i] =
- DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, SourceVecs[i],
- DAG.getIntPtrConstant(NumElts));
+ ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, CurSource,
+ DAG.getIntPtrConstant(NumElts));
} else if (MaxElts[i] < NumElts) {
// The extraction can just take the first half
VEXTOffsets[i] = 0;
- ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,
- SourceVecs[i], DAG.getIntPtrConstant(0));
+ ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, CurSource,
+ DAG.getIntPtrConstant(0));
} else {
// An actual VEXT is needed
VEXTOffsets[i] = MinElts[i];
- SDValue VEXTSrc1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,
- SourceVecs[i], DAG.getIntPtrConstant(0));
- SDValue VEXTSrc2 =
- DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, SourceVecs[i],
- DAG.getIntPtrConstant(NumElts));
+ SDValue VEXTSrc1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, CurSource,
+ DAG.getIntPtrConstant(0));
+ SDValue VEXTSrc2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, CurSource,
+ DAG.getIntPtrConstant(NumElts));
unsigned Imm = VEXTOffsets[i] * getExtFactor(VEXTSrc1);
ShuffleSrcs[i] = DAG.getNode(AArch64ISD::EXT, dl, VT, VEXTSrc1, VEXTSrc2,
DAG.getConstant(Imm, MVT::i32));
@@ -4238,9 +4241,10 @@ SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
int ExtractElt =
cast<ConstantSDNode>(Op.getOperand(i).getOperand(1))->getSExtValue();
if (ExtractVec == SourceVecs[0]) {
- Mask.push_back(ExtractElt - VEXTOffsets[0]);
+ Mask.push_back(ExtractElt * OffsetMultipliers[0] - VEXTOffsets[0]);
} else {
- Mask.push_back(ExtractElt + NumElts - VEXTOffsets[1]);
+ Mask.push_back(ExtractElt * OffsetMultipliers[1] + NumElts -
+ VEXTOffsets[1]);
}
}
@@ -5177,11 +5181,37 @@ FailedModImm:
return Op;
}
+// Normalize the operands of BUILD_VECTOR. The value of constant operands will
+// be truncated to fit element width.
+static SDValue NormalizeBuildVector(SDValue Op, SelectionDAG &DAG) {
+ assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unknown opcode!");
+ SDLoc dl(Op);
+ EVT VT = Op.getValueType();
+  EVT EltTy = VT.getVectorElementType();
+
+ if (EltTy.isFloatingPoint() || EltTy.getSizeInBits() > 16)
+ return Op;
+
+ SmallVector<SDValue, 16> Ops;
+ for (unsigned I = 0, E = VT.getVectorNumElements(); I != E; ++I) {
+ SDValue Lane = Op.getOperand(I);
+ if (Lane.getOpcode() == ISD::Constant) {
+ APInt LowBits(EltTy.getSizeInBits(),
+ cast<ConstantSDNode>(Lane)->getZExtValue());
+ Lane = DAG.getConstant(LowBits.getZExtValue(), MVT::i32);
+ }
+ Ops.push_back(Lane);
+ }
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);
+}
+
SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
SelectionDAG &DAG) const {
- BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode());
SDLoc dl(Op);
EVT VT = Op.getValueType();
+ Op = NormalizeBuildVector(Op, DAG);
+ BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode());
APInt CnstBits(VT.getSizeInBits(), 0);
APInt UndefBits(VT.getSizeInBits(), 0);
@@ -6047,18 +6077,14 @@ bool AArch64TargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
return false;
unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
- if (NumBits1 <= NumBits2)
- return false;
- return true;
+ return NumBits1 > NumBits2;
}
bool AArch64TargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
- if (!VT1.isInteger() || !VT2.isInteger())
+ if (VT1.isVector() || VT2.isVector() || !VT1.isInteger() || !VT2.isInteger())
return false;
unsigned NumBits1 = VT1.getSizeInBits();
unsigned NumBits2 = VT2.getSizeInBits();
- if (NumBits1 <= NumBits2)
- return false;
- return true;
+ return NumBits1 > NumBits2;
}
// All 32-bit GPR operations implicitly zero the high-half of the corresponding
@@ -6068,18 +6094,14 @@ bool AArch64TargetLowering::isZExtFree(Type *Ty1, Type *Ty2) const {
return false;
unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
- if (NumBits1 == 32 && NumBits2 == 64)
- return true;
- return false;
+ return NumBits1 == 32 && NumBits2 == 64;
}
bool AArch64TargetLowering::isZExtFree(EVT VT1, EVT VT2) const {
- if (!VT1.isInteger() || !VT2.isInteger())
+ if (VT1.isVector() || VT2.isVector() || !VT1.isInteger() || !VT2.isInteger())
return false;
unsigned NumBits1 = VT1.getSizeInBits();
unsigned NumBits2 = VT2.getSizeInBits();
- if (NumBits1 == 32 && NumBits2 == 64)
- return true;
- return false;
+ return NumBits1 == 32 && NumBits2 == 64;
}
bool AArch64TargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
@@ -6092,8 +6114,9 @@ bool AArch64TargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
return false;
// 8-, 16-, and 32-bit integer loads all implicitly zero-extend.
- return (VT1.isSimple() && VT1.isInteger() && VT2.isSimple() &&
- VT2.isInteger() && VT1.getSizeInBits() <= 32);
+ return (VT1.isSimple() && !VT1.isVector() && VT1.isInteger() &&
+ VT2.isSimple() && !VT2.isVector() && VT2.isInteger() &&
+ VT1.getSizeInBits() <= 32);
}
bool AArch64TargetLowering::hasPairedLoad(Type *LoadedType,
@@ -6346,23 +6369,45 @@ static SDValue performMulCombine(SDNode *N, SelectionDAG &DAG,
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
APInt Value = C->getAPIntValue();
EVT VT = N->getValueType(0);
- APInt VP1 = Value + 1;
- if (VP1.isPowerOf2()) {
- // Multiplying by one less than a power of two, replace with a shift
- // and a subtract.
- SDValue ShiftedVal =
- DAG.getNode(ISD::SHL, SDLoc(N), VT, N->getOperand(0),
- DAG.getConstant(VP1.logBase2(), MVT::i64));
- return DAG.getNode(ISD::SUB, SDLoc(N), VT, ShiftedVal, N->getOperand(0));
- }
- APInt VM1 = Value - 1;
- if (VM1.isPowerOf2()) {
- // Multiplying by one more than a power of two, replace with a shift
- // and an add.
- SDValue ShiftedVal =
- DAG.getNode(ISD::SHL, SDLoc(N), VT, N->getOperand(0),
- DAG.getConstant(VM1.logBase2(), MVT::i64));
- return DAG.getNode(ISD::ADD, SDLoc(N), VT, ShiftedVal, N->getOperand(0));
+ if (Value.isNonNegative()) {
+ // (mul x, 2^N + 1) => (add (shl x, N), x)
+ APInt VM1 = Value - 1;
+ if (VM1.isPowerOf2()) {
+ SDValue ShiftedVal =
+ DAG.getNode(ISD::SHL, SDLoc(N), VT, N->getOperand(0),
+ DAG.getConstant(VM1.logBase2(), MVT::i64));
+ return DAG.getNode(ISD::ADD, SDLoc(N), VT, ShiftedVal,
+ N->getOperand(0));
+ }
+ // (mul x, 2^N - 1) => (sub (shl x, N), x)
+ APInt VP1 = Value + 1;
+ if (VP1.isPowerOf2()) {
+ SDValue ShiftedVal =
+ DAG.getNode(ISD::SHL, SDLoc(N), VT, N->getOperand(0),
+ DAG.getConstant(VP1.logBase2(), MVT::i64));
+ return DAG.getNode(ISD::SUB, SDLoc(N), VT, ShiftedVal,
+ N->getOperand(0));
+ }
+ } else {
+ // (mul x, -(2^N + 1)) => - (add (shl x, N), x)
+ APInt VNM1 = -Value - 1;
+ if (VNM1.isPowerOf2()) {
+ SDValue ShiftedVal =
+ DAG.getNode(ISD::SHL, SDLoc(N), VT, N->getOperand(0),
+ DAG.getConstant(VNM1.logBase2(), MVT::i64));
+ SDValue Add =
+ DAG.getNode(ISD::ADD, SDLoc(N), VT, ShiftedVal, N->getOperand(0));
+ return DAG.getNode(ISD::SUB, SDLoc(N), VT, DAG.getConstant(0, VT), Add);
+ }
+ // (mul x, -(2^N - 1)) => (sub x, (shl x, N))
+ APInt VNP1 = -Value + 1;
+ if (VNP1.isPowerOf2()) {
+ SDValue ShiftedVal =
+ DAG.getNode(ISD::SHL, SDLoc(N), VT, N->getOperand(0),
+ DAG.getConstant(VNP1.logBase2(), MVT::i64));
+ return DAG.getNode(ISD::SUB, SDLoc(N), VT, N->getOperand(0),
+ ShiftedVal);
+ }
}
}
return SDValue();
@@ -6687,7 +6732,7 @@ static SDValue tryCombineFixedPointConvert(SDNode *N,
else if (Vec.getValueType() == MVT::v2i64)
VecResTy = MVT::v2f64;
else
- assert(0 && "unexpected vector type!");
+ llvm_unreachable("unexpected vector type!");
SDValue Convert =
DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VecResTy, IID, Vec, Shift);
@@ -7020,7 +7065,7 @@ static SDValue tryCombineShiftImm(unsigned IID, SDNode *N, SelectionDAG &DAG) {
if (IsRightShift && ShiftAmount <= -1 && ShiftAmount >= -(int)ElemBits)
return DAG.getNode(Opcode, SDLoc(N), N->getValueType(0), N->getOperand(1),
DAG.getConstant(-ShiftAmount, MVT::i32));
- else if (!IsRightShift && ShiftAmount >= 0 && ShiftAmount <= ElemBits)
+ else if (!IsRightShift && ShiftAmount >= 0 && ShiftAmount < ElemBits)
return DAG.getNode(Opcode, SDLoc(N), N->getValueType(0), N->getOperand(1),
DAG.getConstant(ShiftAmount, MVT::i32));
@@ -7867,6 +7912,18 @@ bool AArch64TargetLowering::shouldExpandAtomicInIR(Instruction *Inst) const {
return Inst->getType()->getPrimitiveSizeInBits() <= 128;
}
+TargetLoweringBase::LegalizeTypeAction
+AArch64TargetLowering::getPreferredVectorAction(EVT VT) const {
+ MVT SVT = VT.getSimpleVT();
+ // During type legalization, we prefer to widen v1i8, v1i16, v1i32 to v8i8,
+ // v4i16, v2i32 instead of to promote.
+  if (SVT == MVT::v1i8 || SVT == MVT::v1i16 || SVT == MVT::v1i32 ||
+      SVT == MVT::v1f32)
+ return TypeWidenVector;
+
+ return TargetLoweringBase::getPreferredVectorAction(VT);
+}
+
Value *AArch64TargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
AtomicOrdering Ord) const {
Module *M = Builder.GetInsertBlock()->getParent()->getParent();
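
performMulCombine above now also strength-reduces multiplication by negative
constants adjacent to a power of two; the negative cases need the extra negate
or the swapped subtraction. The four identities, checked numerically
(standalone sketch; shl() models the two's-complement behavior of the ISD::SHL
node, not any LLVM API):

    #include <cassert>
    #include <cstdint>

    static int64_t shl(int64_t X, unsigned N) {
      return (int64_t)((uint64_t)X << N); // two's-complement left shift
    }

    int main() {
      for (int64_t X = -100; X <= 100; ++X) {
        assert(X * 9  == shl(X, 3) + X);       // mul x, 2^N + 1
        assert(X * 7  == shl(X, 3) - X);       // mul x, 2^N - 1
        assert(X * -9 == 0 - (shl(X, 3) + X)); // mul x, -(2^N + 1)
        assert(X * -7 == X - shl(X, 3));       // mul x, -(2^N - 1)
      }
      return 0;
    }
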
diff --git a/lib/Target/AArch64/AArch64ISelLowering.h b/lib/Target/AArch64/AArch64ISelLowering.h
index de16c4d..cb0b9ef 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/lib/Target/AArch64/AArch64ISelLowering.h
@@ -197,7 +197,7 @@ class AArch64TargetLowering : public TargetLowering {
bool RequireStrictAlign;
public:
- explicit AArch64TargetLowering(AArch64TargetMachine &TM);
+ explicit AArch64TargetLowering(TargetMachine &TM);
/// Selects the correct CCAssignFn for a the given CallingConvention
/// value.
@@ -324,6 +324,9 @@ public:
bool shouldExpandAtomicInIR(Instruction *Inst) const override;
+ TargetLoweringBase::LegalizeTypeAction
+ getPreferredVectorAction(EVT VT) const override;
+
private:
/// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
/// make the right decision when generating code for different targets.
diff --git a/lib/Target/AArch64/AArch64InstrFormats.td b/lib/Target/AArch64/AArch64InstrFormats.td
index d455d7e..5007172 100644
--- a/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/lib/Target/AArch64/AArch64InstrFormats.td
@@ -448,13 +448,19 @@ def logical_imm64_XFORM : SDNodeXForm<imm, [{
return CurDAG->getTargetConstant(enc, MVT::i32);
}]>;
-def LogicalImm32Operand : AsmOperandClass {
- let Name = "LogicalImm32";
- let DiagnosticType = "LogicalSecondSource";
-}
-def LogicalImm64Operand : AsmOperandClass {
- let Name = "LogicalImm64";
- let DiagnosticType = "LogicalSecondSource";
+let DiagnosticType = "LogicalSecondSource" in {
+ def LogicalImm32Operand : AsmOperandClass {
+ let Name = "LogicalImm32";
+ }
+ def LogicalImm64Operand : AsmOperandClass {
+ let Name = "LogicalImm64";
+ }
+ def LogicalImm32NotOperand : AsmOperandClass {
+ let Name = "LogicalImm32Not";
+ }
+ def LogicalImm64NotOperand : AsmOperandClass {
+ let Name = "LogicalImm64Not";
+ }
}
def logical_imm32 : Operand<i32>, PatLeaf<(imm), [{
return AArch64_AM::isLogicalImmediate(N->getZExtValue(), 32);
@@ -468,6 +474,12 @@ def logical_imm64 : Operand<i64>, PatLeaf<(imm), [{
let PrintMethod = "printLogicalImm64";
let ParserMatchClass = LogicalImm64Operand;
}
+def logical_imm32_not : Operand<i32> {
+ let ParserMatchClass = LogicalImm32NotOperand;
+}
+def logical_imm64_not : Operand<i64> {
+ let ParserMatchClass = LogicalImm64NotOperand;
+}
// imm0_65535 predicate - True if the immediate is in the range [0,65535].
def Imm0_65535Operand : AsmImmRange<0, 65535>;
@@ -963,8 +975,14 @@ def ccode : Operand<i32> {
let ParserMatchClass = CondCode;
}
def inv_ccode : Operand<i32> {
+ // AL and NV are invalid in the aliases which use inv_ccode
let PrintMethod = "printInverseCondCode";
let ParserMatchClass = CondCode;
+ let MCOperandPredicate = [{
+ return MCOp.isImm() &&
+ MCOp.getImm() != AArch64CC::AL &&
+ MCOp.getImm() != AArch64CC::NV;
+ }];
}
// Conditional branch target. 19-bit immediate. The low two bits of the target
@@ -1323,13 +1341,13 @@ class BaseMulAccum<bit isSub, bits<3> opc, RegisterClass multype,
multiclass MulAccum<bit isSub, string asm, SDNode AccNode> {
def Wrrr : BaseMulAccum<isSub, 0b000, GPR32, GPR32, asm,
[(set GPR32:$Rd, (AccNode GPR32:$Ra, (mul GPR32:$Rn, GPR32:$Rm)))]>,
- Sched<[WriteIM32, ReadIMA, ReadIM, ReadIM]> {
+ Sched<[WriteIM32, ReadIM, ReadIM, ReadIMA]> {
let Inst{31} = 0;
}
def Xrrr : BaseMulAccum<isSub, 0b000, GPR64, GPR64, asm,
[(set GPR64:$Rd, (AccNode GPR64:$Ra, (mul GPR64:$Rn, GPR64:$Rm)))]>,
- Sched<[WriteIM64, ReadIMA, ReadIM, ReadIM]> {
+ Sched<[WriteIM64, ReadIM, ReadIM, ReadIMA]> {
let Inst{31} = 1;
}
}
@@ -1339,7 +1357,7 @@ class WideMulAccum<bit isSub, bits<3> opc, string asm,
: BaseMulAccum<isSub, opc, GPR32, GPR64, asm,
[(set GPR64:$Rd, (AccNode GPR64:$Ra,
(mul (ExtNode GPR32:$Rn), (ExtNode GPR32:$Rm))))]>,
- Sched<[WriteIM32, ReadIMA, ReadIM, ReadIM]> {
+ Sched<[WriteIM32, ReadIM, ReadIM, ReadIMA]> {
let Inst{31} = 1;
}
@@ -1738,6 +1756,10 @@ multiclass AddSubS<bit isSub, string mnemonic, SDNode OpNode, string cmp> {
WZR, GPR32:$src1, GPR32:$src2, 0), 5>;
def : InstAlias<cmp#" $src1, $src2", (!cast<Instruction>(NAME#"Xrs")
XZR, GPR64:$src1, GPR64:$src2, 0), 5>;
+ def : InstAlias<cmp#" $src1, $src2", (!cast<Instruction>(NAME#"Wrx")
+ WZR, GPR32sponly:$src1, GPR32:$src2, 16), 5>;
+ def : InstAlias<cmp#" $src1, $src2", (!cast<Instruction>(NAME#"Xrx64")
+ XZR, GPR64sponly:$src1, GPR64:$src2, 24), 5>;
// Register/register aliases with no shift when SP is not used.
def : AddSubRegAlias<mnemonic, !cast<Instruction>(NAME#"Wrs"),
@@ -1925,22 +1947,32 @@ class LogicalRegAlias<string asm, Instruction inst, RegisterClass regtype>
: InstAlias<asm#" $dst, $src1, $src2",
(inst regtype:$dst, regtype:$src1, regtype:$src2, 0)>;
-let AddedComplexity = 6 in
-multiclass LogicalImm<bits<2> opc, string mnemonic, SDNode OpNode> {
+multiclass LogicalImm<bits<2> opc, string mnemonic, SDNode OpNode,
+ string Alias> {
+ let AddedComplexity = 6 in
def Wri : BaseLogicalImm<opc, GPR32sp, GPR32, logical_imm32, mnemonic,
[(set GPR32sp:$Rd, (OpNode GPR32:$Rn,
logical_imm32:$imm))]> {
let Inst{31} = 0;
let Inst{22} = 0; // 64-bit version has an additional bit of immediate.
}
+ let AddedComplexity = 6 in
def Xri : BaseLogicalImm<opc, GPR64sp, GPR64, logical_imm64, mnemonic,
[(set GPR64sp:$Rd, (OpNode GPR64:$Rn,
logical_imm64:$imm))]> {
let Inst{31} = 1;
}
+
+ def : InstAlias<Alias # " $Rd, $Rn, $imm",
+ (!cast<Instruction>(NAME # "Wri") GPR32sp:$Rd, GPR32:$Rn,
+ logical_imm32_not:$imm), 0>;
+ def : InstAlias<Alias # " $Rd, $Rn, $imm",
+ (!cast<Instruction>(NAME # "Xri") GPR64sp:$Rd, GPR64:$Rn,
+ logical_imm64_not:$imm), 0>;
}
-multiclass LogicalImmS<bits<2> opc, string mnemonic, SDNode OpNode> {
+multiclass LogicalImmS<bits<2> opc, string mnemonic, SDNode OpNode,
+ string Alias> {
let isCompare = 1, Defs = [NZCV] in {
def Wri : BaseLogicalImm<opc, GPR32, GPR32, logical_imm32, mnemonic,
[(set GPR32:$Rd, (OpNode GPR32:$Rn, logical_imm32:$imm))]> {
@@ -1952,6 +1984,13 @@ multiclass LogicalImmS<bits<2> opc, string mnemonic, SDNode OpNode> {
let Inst{31} = 1;
}
} // end Defs = [NZCV]
+
+ def : InstAlias<Alias # " $Rd, $Rn, $imm",
+ (!cast<Instruction>(NAME # "Wri") GPR32:$Rd, GPR32:$Rn,
+ logical_imm32_not:$imm), 0>;
+ def : InstAlias<Alias # " $Rd, $Rn, $imm",
+ (!cast<Instruction>(NAME # "Xri") GPR64:$Rd, GPR64:$Rn,
+ logical_imm64_not:$imm), 0>;
}
class BaseLogicalRegPseudo<RegisterClass regtype, SDPatternOperator OpNode>
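
The bic/eon/orn immediate aliases above (wired up in AArch64InstrInfo.td below)
accept any immediate whose bitwise complement encodes as an ordinary logical
immediate; the assembler then emits the underlying and/eor/orr with the
inverted value. The implied operand check, sketched (isLogicalImmediate stands
in for AArch64_AM::isLogicalImmediate; the real predicate lives in the
AsmParser, outside this diff):

    #include <cstdint>

    extern bool isLogicalImmediate(uint64_t Imm, unsigned RegWidth);

    // "bic w0, w1, #Imm" is acceptable iff ~Imm (within 32 bits) is a
    // valid AArch64 logical immediate for the underlying "and".
    static bool isLogicalImmediateNot(uint64_t Imm, unsigned RegWidth) {
      uint64_t Mask = RegWidth == 32 ? 0xffffffffULL : ~0ULL;
      return isLogicalImmediate(~Imm & Mask, RegWidth);
    }
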
diff --git a/lib/Target/AArch64/AArch64InstrInfo.cpp b/lib/Target/AArch64/AArch64InstrInfo.cpp
index ff115c0..ce85b2c 100644
--- a/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -35,8 +35,14 @@ AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI)
/// GetInstSize - Return the number of bytes of code the specified
/// instruction may be. This returns the maximum number of bytes.
unsigned AArch64InstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
- const MCInstrDesc &Desc = MI->getDesc();
+ const MachineBasicBlock &MBB = *MI->getParent();
+ const MachineFunction *MF = MBB.getParent();
+ const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();
+
+ if (MI->getOpcode() == AArch64::INLINEASM)
+ return getInlineAsmLength(MI->getOperand(0).getSymbolName(), *MAI);
+ const MCInstrDesc &Desc = MI->getDesc();
switch (Desc.getOpcode()) {
default:
    // Anything not explicitly designated otherwise is a normal 4-byte insn.
@@ -1224,7 +1230,7 @@ void AArch64InstrInfo::copyPhysRegTuple(
MachineBasicBlock &MBB, MachineBasicBlock::iterator I, DebugLoc DL,
unsigned DestReg, unsigned SrcReg, bool KillSrc, unsigned Opcode,
llvm::ArrayRef<unsigned> Indices) const {
- assert(getSubTarget().hasNEON() &&
+ assert(Subtarget.hasNEON() &&
"Unexpected register copy without NEON");
const TargetRegisterInfo *TRI = &getRegisterInfo();
uint16_t DestEncoding = TRI->getEncodingValue(DestReg);
@@ -1385,7 +1391,7 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
if (AArch64::FPR128RegClass.contains(DestReg) &&
AArch64::FPR128RegClass.contains(SrcReg)) {
- if(getSubTarget().hasNEON()) {
+    if (Subtarget.hasNEON()) {
BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
.addReg(SrcReg)
.addReg(SrcReg, getKillRegState(KillSrc));
@@ -1406,7 +1412,7 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
if (AArch64::FPR64RegClass.contains(DestReg) &&
AArch64::FPR64RegClass.contains(SrcReg)) {
- if(getSubTarget().hasNEON()) {
+    if (Subtarget.hasNEON()) {
DestReg = RI.getMatchingSuperReg(DestReg, AArch64::dsub,
&AArch64::FPR128RegClass);
SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::dsub,
@@ -1423,7 +1429,7 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
if (AArch64::FPR32RegClass.contains(DestReg) &&
AArch64::FPR32RegClass.contains(SrcReg)) {
- if(getSubTarget().hasNEON()) {
+    if (Subtarget.hasNEON()) {
DestReg = RI.getMatchingSuperReg(DestReg, AArch64::ssub,
&AArch64::FPR128RegClass);
SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::ssub,
@@ -1440,7 +1446,7 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
if (AArch64::FPR16RegClass.contains(DestReg) &&
AArch64::FPR16RegClass.contains(SrcReg)) {
- if(getSubTarget().hasNEON()) {
+    if (Subtarget.hasNEON()) {
DestReg = RI.getMatchingSuperReg(DestReg, AArch64::hsub,
&AArch64::FPR128RegClass);
SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::hsub,
@@ -1461,7 +1467,7 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
if (AArch64::FPR8RegClass.contains(DestReg) &&
AArch64::FPR8RegClass.contains(SrcReg)) {
- if(getSubTarget().hasNEON()) {
+    if (Subtarget.hasNEON()) {
DestReg = RI.getMatchingSuperReg(DestReg, AArch64::bsub,
&AArch64::FPR128RegClass);
SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::bsub,
@@ -1577,39 +1583,39 @@ void AArch64InstrInfo::storeRegToStackSlot(
if (AArch64::FPR128RegClass.hasSubClassEq(RC))
Opc = AArch64::STRQui;
else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
- assert(getSubTarget().hasNEON() &&
+ assert(Subtarget.hasNEON() &&
"Unexpected register store without NEON");
Opc = AArch64::ST1Twov1d, Offset = false;
}
break;
case 24:
if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
- assert(getSubTarget().hasNEON() &&
+ assert(Subtarget.hasNEON() &&
"Unexpected register store without NEON");
Opc = AArch64::ST1Threev1d, Offset = false;
}
break;
case 32:
if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
- assert(getSubTarget().hasNEON() &&
+ assert(Subtarget.hasNEON() &&
"Unexpected register store without NEON");
Opc = AArch64::ST1Fourv1d, Offset = false;
} else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
- assert(getSubTarget().hasNEON() &&
+ assert(Subtarget.hasNEON() &&
"Unexpected register store without NEON");
Opc = AArch64::ST1Twov2d, Offset = false;
}
break;
case 48:
if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
- assert(getSubTarget().hasNEON() &&
+ assert(Subtarget.hasNEON() &&
"Unexpected register store without NEON");
Opc = AArch64::ST1Threev2d, Offset = false;
}
break;
case 64:
if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
- assert(getSubTarget().hasNEON() &&
+ assert(Subtarget.hasNEON() &&
"Unexpected register store without NEON");
Opc = AArch64::ST1Fourv2d, Offset = false;
}
@@ -1675,39 +1681,39 @@ void AArch64InstrInfo::loadRegFromStackSlot(
if (AArch64::FPR128RegClass.hasSubClassEq(RC))
Opc = AArch64::LDRQui;
else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
- assert(getSubTarget().hasNEON() &&
+ assert(Subtarget.hasNEON() &&
"Unexpected register load without NEON");
Opc = AArch64::LD1Twov1d, Offset = false;
}
break;
case 24:
if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
- assert(getSubTarget().hasNEON() &&
+ assert(Subtarget.hasNEON() &&
"Unexpected register load without NEON");
Opc = AArch64::LD1Threev1d, Offset = false;
}
break;
case 32:
if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
- assert(getSubTarget().hasNEON() &&
+ assert(Subtarget.hasNEON() &&
"Unexpected register load without NEON");
Opc = AArch64::LD1Fourv1d, Offset = false;
} else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
- assert(getSubTarget().hasNEON() &&
+ assert(Subtarget.hasNEON() &&
"Unexpected register load without NEON");
Opc = AArch64::LD1Twov2d, Offset = false;
}
break;
case 48:
if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
- assert(getSubTarget().hasNEON() &&
+ assert(Subtarget.hasNEON() &&
"Unexpected register load without NEON");
Opc = AArch64::LD1Threev2d, Offset = false;
}
break;
case 64:
if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
- assert(getSubTarget().hasNEON() &&
+ assert(Subtarget.hasNEON() &&
"Unexpected register load without NEON");
Opc = AArch64::LD1Fourv2d, Offset = false;
}
@@ -1726,7 +1732,7 @@ void AArch64InstrInfo::loadRegFromStackSlot(
void llvm::emitFrameOffset(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI, DebugLoc DL,
unsigned DestReg, unsigned SrcReg, int Offset,
- const AArch64InstrInfo *TII,
+ const TargetInstrInfo *TII,
MachineInstr::MIFlag Flag, bool SetNZCV) {
if (DestReg == SrcReg && Offset == 0)
return;
@@ -1835,7 +1841,7 @@ int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI, int &Offset,
*OutUnscaledOp = 0;
switch (MI.getOpcode()) {
default:
- assert(0 && "unhandled opcode in rewriteAArch64FrameIndex");
+ llvm_unreachable("unhandled opcode in rewriteAArch64FrameIndex");
// Vector spills/fills can't take an immediate offset.
case AArch64::LD1Twov2d:
case AArch64::LD1Threev2d:
diff --git a/lib/Target/AArch64/AArch64InstrInfo.h b/lib/Target/AArch64/AArch64InstrInfo.h
index 90ce75f..f70b82b 100644
--- a/lib/Target/AArch64/AArch64InstrInfo.h
+++ b/lib/Target/AArch64/AArch64InstrInfo.h
@@ -44,8 +44,6 @@ public:
/// always be able to get register info as well (through this method).
const AArch64RegisterInfo &getRegisterInfo() const { return RI; }
- const AArch64Subtarget &getSubTarget() const { return Subtarget; }
-
unsigned GetInstSizeInBytes(const MachineInstr *MI) const;
bool isCoalescableExtInstr(const MachineInstr &MI, unsigned &SrcReg,
@@ -168,7 +166,7 @@ private:
/// if necessary, to be replaced by the scavenger at the end of PEI.
void emitFrameOffset(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
DebugLoc DL, unsigned DestReg, unsigned SrcReg, int Offset,
- const AArch64InstrInfo *TII,
+ const TargetInstrInfo *TII,
MachineInstr::MIFlag = MachineInstr::NoFlags,
bool SetNZCV = false);
diff --git a/lib/Target/AArch64/AArch64InstrInfo.td b/lib/Target/AArch64/AArch64InstrInfo.td
index 9ad36e8..1211fba 100644
--- a/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/lib/Target/AArch64/AArch64InstrInfo.td
@@ -323,7 +323,7 @@ def : Pat<(AArch64LOADgot tconstpool:$addr),
// System instructions.
//===----------------------------------------------------------------------===//
-def HINT : HintI<"hint">;
+def HINT : HintI<"hint">;
def : InstAlias<"nop", (HINT 0b000)>;
def : InstAlias<"yield",(HINT 0b001)>;
def : InstAlias<"wfe", (HINT 0b010)>;
@@ -671,10 +671,10 @@ def CRC32CXrr : BaseCRC32<1, 0b11, 1, GPR64, int_aarch64_crc32cx, "crc32cx">;
//===----------------------------------------------------------------------===//
// (immediate)
-defm ANDS : LogicalImmS<0b11, "ands", AArch64and_flag>;
-defm AND : LogicalImm<0b00, "and", and>;
-defm EOR : LogicalImm<0b10, "eor", xor>;
-defm ORR : LogicalImm<0b01, "orr", or>;
+defm ANDS : LogicalImmS<0b11, "ands", AArch64and_flag, "bics">;
+defm AND : LogicalImm<0b00, "and", and, "bic">;
+defm EOR : LogicalImm<0b10, "eor", xor, "eon">;
+defm ORR : LogicalImm<0b01, "orr", or, "orn">;
// FIXME: these aliases *are* canonical sometimes (when movz can't be
// used). Actually, it seems to be working right now, but putting logical_immXX
@@ -737,6 +737,10 @@ def : Pat<(not GPR64:$Xm), (ORNXrr XZR, GPR64:$Xm)>;
defm CLS : OneOperandData<0b101, "cls">;
defm CLZ : OneOperandData<0b100, "clz", ctlz>;
defm RBIT : OneOperandData<0b000, "rbit">;
+
+def : Pat<(int_aarch64_rbit GPR32:$Rn), (RBITWr $Rn)>;
+def : Pat<(int_aarch64_rbit GPR64:$Rn), (RBITXr $Rn)>;
+
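An aside on what these two patterns buy: they map the target intrinsic straight onto the bit-reverse instructions. A minimal C-level sketch, assuming clang's ACLE builtin __builtin_arm_rbit lowers to the llvm.aarch64.rbit intrinsic matched above (illustrative, not part of this patch):

    // Assumed builtin; the exact lowering path depends on the clang revision.
    unsigned reverse_bits(unsigned X) {
      return __builtin_arm_rbit(X); // expected to select to: rbit w0, w0
    }
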
def REV16Wr : OneWRegData<0b001, "rev16",
UnOpFrag<(rotr (bswap node:$LHS), (i64 16))>>;
def REV16Xr : OneXRegData<0b001, "rev16", null_frag>;
@@ -2238,6 +2242,81 @@ def : Pat<(f32_to_f16 FPR32:$Rn),
def FCVTSHpseudo : Pseudo<(outs FPR32:$Rd), (ins FPR32:$Rn),
[(set (f32 FPR32:$Rd), (f16_to_f32 i32:$Rn))]>;
+// When converting from f16 coming directly from a load, make sure we
+// load into the FPR16 registers rather than going through the GPRs.
+// f16->f32
+def : Pat<(f32 (f16_to_f32 (i32
+ (zextloadi16 (ro_Windexed16 GPR64sp:$Rn, GPR32:$Rm,
+ ro_Wextend16:$extend))))),
+ (FCVTSHr (LDRHroW GPR64sp:$Rn, GPR32:$Rm, ro_Wextend16:$extend))>;
+def : Pat<(f32 (f16_to_f32 (i32
+ (zextloadi16 (ro_Xindexed16 GPR64sp:$Rn, GPR64:$Rm,
+ ro_Xextend16:$extend))))),
+ (FCVTSHr (LDRHroX GPR64sp:$Rn, GPR64:$Rm, ro_Xextend16:$extend))>;
+def : Pat <(f32 (f16_to_f32 (i32
+ (zextloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))),
+ (FCVTSHr (LDRHui GPR64sp:$Rn, uimm12s2:$offset))>;
+def : Pat <(f32 (f16_to_f32 (i32
+ (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))))),
+ (FCVTSHr (LDURHi GPR64sp:$Rn, simm9:$offset))>;
+
+// f16->f64
+def : Pat<(f64 (fextend (f32 (f16_to_f32 (i32
+ (zextloadi16 (ro_Windexed16 GPR64sp:$Rn, GPR32:$Rm,
+ ro_Wextend16:$extend))))))),
+ (FCVTDHr (LDRHroW GPR64sp:$Rn, GPR32:$Rm, ro_Wextend16:$extend))>;
+def : Pat<(f64 (fextend (f32 (f16_to_f32 (i32
+ (zextloadi16 (ro_Xindexed16 GPR64sp:$Rn, GPR64:$Rm,
+ ro_Xextend16:$extend))))))),
+ (FCVTDHr (LDRHroX GPR64sp:$Rn, GPR64:$Rm, ro_Xextend16:$extend))>;
+def : Pat <(f64 (fextend (f32 (f16_to_f32 (i32
+ (zextloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))))),
+ (FCVTDHr (LDRHui GPR64sp:$Rn, uimm12s2:$offset))>;
+def : Pat <(f64 (fextend (f32 (f16_to_f32 (i32
+ (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))))))),
+ (FCVTDHr (LDURHi GPR64sp:$Rn, simm9:$offset))>;
+
+// When converting to f16 going directly to a store, make sure we use the
+// appropriate direct conversion instructions and store via the FPR16
+// registers rather than going through the GPRs.
+let AddedComplexity = 10 in {
+// f32->f16
+def : Pat< (truncstorei16 (assertzext (i32 (f32_to_f16 FPR32:$Rt))),
+ (ro_Windexed16 GPR64sp:$Rn, GPR32:$Rm,
+ ro_Wextend16:$extend)),
+ (STRHroW (FCVTHSr FPR32:$Rt), GPR64sp:$Rn, GPR32:$Rm,
+ ro_Wextend16:$extend)>;
+def : Pat< (truncstorei16 (assertzext (i32 (f32_to_f16 FPR32:$Rt))),
+ (ro_Xindexed16 GPR64sp:$Rn, GPR64:$Rm,
+ ro_Xextend16:$extend)),
+ (STRHroX (FCVTHSr FPR32:$Rt), GPR64sp:$Rn, GPR64:$Rm,
+ ro_Xextend16:$extend)>;
+def : Pat <(truncstorei16 (assertzext (i32 (f32_to_f16 FPR32:$Rt))),
+ (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset)),
+ (STRHui (FCVTHSr FPR32:$Rt), GPR64sp:$Rn, uimm12s2:$offset)>;
+def : Pat <(truncstorei16 (assertzext (i32 (f32_to_f16 FPR32:$Rt))),
+ (am_unscaled16 GPR64sp:$Rn, simm9:$offset)),
+ (STURHi (FCVTHSr FPR32:$Rt), GPR64sp:$Rn, simm9:$offset)>;
+// f64->f16
+def : Pat< (truncstorei16 (assertzext (i32 (f32_to_f16 (f32 (fround FPR64:$Rt))))),
+ (ro_Windexed16 GPR64sp:$Rn, GPR32:$Rm,
+ ro_Wextend16:$extend)),
+ (STRHroW (FCVTHDr FPR64:$Rt), GPR64sp:$Rn, GPR32:$Rm,
+ ro_Wextend16:$extend)>;
+def : Pat< (truncstorei16 (assertzext (i32 (f32_to_f16 (f32 (fround FPR64:$Rt))))),
+ (ro_Xindexed16 GPR64sp:$Rn, GPR64:$Rm,
+ ro_Xextend16:$extend)),
+ (STRHroX (FCVTHDr FPR64:$Rt), GPR64sp:$Rn, GPR64:$Rm,
+ ro_Xextend16:$extend)>;
+def : Pat <(truncstorei16 (assertzext (i32 (f32_to_f16 (f32 (fround FPR64:$Rt))))),
+ (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset)),
+ (STRHui (FCVTHDr FPR64:$Rt), GPR64sp:$Rn, uimm12s2:$offset)>;
+def : Pat <(truncstorei16 (assertzext (i32 (f32_to_f16 (f32 (fround FPR64:$Rt))))),
+ (am_unscaled16 GPR64sp:$Rn, simm9:$offset)),
+ (STURHi (FCVTHDr FPR64:$Rt), GPR64sp:$Rn, simm9:$offset)>;
+}
+
+
//===----------------------------------------------------------------------===//
// Floating point single operand instructions.
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
index e7454be..3df9c4f 100644
--- a/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
+++ b/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
@@ -40,14 +40,13 @@ STATISTIC(NumPreFolded, "Number of pre-index updates folded");
STATISTIC(NumUnscaledPairCreated,
"Number of load/store from unscaled generated");
-static cl::opt<unsigned> ScanLimit("aarch64-load-store-scan-limit", cl::init(20),
- cl::Hidden);
+static cl::opt<unsigned> ScanLimit("aarch64-load-store-scan-limit",
+ cl::init(20), cl::Hidden);
// Place holder while testing unscaled load/store combining
-static cl::opt<bool>
-EnableAArch64UnscaledMemOp("aarch64-unscaled-mem-op", cl::Hidden,
- cl::desc("Allow AArch64 unscaled load/store combining"),
- cl::init(true));
+static cl::opt<bool> EnableAArch64UnscaledMemOp(
+ "aarch64-unscaled-mem-op", cl::Hidden,
+ cl::desc("Allow AArch64 unscaled load/store combining"), cl::init(true));
namespace {
struct AArch64LoadStoreOpt : public MachineFunctionPass {
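For reference, the two knobs above are reachable from the command line in the usual cl::opt way; a hedged example invocation (triple and input file assumed):

    llc -mtriple=aarch64-linux-gnu -aarch64-load-store-scan-limit=32 \
        -aarch64-unscaled-mem-op=false pairs.ll
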
@@ -60,19 +59,19 @@ struct AArch64LoadStoreOpt : public MachineFunctionPass {
// Scan the instructions looking for a load/store that can be combined
// with the current instruction into a load/store pair.
// Return the matching instruction if one is found, else MBB->end().
- // If a matching instruction is found, mergeForward is set to true if the
+ // If a matching instruction is found, MergeForward is set to true if the
// merge is to remove the first instruction and replace the second with
// a pair-wise insn, and false if the reverse is true.
MachineBasicBlock::iterator findMatchingInsn(MachineBasicBlock::iterator I,
- bool &mergeForward,
+ bool &MergeForward,
unsigned Limit);
// Merge the two instructions indicated into a single pair-wise instruction.
- // If mergeForward is true, erase the first instruction and fold its
+ // If MergeForward is true, erase the first instruction and fold its
// operation into the second. If false, the reverse. Return the instruction
// following the first instruction (which may change during processing).
MachineBasicBlock::iterator
mergePairedInsns(MachineBasicBlock::iterator I,
- MachineBasicBlock::iterator Paired, bool mergeForward);
+ MachineBasicBlock::iterator Paired, bool MergeForward);
// Scan the instruction list to find a base register update that can
// be combined with the current instruction (a load or store) using
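The transformation these hooks implement, sketched on assembly with arbitrary operands (illustrative only):

    // before:                  // after pairing:
    ldr w0, [x2]                ldp w0, w1, [x2]
    ldr w1, [x2, #4]

MergeForward only decides which of the two original positions the paired instruction occupies: the second instruction's slot when true, the first's when false.
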
@@ -142,7 +141,7 @@ static bool isUnscaledLdst(unsigned Opc) {
int AArch64LoadStoreOpt::getMemSize(MachineInstr *MemMI) {
switch (MemMI->getOpcode()) {
default:
- llvm_unreachable("Opcode has has unknown size!");
+ llvm_unreachable("Opcode has unknown size!");
case AArch64::STRSui:
case AArch64::STURSi:
return 4;
@@ -217,16 +216,26 @@ static unsigned getPreIndexedOpcode(unsigned Opc) {
switch (Opc) {
default:
llvm_unreachable("Opcode has no pre-indexed equivalent!");
- case AArch64::STRSui: return AArch64::STRSpre;
- case AArch64::STRDui: return AArch64::STRDpre;
- case AArch64::STRQui: return AArch64::STRQpre;
- case AArch64::STRWui: return AArch64::STRWpre;
- case AArch64::STRXui: return AArch64::STRXpre;
- case AArch64::LDRSui: return AArch64::LDRSpre;
- case AArch64::LDRDui: return AArch64::LDRDpre;
- case AArch64::LDRQui: return AArch64::LDRQpre;
- case AArch64::LDRWui: return AArch64::LDRWpre;
- case AArch64::LDRXui: return AArch64::LDRXpre;
+ case AArch64::STRSui:
+ return AArch64::STRSpre;
+ case AArch64::STRDui:
+ return AArch64::STRDpre;
+ case AArch64::STRQui:
+ return AArch64::STRQpre;
+ case AArch64::STRWui:
+ return AArch64::STRWpre;
+ case AArch64::STRXui:
+ return AArch64::STRXpre;
+ case AArch64::LDRSui:
+ return AArch64::LDRSpre;
+ case AArch64::LDRDui:
+ return AArch64::LDRDpre;
+ case AArch64::LDRQui:
+ return AArch64::LDRQpre;
+ case AArch64::LDRWui:
+ return AArch64::LDRWpre;
+ case AArch64::LDRXui:
+ return AArch64::LDRXpre;
}
}
@@ -260,7 +269,7 @@ static unsigned getPostIndexedOpcode(unsigned Opc) {
MachineBasicBlock::iterator
AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
MachineBasicBlock::iterator Paired,
- bool mergeForward) {
+ bool MergeForward) {
MachineBasicBlock::iterator NextI = I;
++NextI;
// If NextI is the second of the two instructions to be merged, we need
@@ -276,12 +285,12 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
unsigned NewOpc = getMatchingPairOpcode(I->getOpcode());
// Insert our new paired instruction after whichever of the paired
- // instructions mergeForward indicates.
- MachineBasicBlock::iterator InsertionPoint = mergeForward ? Paired : I;
- // Also based on mergeForward is from where we copy the base register operand
+ // instructions MergeForward indicates.
+ MachineBasicBlock::iterator InsertionPoint = MergeForward ? Paired : I;
+ // Also based on MergeForward is from where we copy the base register operand
// so we get the flags compatible with the input code.
MachineOperand &BaseRegOp =
- mergeForward ? Paired->getOperand(1) : I->getOperand(1);
+ MergeForward ? Paired->getOperand(1) : I->getOperand(1);
// Which register is Rt and which is Rt2 depends on the offset order.
MachineInstr *RtMI, *Rt2MI;
@@ -355,8 +364,8 @@ static bool inBoundsForPair(bool IsUnscaled, int Offset, int OffsetStride) {
if (IsUnscaled) {
// Convert the byte-offset used by unscaled into an "element" offset used
// by the scaled pair load/store instructions.
- int elemOffset = Offset / OffsetStride;
- if (elemOffset > 63 || elemOffset < -64)
+ int ElemOffset = Offset / OffsetStride;
+ if (ElemOffset > 63 || ElemOffset < -64)
return false;
}
return true;
@@ -374,14 +383,14 @@ static int alignTo(int Num, int PowOf2) {
/// be combined with the current instruction into a load/store pair.
MachineBasicBlock::iterator
AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
- bool &mergeForward, unsigned Limit) {
+ bool &MergeForward, unsigned Limit) {
MachineBasicBlock::iterator E = I->getParent()->end();
MachineBasicBlock::iterator MBBI = I;
MachineInstr *FirstMI = I;
++MBBI;
int Opc = FirstMI->getOpcode();
- bool mayLoad = FirstMI->mayLoad();
+ bool MayLoad = FirstMI->mayLoad();
bool IsUnscaled = isUnscaledLdst(Opc);
unsigned Reg = FirstMI->getOperand(0).getReg();
unsigned BaseReg = FirstMI->getOperand(1).getReg();
@@ -453,7 +462,7 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
// If the destination register of the loads is the same register, bail
// and keep looking. A load-pair instruction with both destination
// registers the same is UNPREDICTABLE and will result in an exception.
- if (mayLoad && Reg == MI->getOperand(0).getReg()) {
+ if (MayLoad && Reg == MI->getOperand(0).getReg()) {
trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI);
continue;
}
@@ -462,7 +471,7 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
// the two instructions, we can combine the second into the first.
if (!ModifiedRegs[MI->getOperand(0).getReg()] &&
!UsedRegs[MI->getOperand(0).getReg()]) {
- mergeForward = false;
+ MergeForward = false;
return MBBI;
}
@@ -471,7 +480,7 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
// second.
if (!ModifiedRegs[FirstMI->getOperand(0).getReg()] &&
!UsedRegs[FirstMI->getOperand(0).getReg()]) {
- mergeForward = true;
+ MergeForward = true;
return MBBI;
}
// Unable to combine these instructions due to interference in between.
@@ -798,14 +807,14 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB) {
break;
}
// Look ahead up to ScanLimit instructions for a pairable instruction.
- bool mergeForward = false;
+ bool MergeForward = false;
MachineBasicBlock::iterator Paired =
- findMatchingInsn(MBBI, mergeForward, ScanLimit);
+ findMatchingInsn(MBBI, MergeForward, ScanLimit);
if (Paired != E) {
// Merge the loads into a pair. Keeping the iterator straight is a
// pain, so we let the merge routine tell us what the next instruction
// is after it's done mucking about.
- MBBI = mergePairedInsns(MBBI, Paired, mergeForward);
+ MBBI = mergePairedInsns(MBBI, Paired, MergeForward);
Modified = true;
++NumPairCreated;
diff --git a/lib/Target/AArch64/AArch64MCInstLower.cpp b/lib/Target/AArch64/AArch64MCInstLower.cpp
index ab6d375..75a17b9 100644
--- a/lib/Target/AArch64/AArch64MCInstLower.cpp
+++ b/lib/Target/AArch64/AArch64MCInstLower.cpp
@@ -51,7 +51,7 @@ MCOperand AArch64MCInstLower::lowerSymbolOperandDarwin(const MachineOperand &MO,
AArch64II::MO_PAGEOFF)
RefKind = MCSymbolRefExpr::VK_GOTPAGEOFF;
else
- assert(0 && "Unexpected target flags with MO_GOT on GV operand");
+ llvm_unreachable("Unexpected target flags with MO_GOT on GV operand");
} else if ((MO.getTargetFlags() & AArch64II::MO_TLS) != 0) {
if ((MO.getTargetFlags() & AArch64II::MO_FRAGMENT) == AArch64II::MO_PAGE)
RefKind = MCSymbolRefExpr::VK_TLVPPAGE;
@@ -154,7 +154,7 @@ bool AArch64MCInstLower::lowerOperand(const MachineOperand &MO,
MCOperand &MCOp) const {
switch (MO.getType()) {
default:
- assert(0 && "unknown operand type");
+ llvm_unreachable("unknown operand type");
case MachineOperand::MO_Register:
// Ignore all implicit register operands.
if (MO.isImplicit())
diff --git a/lib/Target/AArch64/AArch64RegisterInfo.td b/lib/Target/AArch64/AArch64RegisterInfo.td
index 21c927f..a30e4ad 100644
--- a/lib/Target/AArch64/AArch64RegisterInfo.td
+++ b/lib/Target/AArch64/AArch64RegisterInfo.td
@@ -175,7 +175,7 @@ def GPR64all : RegisterClass<"AArch64", [i64], 64, (add GPR64common, XZR, SP)>;
// This is for indirect tail calls to store the address of the destination.
def tcGPR64 : RegisterClass<"AArch64", [i64], 64, (sub GPR64common, X19, X20, X21,
X22, X23, X24, X25, X26,
- X27, X28)>;
+ X27, X28, FP, LR)>;
// GPR register classes for post increment amount of vector load/store that
// has alternate printing when Rm=31 and prints a constant immediate value
diff --git a/lib/Target/AArch64/AArch64SchedA53.td b/lib/Target/AArch64/AArch64SchedA53.td
index 0c3949e..d709bee 100644
--- a/lib/Target/AArch64/AArch64SchedA53.td
+++ b/lib/Target/AArch64/AArch64SchedA53.td
@@ -148,9 +148,9 @@ def : ReadAdvance<ReadVLD, 0>;
// ALU - Most operands in the ALU pipes are not needed for two cycles. Shiftable
// operands are needed one cycle later if and only if they are to be
-// shifted. Otherwise, they too are needed two cycle later. This same
+// shifted. Otherwise, they too are needed two cycles later. This same
// ReadAdvance applies to Extended registers as well, even though there is
-// a seperate SchedPredicate for them.
+// a separate SchedPredicate for them.
def : ReadAdvance<ReadI, 2, [WriteImm,WriteI,
WriteISReg, WriteIEReg,WriteIS,
WriteID32,WriteID64,
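As a worked rule for the ReadAdvance above: a consumer reading an operand through ReadI sees the listed producers N = 2 cycles early, so the effective edge latency is max(0, L - 2). A latency-3 producer is therefore felt as a 1-cycle dependence, which is exactly the "needed two cycles later" behaviour the comment describes.
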
diff --git a/lib/Target/AArch64/AArch64SchedA57.td b/lib/Target/AArch64/AArch64SchedA57.td
new file mode 100644
index 0000000..8209f96
--- /dev/null
+++ b/lib/Target/AArch64/AArch64SchedA57.td
@@ -0,0 +1,304 @@
+//=- AArch64SchedA57.td - ARM Cortex-A57 Scheduling Defs -----*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the machine model for ARM Cortex-A57 to support
+// instruction scheduling and other instruction cost heuristics.
+//
+//===----------------------------------------------------------------------===//
+
+def CortexA57Model : SchedMachineModel {
+ let IssueWidth = 8; // 3-way decode and 8-way issue
+ let MicroOpBufferSize = 128; // 128 micro-op re-order buffer
+ let LoadLatency = 4; // Optimistic load latency
+ let MispredictPenalty = 14; // Fetch + Decode/Rename/Dispatch + Branch
+}
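A sanity check on how these four numbers constrain a schedule; a standalone sketch that mirrors the fields above (simplified: ignores dependences and per-pipe limits):

    #include <cstdio>

    // Simplified mirror of the SchedMachineModel fields defined above.
    struct MachineModel {
      unsigned IssueWidth;        // max micro-ops issued per cycle
      unsigned MicroOpBufferSize; // out-of-order window
      unsigned LoadLatency;       // optimistic load-to-use latency
      unsigned MispredictPenalty; // cycles lost to a mispredicted branch
    };

    // Lower bound on cycles needed to issue N micro-ops: even a perfect
    // schedule cannot beat the issue width.
    unsigned minIssueCycles(const MachineModel &M, unsigned NumMicroOps) {
      return (NumMicroOps + M.IssueWidth - 1) / M.IssueWidth;
    }

    int main() {
      MachineModel A57 = {8, 128, 4, 14};
      std::printf("%u\n", minIssueCycles(A57, 20)); // 20 uops / width 8 -> 3
    }
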
+
+//===----------------------------------------------------------------------===//
+// Define each kind of processor resource and the number available on
+// Cortex-A57. Cortex-A57 has 8 pipelines, each with its own 8-entry queue
+// where micro-ops wait for their operands and then issue out-of-order.
+
+def A57UnitB : ProcResource<1> { let BufferSize = 8; } // Type B micro-ops
+def A57UnitI : ProcResource<2> { let BufferSize = 8; } // Type I micro-ops
+def A57UnitM : ProcResource<1> { let BufferSize = 8; } // Type M micro-ops
+def A57UnitL : ProcResource<1> { let BufferSize = 8; } // Type L micro-ops
+def A57UnitS : ProcResource<1> { let BufferSize = 8; } // Type S micro-ops
+def A57UnitX : ProcResource<1> { let BufferSize = 8; } // Type X micro-ops
+def A57UnitW : ProcResource<1> { let BufferSize = 8; } // Type W micro-ops
+let SchedModel = CortexA57Model in {
+ def A57UnitV : ProcResGroup<[A57UnitX, A57UnitW]>; // Type V micro-ops
+}
+
+
+let SchedModel = CortexA57Model in {
+
+//===----------------------------------------------------------------------===//
+// Define customized scheduler read/write types specific to the Cortex-A57.
+
+include "AArch64SchedA57WriteRes.td"
+
+//===----------------------------------------------------------------------===//
+// Map the target-defined scheduler read/write resources and latency for
+// Cortex-A57. The Cortex-A57 types are directly associated with resources, so
+// defining the aliases precludes the need for mapping them using WriteRes. The
+// aliases are sufficient for creating a coarse, working model. As the model
+// evolves, InstRWs will be used to override these SchedAliases.
+
+def : SchedAlias<WriteImm, A57Write_1cyc_1I>;
+def : SchedAlias<WriteI, A57Write_1cyc_1I>;
+def : SchedAlias<WriteISReg, A57Write_2cyc_1M>;
+def : SchedAlias<WriteIEReg, A57Write_2cyc_1M>;
+def : SchedAlias<WriteExtr, A57Write_1cyc_1I>;
+def : SchedAlias<WriteIS, A57Write_1cyc_1I>;
+def : SchedAlias<WriteID32, A57Write_19cyc_1M>;
+def : SchedAlias<WriteID64, A57Write_35cyc_1M>;
+def : SchedAlias<WriteIM32, A57Write_3cyc_1M>;
+def : SchedAlias<WriteIM64, A57Write_5cyc_1M>;
+def : SchedAlias<WriteBr, A57Write_1cyc_1B>;
+def : SchedAlias<WriteBrReg, A57Write_1cyc_1B>;
+def : SchedAlias<WriteLD, A57Write_4cyc_1L>;
+def : SchedAlias<WriteST, A57Write_1cyc_1S>;
+def : SchedAlias<WriteSTP, A57Write_1cyc_1S>;
+def : SchedAlias<WriteAdr, A57Write_1cyc_1I>;
+def : SchedAlias<WriteLDIdx, A57Write_4cyc_1I_1L>;
+def : SchedAlias<WriteSTIdx, A57Write_1cyc_1I_1S>;
+def : SchedAlias<WriteF, A57Write_3cyc_1V>;
+def : SchedAlias<WriteFCmp, A57Write_3cyc_1V>;
+def : SchedAlias<WriteFCvt, A57Write_5cyc_1V>;
+def : SchedAlias<WriteFCopy, A57Write_3cyc_1V>;
+def : SchedAlias<WriteFImm, A57Write_3cyc_1V>;
+def : SchedAlias<WriteFMul, A57Write_5cyc_1V>;
+def : SchedAlias<WriteFDiv, A57Write_18cyc_1X>;
+def : SchedAlias<WriteV, A57Write_3cyc_1V>;
+def : SchedAlias<WriteVLD, A57Write_5cyc_1L>;
+def : SchedAlias<WriteVST, A57Write_1cyc_1S>;
+
+def : WriteRes<WriteSys, []> { let Latency = 1; }
+def : WriteRes<WriteBarrier, []> { let Latency = 1; }
+def : WriteRes<WriteHint, []> { let Latency = 1; }
+
+def : WriteRes<WriteLDHi, []> { let Latency = 4; }
+
+// Forwarding logic is not [yet] explicitly modeled beyond what is captured
+// in the latencies of the A57 Generic SchedWriteRes's.
+def : ReadAdvance<ReadI, 0>;
+def : ReadAdvance<ReadISReg, 0>;
+def : ReadAdvance<ReadIEReg, 0>;
+def : ReadAdvance<ReadIM, 0>;
+def : ReadAdvance<ReadIMA, 0>;
+def : ReadAdvance<ReadID, 0>;
+def : ReadAdvance<ReadExtrHi, 0>;
+def : ReadAdvance<ReadAdrBase, 0>;
+def : ReadAdvance<ReadVLD, 0>;
+
+
+//===----------------------------------------------------------------------===//
+// Specialize the coarse model by associating instruction groups with the
+// subtarget-defined types. As the model is refined, this will override most
+// of the above SchedAlias mappings.
+
+// Miscellaneous
+// -----------------------------------------------------------------------------
+
+def : InstRW<[WriteI], (instrs COPY)>;
+
+
+// Branch Instructions
+// -----------------------------------------------------------------------------
+
+def : InstRW<[A57Write_1cyc_1B_1I], (instrs BL)>;
+def : InstRW<[A57Write_2cyc_1B_1I], (instrs BLR)>;
+
+
+// Divide and Multiply Instructions
+// -----------------------------------------------------------------------------
+
+// Multiply high
+def : InstRW<[A57Write_6cyc_1M], (instrs SMULHrr, UMULHrr)>;
+
+
+// Miscellaneous Data-Processing Instructions
+// -----------------------------------------------------------------------------
+
+def : InstRW<[A57Write_1cyc_1I], (instrs EXTRWrri)>;
+def : InstRW<[A57Write_3cyc_1I_1M], (instrs EXTRXrri)>;
+def : InstRW<[A57Write_2cyc_1M], (instregex "BFM")>;
+
+
+// Cryptography Extensions
+// -----------------------------------------------------------------------------
+
+def : InstRW<[A57Write_3cyc_1W], (instregex "CRC32")>;
+
+
+// Vector Load
+// -----------------------------------------------------------------------------
+
+def : InstRW<[A57Write_8cyc_1L_1V], (instregex "LD1i(8|16|32)$")>;
+def : InstRW<[A57Write_8cyc_1L_1V, WriteAdr], (instregex "LD1i(8|16|32)_POST$")>;
+def : InstRW<[A57Write_5cyc_1L], (instregex "LD1i(64)$")>;
+def : InstRW<[A57Write_5cyc_1L, WriteAdr], (instregex "LD1i(64)_POST$")>;
+
+def : InstRW<[A57Write_8cyc_1L_1V], (instregex "LD1Rv(8b|4h|2s)$")>;
+def : InstRW<[A57Write_8cyc_1L_1V, WriteAdr], (instregex "LD1Rv(8b|4h|2s)_POST$")>;
+def : InstRW<[A57Write_5cyc_1L], (instregex "LD1Rv(1d)$")>;
+def : InstRW<[A57Write_5cyc_1L, WriteAdr], (instregex "LD1Rv(1d)_POST$")>;
+def : InstRW<[A57Write_8cyc_1L_1V], (instregex "LD1Rv(16b|8h|4s|2d)$")>;
+def : InstRW<[A57Write_8cyc_1L_1V, WriteAdr], (instregex "LD1Rv(16b|8h|4s|2d)_POST$")>;
+
+def : InstRW<[A57Write_5cyc_1L], (instregex "LD1Onev(8b|4h|2s|1d)$")>;
+def : InstRW<[A57Write_5cyc_1L, WriteAdr], (instregex "LD1Onev(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[A57Write_5cyc_1L], (instregex "LD1Onev(16b|8h|4s|2d)$")>;
+def : InstRW<[A57Write_5cyc_1L, WriteAdr], (instregex "LD1Onev(16b|8h|4s|2d)_POST$")>;
+def : InstRW<[A57Write_5cyc_1L], (instregex "LD1Twov(8b|4h|2s|1d)$")>;
+def : InstRW<[A57Write_5cyc_1L, WriteAdr], (instregex "LD1Twov(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[A57Write_6cyc_2L], (instregex "LD1Twov(16b|8h|4s|2d)$")>;
+def : InstRW<[A57Write_6cyc_2L, WriteAdr], (instregex "LD1Twov(16b|8h|4s|2d)_POST$")>;
+def : InstRW<[A57Write_6cyc_2L], (instregex "LD1Threev(8b|4h|2s|1d)$")>;
+def : InstRW<[A57Write_6cyc_2L, WriteAdr], (instregex "LD1Threev(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[A57Write_7cyc_3L], (instregex "LD1Threev(16b|8h|4s|2d)$")>;
+def : InstRW<[A57Write_7cyc_3L, WriteAdr], (instregex "LD1Threev(16b|8h|4s|2d)_POST$")>;
+def : InstRW<[A57Write_6cyc_2L], (instregex "LD1Fourv(8b|4h|2s|1d)$")>;
+def : InstRW<[A57Write_6cyc_2L, WriteAdr], (instregex "LD1Fourv(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[A57Write_8cyc_4L], (instregex "LD1Fourv(16b|8h|4s|2d)$")>;
+def : InstRW<[A57Write_8cyc_4L, WriteAdr], (instregex "LD1Fourv(16b|8h|4s|2d)_POST$")>;
+
+def : InstRW<[A57Write_8cyc_1L_2V], (instregex "LD2i(8|16)$")>;
+def : InstRW<[A57Write_8cyc_1L_2V, WriteAdr], (instregex "LD2i(8|16)_POST$")>;
+def : InstRW<[A57Write_6cyc_2L], (instregex "LD2i(32)$")>;
+def : InstRW<[A57Write_6cyc_2L, WriteAdr], (instregex "LD2i(32)_POST$")>;
+def : InstRW<[A57Write_8cyc_1L_1V], (instregex "LD2i(64)$")>;
+def : InstRW<[A57Write_8cyc_1L_1V, WriteAdr], (instregex "LD2i(64)_POST$")>;
+
+def : InstRW<[A57Write_8cyc_1L_1V], (instregex "LD2Rv(8b|4h|2s)$")>;
+def : InstRW<[A57Write_8cyc_1L_1V, WriteAdr], (instregex "LD2Rv(8b|4h|2s)_POST$")>;
+def : InstRW<[A57Write_5cyc_1L], (instregex "LD2Rv(1d)$")>;
+def : InstRW<[A57Write_5cyc_1L, WriteAdr], (instregex "LD2Rv(1d)_POST$")>;
+def : InstRW<[A57Write_8cyc_1L_2V], (instregex "LD2Rv(16b|8h|4s|2d)$")>;
+def : InstRW<[A57Write_8cyc_1L_2V, WriteAdr], (instregex "LD2Rv(16b|8h|4s|2d)_POST$")>;
+
+def : InstRW<[A57Write_8cyc_1L_1V], (instregex "LD2Twov(8b|4h|2s)$")>;
+def : InstRW<[A57Write_8cyc_1L_1V, WriteAdr], (instregex "LD2Twov(8b|4h|2s)_POST$")>;
+def : InstRW<[A57Write_9cyc_2L_2V], (instregex "LD2Twov(16b|8h|4s)$")>;
+def : InstRW<[A57Write_9cyc_2L_2V, WriteAdr], (instregex "LD2Twov(16b|8h|4s)_POST$")>;
+def : InstRW<[A57Write_6cyc_2L], (instregex "LD2Twov(2d)$")>;
+def : InstRW<[A57Write_6cyc_2L, WriteAdr], (instregex "LD2Twov(2d)_POST$")>;
+
+def : InstRW<[A57Write_9cyc_1L_3V], (instregex "LD3i(8|16)$")>;
+def : InstRW<[A57Write_9cyc_1L_3V, WriteAdr], (instregex "LD3i(8|16)_POST$")>;
+def : InstRW<[A57Write_8cyc_1L_2V], (instregex "LD3i(32)$")>;
+def : InstRW<[A57Write_8cyc_1L_2V, WriteAdr], (instregex "LD3i(32)_POST$")>;
+def : InstRW<[A57Write_6cyc_2L], (instregex "LD3i(64)$")>;
+def : InstRW<[A57Write_6cyc_2L, WriteAdr], (instregex "LD3i(64)_POST$")>;
+
+def : InstRW<[A57Write_8cyc_1L_2V], (instregex "LD3Rv(8b|4h|2s)$")>;
+def : InstRW<[A57Write_8cyc_1L_2V, WriteAdr], (instregex "LD3Rv(8b|4h|2s)_POST$")>;
+def : InstRW<[A57Write_6cyc_2L], (instregex "LD3Rv(1d)$")>;
+def : InstRW<[A57Write_6cyc_2L, WriteAdr], (instregex "LD3Rv(1d)_POST$")>;
+def : InstRW<[A57Write_9cyc_1L_3V], (instregex "LD3Rv(16b|8h|4s)$")>;
+def : InstRW<[A57Write_9cyc_1L_3V, WriteAdr], (instregex "LD3Rv(16b|8h|4s)_POST$")>;
+def : InstRW<[A57Write_9cyc_2L_3V], (instregex "LD3Rv(2d)$")>;
+def : InstRW<[A57Write_9cyc_2L_3V, WriteAdr], (instregex "LD3Rv(2d)_POST$")>;
+
+def : InstRW<[A57Write_9cyc_2L_2V], (instregex "LD3Threev(8b|4h|2s)$")>;
+def : InstRW<[A57Write_9cyc_2L_2V, WriteAdr], (instregex "LD3Threev(8b|4h|2s)_POST$")>;
+def : InstRW<[A57Write_10cyc_3L_4V], (instregex "LD3Threev(16b|8h|4s)$")>;
+def : InstRW<[A57Write_10cyc_3L_4V, WriteAdr], (instregex "LD3Threev(16b|8h|4s)_POST$")>;
+def : InstRW<[A57Write_8cyc_4L], (instregex "LD3Threev(2d)$")>;
+def : InstRW<[A57Write_8cyc_4L, WriteAdr], (instregex "LD3Threev(2d)_POST$")>;
+
+def : InstRW<[A57Write_9cyc_2L_3V], (instregex "LD4i(8|16)$")>;
+def : InstRW<[A57Write_9cyc_2L_3V, WriteAdr], (instregex "LD4i(8|16)_POST$")>;
+def : InstRW<[A57Write_8cyc_1L_2V], (instregex "LD4i(32)$")>;
+def : InstRW<[A57Write_8cyc_1L_2V, WriteAdr], (instregex "LD4i(32)_POST$")>;
+def : InstRW<[A57Write_9cyc_2L_3V], (instregex "LD4i(64)$")>;
+def : InstRW<[A57Write_9cyc_2L_3V, WriteAdr], (instregex "LD4i(64)_POST$")>;
+
+def : InstRW<[A57Write_8cyc_1L_2V], (instregex "LD4Rv(8b|4h|2s)$")>;
+def : InstRW<[A57Write_8cyc_1L_2V, WriteAdr], (instregex "LD4Rv(8b|4h|2s)_POST$")>;
+def : InstRW<[A57Write_6cyc_2L], (instregex "LD4Rv(1d)$")>;
+def : InstRW<[A57Write_6cyc_2L, WriteAdr], (instregex "LD4Rv(1d)_POST$")>;
+def : InstRW<[A57Write_9cyc_2L_3V], (instregex "LD4Rv(16b|8h|4s)$")>;
+def : InstRW<[A57Write_9cyc_2L_3V, WriteAdr], (instregex "LD4Rv(16b|8h|4s)_POST$")>;
+def : InstRW<[A57Write_9cyc_2L_4V], (instregex "LD4Rv(2d)$")>;
+def : InstRW<[A57Write_9cyc_2L_4V, WriteAdr], (instregex "LD4Rv(2d)_POST$")>;
+
+def : InstRW<[A57Write_9cyc_2L_2V], (instregex "LD4Fourv(8b|4h|2s)$")>;
+def : InstRW<[A57Write_9cyc_2L_2V, WriteAdr], (instregex "LD4Fourv(8b|4h|2s)_POST$")>;
+def : InstRW<[A57Write_11cyc_4L_4V], (instregex "LD4Fourv(16b|8h|4s)$")>;
+def : InstRW<[A57Write_11cyc_4L_4V, WriteAdr], (instregex "LD4Fourv(16b|8h|4s)_POST$")>;
+def : InstRW<[A57Write_8cyc_4L], (instregex "LD4Fourv(2d)$")>;
+def : InstRW<[A57Write_8cyc_4L, WriteAdr], (instregex "LD4Fourv(2d)_POST$")>;
+
+// Vector Store
+// -----------------------------------------------------------------------------
+
+def : InstRW<[A57Write_1cyc_1S], (instregex "ST1i(8|16|32)$")>;
+def : InstRW<[A57Write_1cyc_1S, WriteAdr], (instregex "ST1i(8|16|32)_POST$")>;
+def : InstRW<[A57Write_3cyc_1S_1V], (instregex "ST1i(64)$")>;
+def : InstRW<[A57Write_3cyc_1S_1V, WriteAdr], (instregex "ST1i(64)_POST$")>;
+
+def : InstRW<[A57Write_1cyc_1S], (instregex "ST1Onev(8b|4h|2s|1d)$")>;
+def : InstRW<[A57Write_1cyc_1S, WriteAdr], (instregex "ST1Onev(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[A57Write_2cyc_2S], (instregex "ST1Onev(16b|8h|4s|2d)$")>;
+def : InstRW<[A57Write_2cyc_2S, WriteAdr], (instregex "ST1Onev(16b|8h|4s|2d)_POST$")>;
+def : InstRW<[A57Write_2cyc_2S], (instregex "ST1Twov(8b|4h|2s|1d)$")>;
+def : InstRW<[A57Write_2cyc_2S, WriteAdr], (instregex "ST1Twov(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[A57Write_4cyc_4S], (instregex "ST1Twov(16b|8h|4s|2d)$")>;
+def : InstRW<[A57Write_4cyc_4S, WriteAdr], (instregex "ST1Twov(16b|8h|4s|2d)_POST$")>;
+def : InstRW<[A57Write_3cyc_3S], (instregex "ST1Threev(8b|4h|2s|1d)$")>;
+def : InstRW<[A57Write_3cyc_3S, WriteAdr], (instregex "ST1Threev(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[A57Write_6cyc_6S], (instregex "ST1Threev(16b|8h|4s|2d)$")>;
+def : InstRW<[A57Write_6cyc_6S, WriteAdr], (instregex "ST1Threev(16b|8h|4s|2d)_POST$")>;
+def : InstRW<[A57Write_4cyc_4S], (instregex "ST1Fourv(8b|4h|2s|1d)$")>;
+def : InstRW<[A57Write_4cyc_4S, WriteAdr], (instregex "ST1Fourv(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[A57Write_8cyc_8S], (instregex "ST1Fourv(16b|8h|4s|2d)$")>;
+def : InstRW<[A57Write_8cyc_8S, WriteAdr], (instregex "ST1Fourv(16b|8h|4s|2d)_POST$")>;
+
+def : InstRW<[A57Write_3cyc_1S_1V], (instregex "ST2i(8|16|32)$")>;
+def : InstRW<[A57Write_3cyc_1S_1V, WriteAdr], (instregex "ST2i(8|16|32)_POST$")>;
+def : InstRW<[A57Write_2cyc_2S], (instregex "ST2i(64)$")>;
+def : InstRW<[A57Write_2cyc_2S, WriteAdr], (instregex "ST2i(64)_POST$")>;
+
+def : InstRW<[A57Write_3cyc_2S_1V], (instregex "ST2Twov(8b|4h|2s)$")>;
+def : InstRW<[A57Write_3cyc_2S_1V, WriteAdr], (instregex "ST2Twov(8b|4h|2s)_POST$")>;
+def : InstRW<[A57Write_4cyc_4S_2V], (instregex "ST2Twov(16b|8h|4s)$")>;
+def : InstRW<[A57Write_4cyc_4S_2V, WriteAdr], (instregex "ST2Twov(16b|8h|4s)_POST$")>;
+def : InstRW<[A57Write_4cyc_4S], (instregex "ST2Twov(2d)$")>;
+def : InstRW<[A57Write_4cyc_4S, WriteAdr], (instregex "ST2Twov(2d)_POST$")>;
+
+def : InstRW<[A57Write_3cyc_1S_1V], (instregex "ST3i(8|16)$")>;
+def : InstRW<[A57Write_3cyc_1S_1V, WriteAdr], (instregex "ST3i(8|16)_POST$")>;
+def : InstRW<[A57Write_3cyc_3S], (instregex "ST3i(32)$")>;
+def : InstRW<[A57Write_3cyc_3S, WriteAdr], (instregex "ST3i(32)_POST$")>;
+def : InstRW<[A57Write_3cyc_2S_1V], (instregex "ST3i(64)$")>;
+def : InstRW<[A57Write_3cyc_2S_1V, WriteAdr], (instregex "ST3i(64)_POST$")>;
+
+def : InstRW<[A57Write_3cyc_3S_2V], (instregex "ST3Threev(8b|4h|2s)$")>;
+def : InstRW<[A57Write_3cyc_3S_2V, WriteAdr], (instregex "ST3Threev(8b|4h|2s)_POST$")>;
+def : InstRW<[A57Write_6cyc_6S_4V], (instregex "ST3Threev(16b|8h|4s)$")>;
+def : InstRW<[A57Write_6cyc_6S_4V, WriteAdr], (instregex "ST3Threev(16b|8h|4s)_POST$")>;
+def : InstRW<[A57Write_6cyc_6S], (instregex "ST3Threev(2d)$")>;
+def : InstRW<[A57Write_6cyc_6S, WriteAdr], (instregex "ST3Threev(2d)_POST$")>;
+
+def : InstRW<[A57Write_3cyc_1S_1V], (instregex "ST4i(8|16)$")>;
+def : InstRW<[A57Write_3cyc_1S_1V, WriteAdr], (instregex "ST4i(8|16)_POST$")>;
+def : InstRW<[A57Write_4cyc_4S], (instregex "ST4i(32)$")>;
+def : InstRW<[A57Write_4cyc_4S, WriteAdr], (instregex "ST4i(32)_POST$")>;
+def : InstRW<[A57Write_3cyc_2S_1V], (instregex "ST4i(64)$")>;
+def : InstRW<[A57Write_3cyc_2S_1V, WriteAdr], (instregex "ST4i(64)_POST$")>;
+
+def : InstRW<[A57Write_4cyc_4S_2V], (instregex "ST4Fourv(8b|4h|2s)$")>;
+def : InstRW<[A57Write_4cyc_4S_2V, WriteAdr], (instregex "ST4Fourv(8b|4h|2s)_POST$")>;
+def : InstRW<[A57Write_8cyc_8S_4V], (instregex "ST4Fourv(16b|8h|4s)$")>;
+def : InstRW<[A57Write_8cyc_8S_4V, WriteAdr], (instregex "ST4Fourv(16b|8h|4s)_POST$")>;
+def : InstRW<[A57Write_8cyc_8S], (instregex "ST4Fourv(2d)$")>;
+def : InstRW<[A57Write_8cyc_8S, WriteAdr], (instregex "ST4Fourv(2d)_POST$")>;
+
+} // SchedModel = CortexA57Model
diff --git a/lib/Target/AArch64/AArch64SchedA57WriteRes.td b/lib/Target/AArch64/AArch64SchedA57WriteRes.td
new file mode 100644
index 0000000..a8f421b
--- /dev/null
+++ b/lib/Target/AArch64/AArch64SchedA57WriteRes.td
@@ -0,0 +1,512 @@
+//=- AArch64SchedA57WriteRes.td - ARM Cortex-A57 Write Res ---*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Contains all of the Cortex-A57 specific SchedWriteRes types. The approach
+// below is to define a generic SchedWriteRes for every combination of
+// latency and microOps. The naming conventions is to use a prefix, one field
+// for latency, and one or more microOp count/type designators.
+// Prefix: A57Write
+// Latency: #cyc
+// MicroOp Count/Types: #(B|I|M|L|S|X|W|V)
+//
+// e.g. A57Write_6cyc_1I_6S_4V means the total latency is 6 and there are
+// 11 micro-ops to be issued down one I pipe, six S pipes and four V pipes.
+//
+//===----------------------------------------------------------------------===//
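A quick way to sanity-check that convention: a standalone sketch that decodes one of these names into its latency and total micro-op count (illustrative, not part of the build):

    #include <cctype>
    #include <cstdio>
    #include <string>

    // Decode "A57Write_6cyc_1I_6S_4V" -> latency 6, 1 + 6 + 4 = 11 micro-ops.
    static void decode(const std::string &Name) {
      unsigned Latency = 0, MicroOps = 0;
      size_t I = Name.find('_') + 1;        // skip the "A57Write" prefix
      while (I < Name.size()) {
        unsigned N = 0;
        while (I < Name.size() && std::isdigit((unsigned char)Name[I]))
          N = N * 10 + (Name[I++] - '0');
        if (Name.compare(I, 3, "cyc") == 0) // the latency field
          Latency = N;
        else                                // a pipe-count field (B|I|M|L|S|X|W|V)
          MicroOps += N;
        I = Name.find('_', I);
        if (I == std::string::npos)
          break;
        ++I;
      }
      std::printf("%s: latency %u, %u micro-ops\n", Name.c_str(), Latency,
                  MicroOps);
    }

    int main() { decode("A57Write_6cyc_1I_6S_4V"); }
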
+
+//===----------------------------------------------------------------------===//
+// Define Generic 1 micro-op types
+
+def A57Write_5cyc_1L : SchedWriteRes<[A57UnitL]> { let Latency = 5; }
+def A57Write_5cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 5; }
+def A57Write_5cyc_1V : SchedWriteRes<[A57UnitV]> { let Latency = 5; }
+def A57Write_5cyc_1W : SchedWriteRes<[A57UnitW]> { let Latency = 5; }
+def A57Write_10cyc_1V : SchedWriteRes<[A57UnitV]> { let Latency = 10; }
+def A57Write_18cyc_1X : SchedWriteRes<[A57UnitX]> { let Latency = 18; }
+def A57Write_19cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 19; }
+def A57Write_1cyc_1B : SchedWriteRes<[A57UnitB]> { let Latency = 1; }
+def A57Write_1cyc_1I : SchedWriteRes<[A57UnitI]> { let Latency = 1; }
+def A57Write_1cyc_1S : SchedWriteRes<[A57UnitS]> { let Latency = 1; }
+def A57Write_2cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 2; }
+def A57Write_32cyc_1X : SchedWriteRes<[A57UnitX]> { let Latency = 32; }
+def A57Write_35cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 35; }
+def A57Write_3cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 3; }
+def A57Write_3cyc_1V : SchedWriteRes<[A57UnitV]> { let Latency = 3; }
+def A57Write_3cyc_1W : SchedWriteRes<[A57UnitW]> { let Latency = 3; }
+def A57Write_3cyc_1X : SchedWriteRes<[A57UnitX]> { let Latency = 3; }
+def A57Write_4cyc_1L : SchedWriteRes<[A57UnitL]> { let Latency = 4; }
+def A57Write_4cyc_1X : SchedWriteRes<[A57UnitX]> { let Latency = 4; }
+def A57Write_9cyc_1V : SchedWriteRes<[A57UnitV]> { let Latency = 9; }
+def A57Write_6cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 6; }
+def A57Write_6cyc_1V : SchedWriteRes<[A57UnitV]> { let Latency = 6; }
+
+
+//===----------------------------------------------------------------------===//
+// Define Generic 2 micro-op types
+
+def A57Write_64cyc_2X : SchedWriteRes<[A57UnitX, A57UnitX]> {
+ let Latency = 64;
+ let NumMicroOps = 2;
+}
+def A57Write_6cyc_1I_1L : SchedWriteRes<[A57UnitI,
+ A57UnitL]> {
+ let Latency = 6;
+ let NumMicroOps = 2;
+}
+def A57Write_7cyc_1V_1X : SchedWriteRes<[A57UnitV,
+ A57UnitX]> {
+ let Latency = 7;
+ let NumMicroOps = 2;
+}
+def A57Write_8cyc_1L_1V : SchedWriteRes<[A57UnitL,
+ A57UnitV]> {
+ let Latency = 8;
+ let NumMicroOps = 2;
+}
+def A57Write_9cyc_2V : SchedWriteRes<[A57UnitV, A57UnitV]> {
+ let Latency = 9;
+ let NumMicroOps = 2;
+}
+def A57Write_8cyc_2X : SchedWriteRes<[A57UnitX, A57UnitX]> {
+ let Latency = 8;
+ let NumMicroOps = 2;
+}
+def A57Write_6cyc_2L : SchedWriteRes<[A57UnitL, A57UnitL]> {
+ let Latency = 6;
+ let NumMicroOps = 2;
+}
+def A57Write_6cyc_2V : SchedWriteRes<[A57UnitV, A57UnitV]> {
+ let Latency = 6;
+ let NumMicroOps = 2;
+}
+def A57Write_6cyc_2W : SchedWriteRes<[A57UnitW, A57UnitW]> {
+ let Latency = 6;
+ let NumMicroOps = 2;
+}
+def A57Write_5cyc_1I_1L : SchedWriteRes<[A57UnitI,
+ A57UnitL]> {
+ let Latency = 5;
+ let NumMicroOps = 2;
+}
+def A57Write_5cyc_2V : SchedWriteRes<[A57UnitV, A57UnitV]> {
+ let Latency = 5;
+ let NumMicroOps = 2;
+}
+def A57Write_5cyc_2X : SchedWriteRes<[A57UnitX, A57UnitX]> {
+ let Latency = 5;
+ let NumMicroOps = 2;
+}
+def A57Write_10cyc_1L_1V : SchedWriteRes<[A57UnitL,
+ A57UnitV]> {
+ let Latency = 10;
+ let NumMicroOps = 2;
+}
+def A57Write_10cyc_2V : SchedWriteRes<[A57UnitV, A57UnitV]> {
+ let Latency = 10;
+ let NumMicroOps = 2;
+}
+def A57Write_1cyc_1B_1I : SchedWriteRes<[A57UnitB,
+ A57UnitI]> {
+ let Latency = 1;
+ let NumMicroOps = 2;
+}
+def A57Write_1cyc_1I_1S : SchedWriteRes<[A57UnitI,
+ A57UnitS]> {
+ let Latency = 1;
+ let NumMicroOps = 2;
+}
+def A57Write_2cyc_1B_1I : SchedWriteRes<[A57UnitB,
+ A57UnitI]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+}
+def A57Write_2cyc_2S : SchedWriteRes<[A57UnitS, A57UnitS]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+}
+def A57Write_2cyc_2V : SchedWriteRes<[A57UnitV, A57UnitV]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+}
+def A57Write_36cyc_2X : SchedWriteRes<[A57UnitX, A57UnitX]> {
+ let Latency = 36;
+ let NumMicroOps = 2;
+}
+def A57Write_3cyc_1I_1M : SchedWriteRes<[A57UnitI,
+ A57UnitM]> {
+ let Latency = 3;
+ let NumMicroOps = 2;
+}
+def A57Write_3cyc_1I_1S : SchedWriteRes<[A57UnitI,
+ A57UnitS]> {
+ let Latency = 3;
+ let NumMicroOps = 2;
+}
+def A57Write_3cyc_1S_1V : SchedWriteRes<[A57UnitS,
+ A57UnitV]> {
+ let Latency = 3;
+ let NumMicroOps = 2;
+}
+def A57Write_4cyc_1I_1L : SchedWriteRes<[A57UnitI,
+ A57UnitL]> {
+ let Latency = 4;
+ let NumMicroOps = 2;
+}
+def A57Write_4cyc_2X : SchedWriteRes<[A57UnitX, A57UnitX]> {
+ let Latency = 4;
+ let NumMicroOps = 2;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Define Generic 3 micro-op types
+
+def A57Write_10cyc_3V : SchedWriteRes<[A57UnitV, A57UnitV, A57UnitV]> {
+ let Latency = 10;
+ let NumMicroOps = 3;
+}
+def A57Write_2cyc_1I_2S : SchedWriteRes<[A57UnitI,
+ A57UnitS, A57UnitS]> {
+ let Latency = 2;
+ let NumMicroOps = 3;
+}
+def A57Write_3cyc_1I_1S_1V : SchedWriteRes<[A57UnitI,
+ A57UnitS,
+ A57UnitV]> {
+ let Latency = 3;
+ let NumMicroOps = 3;
+}
+def A57Write_3cyc_1M_2S : SchedWriteRes<[A57UnitM,
+ A57UnitS, A57UnitS]> {
+ let Latency = 3;
+ let NumMicroOps = 3;
+}
+def A57Write_3cyc_3S : SchedWriteRes<[A57UnitS, A57UnitS, A57UnitS]> {
+ let Latency = 3;
+ let NumMicroOps = 3;
+}
+def A57Write_3cyc_2S_1V : SchedWriteRes<[A57UnitS, A57UnitS,
+ A57UnitV]> {
+ let Latency = 3;
+ let NumMicroOps = 3;
+}
+def A57Write_5cyc_1I_2L : SchedWriteRes<[A57UnitI,
+ A57UnitL, A57UnitL]> {
+ let Latency = 5;
+ let NumMicroOps = 3;
+}
+def A57Write_6cyc_1I_2L : SchedWriteRes<[A57UnitI,
+ A57UnitL, A57UnitL]> {
+ let Latency = 6;
+ let NumMicroOps = 3;
+}
+def A57Write_6cyc_3V : SchedWriteRes<[A57UnitV, A57UnitV, A57UnitV]> {
+ let Latency = 6;
+ let NumMicroOps = 3;
+}
+def A57Write_7cyc_3L : SchedWriteRes<[A57UnitL, A57UnitL, A57UnitL]> {
+ let Latency = 7;
+ let NumMicroOps = 3;
+}
+def A57Write_8cyc_1I_1L_1V : SchedWriteRes<[A57UnitI,
+ A57UnitL,
+ A57UnitV]> {
+ let Latency = 8;
+ let NumMicroOps = 3;
+}
+def A57Write_8cyc_1L_2V : SchedWriteRes<[A57UnitL,
+ A57UnitV, A57UnitV]> {
+ let Latency = 8;
+ let NumMicroOps = 3;
+}
+def A57Write_8cyc_3V : SchedWriteRes<[A57UnitV, A57UnitV, A57UnitV]> {
+ let Latency = 8;
+ let NumMicroOps = 3;
+}
+def A57Write_9cyc_3V : SchedWriteRes<[A57UnitV, A57UnitV, A57UnitV]> {
+ let Latency = 9;
+ let NumMicroOps = 3;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Define Generic 4 micro-op types
+
+def A57Write_2cyc_2I_2S : SchedWriteRes<[A57UnitI, A57UnitI,
+ A57UnitS, A57UnitS]> {
+ let Latency = 2;
+ let NumMicroOps = 4;
+}
+def A57Write_3cyc_2I_2S : SchedWriteRes<[A57UnitI, A57UnitI,
+ A57UnitS, A57UnitS]> {
+ let Latency = 3;
+ let NumMicroOps = 4;
+}
+def A57Write_3cyc_1I_3S : SchedWriteRes<[A57UnitI,
+ A57UnitS, A57UnitS, A57UnitS]> {
+ let Latency = 3;
+ let NumMicroOps = 4;
+}
+def A57Write_3cyc_1I_2S_1V : SchedWriteRes<[A57UnitI,
+ A57UnitS, A57UnitS,
+ A57UnitV]> {
+ let Latency = 3;
+ let NumMicroOps = 4;
+}
+def A57Write_4cyc_4S : SchedWriteRes<[A57UnitS, A57UnitS,
+ A57UnitS, A57UnitS]> {
+ let Latency = 4;
+ let NumMicroOps = 4;
+}
+def A57Write_7cyc_1I_3L : SchedWriteRes<[A57UnitI,
+ A57UnitL, A57UnitL, A57UnitL]> {
+ let Latency = 7;
+ let NumMicroOps = 4;
+}
+def A57Write_5cyc_2I_2L : SchedWriteRes<[A57UnitI, A57UnitI,
+ A57UnitL, A57UnitL]> {
+ let Latency = 5;
+ let NumMicroOps = 4;
+}
+def A57Write_8cyc_1I_1L_2V : SchedWriteRes<[A57UnitI,
+ A57UnitL,
+ A57UnitV, A57UnitV]> {
+ let Latency = 8;
+ let NumMicroOps = 4;
+}
+def A57Write_8cyc_4L : SchedWriteRes<[A57UnitL, A57UnitL,
+ A57UnitL, A57UnitL]> {
+ let Latency = 8;
+ let NumMicroOps = 4;
+}
+def A57Write_9cyc_2L_2V : SchedWriteRes<[A57UnitL, A57UnitL,
+ A57UnitV, A57UnitV]> {
+ let Latency = 9;
+ let NumMicroOps = 4;
+}
+def A57Write_9cyc_1L_3V : SchedWriteRes<[A57UnitL,
+ A57UnitV, A57UnitV, A57UnitV]> {
+ let Latency = 9;
+ let NumMicroOps = 4;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Define Generic 5 micro-op types
+
+def A57Write_3cyc_3S_2V : SchedWriteRes<[A57UnitS, A57UnitS, A57UnitS,
+ A57UnitV, A57UnitV]> {
+ let Latency = 3;
+ let NumMicroOps = 5;
+}
+def A57Write_8cyc_1I_4L : SchedWriteRes<[A57UnitI,
+ A57UnitL, A57UnitL,
+ A57UnitL, A57UnitL]> {
+ let Latency = 8;
+ let NumMicroOps = 5;
+}
+def A57Write_4cyc_1I_4S : SchedWriteRes<[A57UnitI,
+ A57UnitS, A57UnitS,
+ A57UnitS, A57UnitS]> {
+ let Latency = 4;
+ let NumMicroOps = 5;
+}
+def A57Write_9cyc_1I_2L_2V : SchedWriteRes<[A57UnitI,
+ A57UnitL, A57UnitL,
+ A57UnitV, A57UnitV]> {
+ let Latency = 9;
+ let NumMicroOps = 5;
+}
+def A57Write_9cyc_1I_1L_3V : SchedWriteRes<[A57UnitI,
+ A57UnitL,
+ A57UnitV, A57UnitV, A57UnitV]> {
+ let Latency = 9;
+ let NumMicroOps = 5;
+}
+def A57Write_9cyc_2L_3V : SchedWriteRes<[A57UnitL, A57UnitL,
+ A57UnitV, A57UnitV, A57UnitV]> {
+ let Latency = 9;
+ let NumMicroOps = 5;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Define Generic 6 micro-op types
+
+def A57Write_3cyc_1I_3S_2V : SchedWriteRes<[A57UnitI,
+ A57UnitS, A57UnitS, A57UnitS,
+ A57UnitV, A57UnitV]> {
+ let Latency = 3;
+ let NumMicroOps = 6;
+}
+def A57Write_4cyc_2I_4S : SchedWriteRes<[A57UnitI, A57UnitI,
+ A57UnitS, A57UnitS,
+ A57UnitS, A57UnitS]> {
+ let Latency = 4;
+ let NumMicroOps = 6;
+}
+def A57Write_4cyc_4S_2V : SchedWriteRes<[A57UnitS, A57UnitS,
+ A57UnitS, A57UnitS,
+ A57UnitV, A57UnitV]> {
+ let Latency = 4;
+ let NumMicroOps = 6;
+}
+def A57Write_6cyc_6S : SchedWriteRes<[A57UnitS, A57UnitS, A57UnitS,
+ A57UnitS, A57UnitS, A57UnitS]> {
+ let Latency = 6;
+ let NumMicroOps = 6;
+}
+def A57Write_9cyc_1I_2L_3V : SchedWriteRes<[A57UnitI,
+ A57UnitL, A57UnitL,
+ A57UnitV, A57UnitV, A57UnitV]> {
+ let Latency = 9;
+ let NumMicroOps = 6;
+}
+def A57Write_9cyc_1I_1L_4V : SchedWriteRes<[A57UnitI,
+ A57UnitL,
+ A57UnitV, A57UnitV,
+ A57UnitV, A57UnitV]> {
+ let Latency = 9;
+ let NumMicroOps = 6;
+}
+def A57Write_9cyc_2L_4V : SchedWriteRes<[A57UnitL, A57UnitL,
+ A57UnitV, A57UnitV,
+ A57UnitV, A57UnitV]> {
+ let Latency = 9;
+ let NumMicroOps = 6;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Define Generic 7 micro-op types
+
+def A57Write_10cyc_3L_4V : SchedWriteRes<[A57UnitL, A57UnitL, A57UnitL,
+ A57UnitV, A57UnitV,
+ A57UnitV, A57UnitV]> {
+ let Latency = 10;
+ let NumMicroOps = 7;
+}
+def A57Write_4cyc_1I_4S_2V : SchedWriteRes<[A57UnitI,
+ A57UnitS, A57UnitS,
+ A57UnitS, A57UnitS,
+ A57UnitV, A57UnitV]> {
+ let Latency = 4;
+ let NumMicroOps = 7;
+}
+def A57Write_6cyc_1I_6S : SchedWriteRes<[A57UnitI,
+ A57UnitS, A57UnitS, A57UnitS,
+ A57UnitS, A57UnitS, A57UnitS]> {
+ let Latency = 6;
+ let NumMicroOps = 7;
+}
+def A57Write_9cyc_1I_2L_4V : SchedWriteRes<[A57UnitI,
+ A57UnitL, A57UnitL,
+ A57UnitV, A57UnitV,
+ A57UnitV, A57UnitV]> {
+ let Latency = 9;
+ let NumMicroOps = 7;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Define Generic 8 micro-op types
+
+def A57Write_10cyc_1I_3L_4V : SchedWriteRes<[A57UnitI,
+ A57UnitL, A57UnitL, A57UnitL,
+ A57UnitV, A57UnitV,
+ A57UnitV, A57UnitV]> {
+ let Latency = 10;
+ let NumMicroOps = 8;
+}
+def A57Write_11cyc_4L_4V : SchedWriteRes<[A57UnitL, A57UnitL,
+ A57UnitL, A57UnitL,
+ A57UnitV, A57UnitV,
+ A57UnitV, A57UnitV]> {
+ let Latency = 11;
+ let NumMicroOps = 8;
+}
+def A57Write_8cyc_8S : SchedWriteRes<[A57UnitS, A57UnitS,
+ A57UnitS, A57UnitS,
+ A57UnitS, A57UnitS,
+ A57UnitS, A57UnitS]> {
+ let Latency = 8;
+ let NumMicroOps = 8;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Define Generic 9 micro-op types
+
+def A57Write_8cyc_1I_8S : SchedWriteRes<[A57UnitI,
+ A57UnitS, A57UnitS,
+ A57UnitS, A57UnitS,
+ A57UnitS, A57UnitS,
+ A57UnitS, A57UnitS]> {
+ let Latency = 8;
+ let NumMicroOps = 9;
+}
+def A57Write_11cyc_1I_4L_4V : SchedWriteRes<[A57UnitI,
+ A57UnitL, A57UnitL,
+ A57UnitL, A57UnitL,
+ A57UnitV, A57UnitV,
+ A57UnitV, A57UnitV]> {
+ let Latency = 11;
+ let NumMicroOps = 9;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Define Generic 10 micro-op types
+
+def A57Write_6cyc_6S_4V : SchedWriteRes<[A57UnitS, A57UnitS, A57UnitS,
+ A57UnitS, A57UnitS, A57UnitS,
+ A57UnitV, A57UnitV,
+ A57UnitV, A57UnitV]> {
+ let Latency = 6;
+ let NumMicroOps = 10;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Define Generic 11 micro-op types
+
+def A57Write_6cyc_1I_6S_4V : SchedWriteRes<[A57UnitI,
+ A57UnitS, A57UnitS, A57UnitS,
+ A57UnitS, A57UnitS, A57UnitS,
+ A57UnitV, A57UnitV,
+ A57UnitV, A57UnitV]> {
+ let Latency = 6;
+ let NumMicroOps = 11;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Define Generic 12 micro-op types
+
+def A57Write_8cyc_8S_4V : SchedWriteRes<[A57UnitS, A57UnitS, A57UnitS, A57UnitS,
+ A57UnitS, A57UnitS, A57UnitS, A57UnitS,
+ A57UnitV, A57UnitV,
+ A57UnitV, A57UnitV]> {
+ let Latency = 8;
+ let NumMicroOps = 12;
+}
+
+//===----------------------------------------------------------------------===//
+// Define Generic 13 micro-op types
+
+def A57Write_8cyc_1I_8S_4V : SchedWriteRes<[A57UnitI,
+ A57UnitS, A57UnitS, A57UnitS,
+ A57UnitS, A57UnitS, A57UnitS,
+ A57UnitS, A57UnitS,
+ A57UnitV, A57UnitV,
+ A57UnitV, A57UnitV]> {
+ let Latency = 8;
+ let NumMicroOps = 13;
+}
+
diff --git a/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp b/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
index 5c65b75..1bf64fc 100644
--- a/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
+++ b/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
@@ -16,9 +16,8 @@ using namespace llvm;
#define DEBUG_TYPE "aarch64-selectiondag-info"
-AArch64SelectionDAGInfo::AArch64SelectionDAGInfo(const TargetMachine &TM)
- : TargetSelectionDAGInfo(TM),
- Subtarget(&TM.getSubtarget<AArch64Subtarget>()) {}
+AArch64SelectionDAGInfo::AArch64SelectionDAGInfo(const DataLayout *DL)
+ : TargetSelectionDAGInfo(DL) {}
AArch64SelectionDAGInfo::~AArch64SelectionDAGInfo() {}
@@ -30,7 +29,9 @@ SDValue AArch64SelectionDAGInfo::EmitTargetCodeForMemset(
ConstantSDNode *V = dyn_cast<ConstantSDNode>(Src);
ConstantSDNode *SizeValue = dyn_cast<ConstantSDNode>(Size);
const char *bzeroEntry =
- (V && V->isNullValue()) ? Subtarget->getBZeroEntry() : nullptr;
+ (V && V->isNullValue())
+ ? DAG.getTarget().getSubtarget<AArch64Subtarget>().getBZeroEntry()
+ : nullptr;
// For small size (< 256), it is not beneficial to use bzero
// instead of memset.
if (bzeroEntry && (!SizeValue || SizeValue->getZExtValue() > 256)) {
@@ -50,7 +51,7 @@ SDValue AArch64SelectionDAGInfo::EmitTargetCodeForMemset(
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(dl).setChain(Chain)
.setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()),
- DAG.getExternalSymbol(bzeroEntry, IntPtr), &Args, 0)
+ DAG.getExternalSymbol(bzeroEntry, IntPtr), std::move(Args), 0)
.setDiscardResult();
std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI);
return CallResult.second;
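The source-level effect, sketched in C (threshold from the code above; whether a bzero entry point exists depends on the Darwin subtarget, so treat as illustrative):

    #include <string.h>

    void clear_large(char *Buf) {
      // Constant zero value and size > 256: eligible for the bzero call.
      memset(Buf, 0, 1024);
    }

    void clear_small(char *Buf) {
      // At or below 256 bytes the ordinary memset expansion is kept.
      memset(Buf, 0, 64);
    }
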
diff --git a/lib/Target/AArch64/AArch64SelectionDAGInfo.h b/lib/Target/AArch64/AArch64SelectionDAGInfo.h
index 8381f99..1180eea 100644
--- a/lib/Target/AArch64/AArch64SelectionDAGInfo.h
+++ b/lib/Target/AArch64/AArch64SelectionDAGInfo.h
@@ -19,12 +19,8 @@
namespace llvm {
class AArch64SelectionDAGInfo : public TargetSelectionDAGInfo {
- /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
- /// make the right decision when generating code for different targets.
- const AArch64Subtarget *Subtarget;
-
public:
- explicit AArch64SelectionDAGInfo(const TargetMachine &TM);
+ explicit AArch64SelectionDAGInfo(const DataLayout *DL);
~AArch64SelectionDAGInfo();
SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl, SDValue Chain,
diff --git a/lib/Target/AArch64/AArch64Subtarget.cpp b/lib/Target/AArch64/AArch64Subtarget.cpp
index cd69994..bb0b72c 100644
--- a/lib/Target/AArch64/AArch64Subtarget.cpp
+++ b/lib/Target/AArch64/AArch64Subtarget.cpp
@@ -30,21 +30,35 @@ static cl::opt<bool>
EnableEarlyIfConvert("aarch64-early-ifcvt", cl::desc("Enable the early if "
"converter pass"), cl::init(true), cl::Hidden);
-AArch64Subtarget::AArch64Subtarget(const std::string &TT,
- const std::string &CPU,
- const std::string &FS, bool LittleEndian)
- : AArch64GenSubtargetInfo(TT, CPU, FS), ARMProcFamily(Others),
- HasFPARMv8(false), HasNEON(false), HasCrypto(false), HasCRC(false),
- HasZeroCycleRegMove(false), HasZeroCycleZeroing(false), CPUString(CPU),
- TargetTriple(TT), IsLittleEndian(LittleEndian) {
+AArch64Subtarget &
+AArch64Subtarget::initializeSubtargetDependencies(StringRef FS) {
// Determine default and user-specified characteristics
if (CPUString.empty())
CPUString = "generic";
ParseSubtargetFeatures(CPUString, FS);
+ return *this;
}
+AArch64Subtarget::AArch64Subtarget(const std::string &TT,
+ const std::string &CPU,
+ const std::string &FS, TargetMachine &TM,
+ bool LittleEndian)
+ : AArch64GenSubtargetInfo(TT, CPU, FS), ARMProcFamily(Others),
+ HasFPARMv8(false), HasNEON(false), HasCrypto(false), HasCRC(false),
+ HasZeroCycleRegMove(false), HasZeroCycleZeroing(false), CPUString(CPU),
+ TargetTriple(TT),
+ // This nested ternary is horrible, but DL needs to be properly
+ // initialized before TLInfo is constructed.
+ DL(isTargetMachO()
+ ? "e-m:o-i64:64-i128:128-n32:64-S128"
+ : (LittleEndian ? "e-m:e-i64:64-i128:128-n32:64-S128"
+ : "E-m:e-i64:64-i128:128-n32:64-S128")),
+ FrameLowering(), InstrInfo(initializeSubtargetDependencies(FS)),
+ TSInfo(&DL), TLInfo(TM) {}
+
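The constructor reshuffle leans on a standard C++ guarantee: members are initialized in declaration order, so a helper that mutates *this and returns it can parse the feature string before the members that depend on it are constructed. A standalone sketch of the idiom with invented names (not the real classes):

    #include <string>

    struct InstrInfo {
      explicit InstrInfo(const struct Subtarget &ST); // wants parsed features
    };

    struct Subtarget {
      std::string CPUString;
      bool HasNEON = false;
      InstrInfo II; // declared after the feature flags on purpose

      // Invoked from II's initializer below, so it runs before II is
      // constructed and can fix up CPUString and the feature flags first.
      Subtarget &initializeSubtargetDependencies(const std::string &FS) {
        if (CPUString.empty())
          CPUString = "generic";
        HasNEON = FS.find("+neon") != std::string::npos;
        return *this;
      }

      Subtarget(const std::string &CPU, const std::string &FS)
          : CPUString(CPU), II(initializeSubtargetDependencies(FS)) {}
    };

    InstrInfo::InstrInfo(const Subtarget &ST) { (void)ST.HasNEON; }

    int main() { Subtarget ST("cortex-a57", "+neon"); }
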
/// ClassifyGlobalReference - Find the target operand flags that describe
/// how a global value should be referenced for the current subtarget.
unsigned char
diff --git a/lib/Target/AArch64/AArch64Subtarget.h b/lib/Target/AArch64/AArch64Subtarget.h
index 590ea05..52124f6 100644
--- a/lib/Target/AArch64/AArch64Subtarget.h
+++ b/lib/Target/AArch64/AArch64Subtarget.h
@@ -14,8 +14,13 @@
#ifndef AArch64SUBTARGET_H
#define AArch64SUBTARGET_H
-#include "llvm/Target/TargetSubtargetInfo.h"
+#include "AArch64InstrInfo.h"
+#include "AArch64FrameLowering.h"
+#include "AArch64ISelLowering.h"
#include "AArch64RegisterInfo.h"
+#include "AArch64SelectionDAGInfo.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
#include <string>
#define GET_SUBTARGETINFO_HEADER
@@ -49,15 +54,32 @@ protected:
/// TargetTriple - What processor and OS we're targeting.
Triple TargetTriple;
- /// IsLittleEndian - Is the target little endian?
- bool IsLittleEndian;
+ const DataLayout DL;
+ AArch64FrameLowering FrameLowering;
+ AArch64InstrInfo InstrInfo;
+ AArch64SelectionDAGInfo TSInfo;
+ AArch64TargetLowering TLInfo;
+private:
+ /// initializeSubtargetDependencies - Initializes using CPUString and the
+ /// passed in feature string so that we can use initializer lists for
+ /// subtarget initialization.
+ AArch64Subtarget &initializeSubtargetDependencies(StringRef FS);
public:
/// This constructor initializes the data members to match that
/// of the specified triple.
AArch64Subtarget(const std::string &TT, const std::string &CPU,
- const std::string &FS, bool LittleEndian);
-
+ const std::string &FS, TargetMachine &TM, bool LittleEndian);
+
+ const AArch64SelectionDAGInfo *getSelectionDAGInfo() const { return &TSInfo; }
+ const AArch64FrameLowering *getFrameLowering() const {
+ return &FrameLowering;
+ }
+ const AArch64TargetLowering *getTargetLowering() const {
+ return &TLInfo;
+ }
+ const AArch64InstrInfo *getInstrInfo() const { return &InstrInfo; }
+ const DataLayout *getDataLayout() const { return &DL; }
bool enableMachineScheduler() const override { return true; }
bool hasZeroCycleRegMove() const { return HasZeroCycleRegMove; }
@@ -69,7 +91,7 @@ public:
bool hasCrypto() const { return HasCrypto; }
bool hasCRC() const { return HasCRC; }
- bool isLittleEndian() const { return IsLittleEndian; }
+ bool isLittleEndian() const { return DL.isLittleEndian(); }
bool isTargetDarwin() const { return TargetTriple.isOSDarwin(); }
diff --git a/lib/Target/AArch64/AArch64TargetMachine.cpp b/lib/Target/AArch64/AArch64TargetMachine.cpp
index 0b5dd2f..f99b90b 100644
--- a/lib/Target/AArch64/AArch64TargetMachine.cpp
+++ b/lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -53,6 +53,12 @@ static cl::opt<bool>
EnableLoadStoreOpt("aarch64-load-store-opt", cl::desc("Enable the load/store pair"
" optimization pass"), cl::init(true), cl::Hidden);
+static cl::opt<bool>
+EnableAtomicTidy("aarch64-atomic-cfg-tidy", cl::Hidden,
+ cl::desc("Run SimplifyCFG after expanding atomic operations"
+ " to make use of cmpxchg flow-based information"),
+ cl::init(true));
+
extern "C" void LLVMInitializeAArch64Target() {
// Register the target.
RegisterTargetMachine<AArch64leTargetMachine> X(TheAArch64leTarget);
@@ -71,16 +77,7 @@ AArch64TargetMachine::AArch64TargetMachine(const Target &T, StringRef TT,
CodeGenOpt::Level OL,
bool LittleEndian)
: LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
- Subtarget(TT, CPU, FS, LittleEndian),
- // This nested ternary is horrible, but DL needs to be properly
- // initialized
- // before TLInfo is constructed.
- DL(Subtarget.isTargetMachO()
- ? "e-m:o-i64:64-i128:128-n32:64-S128"
- : (LittleEndian ? "e-m:e-i64:64-i128:128-n32:64-S128"
- : "E-m:e-i64:64-i128:128-n32:64-S128")),
- InstrInfo(Subtarget), TLInfo(*this), FrameLowering(*this, Subtarget),
- TSInfo(*this) {
+ Subtarget(TT, CPU, FS, *this, LittleEndian) {
initAsmInfo();
}
@@ -113,6 +110,7 @@ public:
return getTM<AArch64TargetMachine>();
}
+ void addIRPasses() override;
bool addPreISel() override;
bool addInstSelector() override;
bool addILPOpts() override;
@@ -135,6 +133,20 @@ TargetPassConfig *AArch64TargetMachine::createPassConfig(PassManagerBase &PM) {
return new AArch64PassConfig(this, PM);
}
+void AArch64PassConfig::addIRPasses() {
+ // Always expand atomic operations; we don't deal with atomicrmw or cmpxchg
+ // ourselves.
+ addPass(createAtomicExpandLoadLinkedPass(TM));
+
+ // Cmpxchg instructions are often used with a subsequent comparison to
+ // determine whether it succeeded. We can exploit existing control-flow in
+ // ldrex/strex loops to simplify this, but it needs tidying up.
+ if (TM->getOptLevel() != CodeGenOpt::None && EnableAtomicTidy)
+ addPass(createCFGSimplificationPass());
+
+ TargetPassConfig::addIRPasses();
+}
+
// Pass Pipeline Configuration
bool AArch64PassConfig::addPreISel() {
// Run promote constant before global merge, so that the promoted constants
@@ -146,10 +158,6 @@ bool AArch64PassConfig::addPreISel() {
if (TM->getOptLevel() != CodeGenOpt::None)
addPass(createAArch64AddressTypePromotionPass());
- // Always expand atomic operations, we don't deal with atomicrmw or cmpxchg
- // ourselves.
- addPass(createAtomicExpandLoadLinkedPass(TM));
-
return false;
}
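
Context for the pass move above: the ll/sc expansion of a compare-exchange already ends in a success/failure branch, and callers almost always branch on the result again, which is the redundancy the post-expansion SimplifyCFG run cleans up. A user-level sketch of the pattern the comment describes (plain C++11 atomics, not LLVM code):

    #include <atomic>

    // On AArch64 this compare_exchange expands to an ldaxr/stlxr loop whose
    // exit condition is exactly the success test the caller branches on below.
    static bool try_lock(std::atomic<int> &Flag) {
      int Expected = 0;
      return Flag.compare_exchange_strong(Expected, 1);
    }

    int main() {
      std::atomic<int> Flag(0);
      if (try_lock(Flag)) // branch on the cmpxchg result
        return 0;
      return 1;
    }
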
diff --git a/lib/Target/AArch64/AArch64TargetMachine.h b/lib/Target/AArch64/AArch64TargetMachine.h
index 079b19b..852cb3f 100644
--- a/lib/Target/AArch64/AArch64TargetMachine.h
+++ b/lib/Target/AArch64/AArch64TargetMachine.h
@@ -15,13 +15,9 @@
#define AArch64TARGETMACHINE_H
#include "AArch64InstrInfo.h"
-#include "AArch64ISelLowering.h"
#include "AArch64Subtarget.h"
-#include "AArch64FrameLowering.h"
-#include "AArch64SelectionDAGInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/MC/MCStreamer.h"
namespace llvm {
@@ -29,13 +25,6 @@ class AArch64TargetMachine : public LLVMTargetMachine {
protected:
AArch64Subtarget Subtarget;
-private:
- const DataLayout DL;
- AArch64InstrInfo InstrInfo;
- AArch64TargetLowering TLInfo;
- AArch64FrameLowering FrameLowering;
- AArch64SelectionDAGInfo TSInfo;
-
public:
AArch64TargetMachine(const Target &T, StringRef TT, StringRef CPU,
StringRef FS, const TargetOptions &Options,
@@ -46,18 +35,22 @@ public:
return &Subtarget;
}
const AArch64TargetLowering *getTargetLowering() const override {
- return &TLInfo;
+ return getSubtargetImpl()->getTargetLowering();
+ }
+ const DataLayout *getDataLayout() const override {
+ return getSubtargetImpl()->getDataLayout();
}
- const DataLayout *getDataLayout() const override { return &DL; }
const AArch64FrameLowering *getFrameLowering() const override {
- return &FrameLowering;
+ return getSubtargetImpl()->getFrameLowering();
+ }
+ const AArch64InstrInfo *getInstrInfo() const override {
+ return getSubtargetImpl()->getInstrInfo();
}
- const AArch64InstrInfo *getInstrInfo() const override { return &InstrInfo; }
const AArch64RegisterInfo *getRegisterInfo() const override {
- return &InstrInfo.getRegisterInfo();
+ return &getInstrInfo()->getRegisterInfo();
}
const AArch64SelectionDAGInfo *getSelectionDAGInfo() const override {
- return &TSInfo;
+ return getSubtargetImpl()->getSelectionDAGInfo();
}
// Pass Pipeline Configuration
diff --git a/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 33e482a..1dac14b 100644
--- a/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -306,28 +306,64 @@ unsigned AArch64TTI::getCastInstrCost(unsigned Opcode, Type *Dst,
static const TypeConversionCostTblEntry<MVT> ConversionTbl[] = {
// LowerVectorINT_TO_FP:
{ ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i32, 1 },
- { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i8, 1 },
- { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i16, 1 },
- { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i32, 1 },
+ { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 },
{ ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i64, 1 },
{ ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i32, 1 },
- { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i8, 1 },
- { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i16, 1 },
- { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i32, 1 },
+ { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 },
{ ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 1 },
+
+ // Complex: to v2f32
+ { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i8, 3 },
+ { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i16, 3 },
+ { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i64, 2 },
+ { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i8, 3 },
+ { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i16, 3 },
+ { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i64, 2 },
+
+ // Complex: to v4f32
+ { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i8, 4 },
+ { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i16, 2 },
+ { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i8, 3 },
+ { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i16, 2 },
+
+ // Complex: to v2f64
+ { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i8, 4 },
+ { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i16, 4 },
+ { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 },
+ { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i8, 4 },
+ { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i16, 4 },
+ { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 },
+
+
// LowerVectorFP_TO_INT
+ { ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f32, 1 },
{ ISD::FP_TO_SINT, MVT::v4i32, MVT::v4f32, 1 },
{ ISD::FP_TO_SINT, MVT::v2i64, MVT::v2f64, 1 },
+ { ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f32, 1 },
{ ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f32, 1 },
{ ISD::FP_TO_UINT, MVT::v2i64, MVT::v2f64, 1 },
- { ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f64, 1 },
- { ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f64, 1 },
- { ISD::FP_TO_UINT, MVT::v2i64, MVT::v2f32, 4 },
- { ISD::FP_TO_SINT, MVT::v2i64, MVT::v2f32, 4 },
- { ISD::FP_TO_UINT, MVT::v4i16, MVT::v4f32, 4 },
- { ISD::FP_TO_SINT, MVT::v4i16, MVT::v4f32, 4 },
- { ISD::FP_TO_UINT, MVT::v2i64, MVT::v2f64, 4 },
- { ISD::FP_TO_SINT, MVT::v2i64, MVT::v2f64, 4 },
+
+ // Complex, from v2f32: legal type is v2i32 (no cost) or v2i64 (1 ext).
+ { ISD::FP_TO_SINT, MVT::v2i64, MVT::v2f32, 2 },
+ { ISD::FP_TO_SINT, MVT::v2i16, MVT::v2f32, 1 },
+ { ISD::FP_TO_SINT, MVT::v2i8, MVT::v2f32, 1 },
+ { ISD::FP_TO_UINT, MVT::v2i64, MVT::v2f32, 2 },
+ { ISD::FP_TO_UINT, MVT::v2i16, MVT::v2f32, 1 },
+ { ISD::FP_TO_UINT, MVT::v2i8, MVT::v2f32, 1 },
+
+ // Complex, from v4f32: legal type is v4i16, 1 narrowing => ~2
+ { ISD::FP_TO_SINT, MVT::v4i16, MVT::v4f32, 2 },
+ { ISD::FP_TO_SINT, MVT::v4i8, MVT::v4f32, 2 },
+ { ISD::FP_TO_UINT, MVT::v4i16, MVT::v4f32, 2 },
+ { ISD::FP_TO_UINT, MVT::v4i8, MVT::v4f32, 2 },
+
+ // Complex, from v2f64: legal type is v2i32, 1 narrowing => ~2.
+ { ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f64, 2 },
+ { ISD::FP_TO_SINT, MVT::v2i16, MVT::v2f64, 2 },
+ { ISD::FP_TO_SINT, MVT::v2i8, MVT::v2f64, 2 },
+ { ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f64, 2 },
+ { ISD::FP_TO_UINT, MVT::v2i16, MVT::v2f64, 2 },
+ { ISD::FP_TO_UINT, MVT::v2i8, MVT::v2f64, 2 },
};
int Idx = ConvertCostTableLookup<MVT>(
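
For reference, the table above is consulted by a linear scan keyed on (ISD opcode, destination type, source type). A simplified standalone version of that lookup; SimpleEntry and lookup are illustrative stand-ins for TypeConversionCostTblEntry and ConvertCostTableLookup, not the LLVM types:

    #include <cstddef>

    struct SimpleEntry { int ISD, Dst, Src; unsigned Cost; };

    // Returns the index of the matching row, or -1 so the caller can fall
    // back to the generic cost model.
    static int lookup(const SimpleEntry *Tbl, size_t Len, int ISD, int Dst,
                      int Src) {
      for (size_t I = 0; I != Len; ++I)
        if (Tbl[I].ISD == ISD && Tbl[I].Dst == Dst && Tbl[I].Src == Src)
          return static_cast<int>(I);
      return -1;
    }

    int main() {
      static const SimpleEntry Tbl[] = {
          {/*FP_TO_SINT*/ 1, /*v2i32*/ 2, /*v2f64*/ 3, 2}};
      return lookup(Tbl, 1, 1, 2, 3) == 0 ? 0 : 1;
    }
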
diff --git a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
index 65b77c5..c42d11e 100644
--- a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
+++ b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
@@ -38,14 +38,19 @@ namespace {
class AArch64Operand;
class AArch64AsmParser : public MCTargetAsmParser {
-public:
- typedef SmallVectorImpl<MCParsedAsmOperand *> OperandVector;
-
private:
StringRef Mnemonic; ///< Instruction mnemonic.
MCSubtargetInfo &STI;
MCAsmParser &Parser;
+ // Map of register aliases registered via the .req directive.
+ StringMap<std::pair<bool, unsigned> > RegisterReqs;
+
+ AArch64TargetStreamer &getTargetStreamer() {
+ MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
+ return static_cast<AArch64TargetStreamer &>(TS);
+ }
+
MCAsmParser &getParser() const { return Parser; }
MCAsmLexer &getLexer() const { return Parser.getLexer(); }
@@ -54,6 +59,7 @@ private:
bool parseSysAlias(StringRef Name, SMLoc NameLoc, OperandVector &Operands);
AArch64CC::CondCode parseCondCodeString(StringRef Cond);
bool parseCondCode(OperandVector &Operands, bool invertCondCode);
+ unsigned matchRegisterNameAlias(StringRef Name, bool isVector);
int tryParseRegister();
int tryMatchVectorRegister(StringRef &Kind, bool expected);
bool parseRegister(OperandVector &Operands);
@@ -70,6 +76,10 @@ private:
bool parseDirectiveTLSDescCall(SMLoc L);
bool parseDirectiveLOH(StringRef LOH, SMLoc L);
+ bool parseDirectiveLtorg(SMLoc L);
+
+ bool parseDirectiveReq(StringRef Name, SMLoc L);
+ bool parseDirectiveUnreq(SMLoc L);
bool validateInstruction(MCInst &Inst, SmallVectorImpl<SMLoc> &Loc);
bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
@@ -108,6 +118,8 @@ public:
const MCTargetOptions &Options)
: MCTargetAsmParser(), STI(_STI), Parser(_Parser) {
MCAsmParserExtension::Initialize(_Parser);
+ if (Parser.getStreamer().getTargetStreamer() == nullptr)
+ new AArch64TargetStreamer(Parser.getStreamer());
// Initialize the set of available features.
setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
@@ -117,7 +129,7 @@ public:
SMLoc NameLoc, OperandVector &Operands) override;
bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
bool ParseDirective(AsmToken DirectiveID) override;
- unsigned validateTargetOperandClass(MCParsedAsmOperand *Op,
+ unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
unsigned Kind) override;
static bool classifySymbolRef(const MCExpr *Expr,
@@ -240,10 +252,10 @@ private:
// the add<>Operands() calls.
MCContext &Ctx;
+public:
AArch64Operand(KindTy K, MCContext &_Ctx)
: MCParsedAsmOperand(), Kind(K), Ctx(_Ctx) {}
-public:
AArch64Operand(const AArch64Operand &o) : MCParsedAsmOperand(), Ctx(o.Ctx) {
Kind = o.Kind;
StartLoc = o.StartLoc;
@@ -607,7 +619,11 @@ public:
const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
if (!MCE)
return false;
- return AArch64_AM::isLogicalImmediate(MCE->getValue(), 32);
+ int64_t Val = MCE->getValue();
+ if (Val >> 32 != 0 && Val >> 32 != ~0LL)
+ return false;
+ Val &= 0xFFFFFFFF;
+ return AArch64_AM::isLogicalImmediate(Val, 32);
}
bool isLogicalImm64() const {
if (!isImm())
@@ -617,6 +633,23 @@ public:
return false;
return AArch64_AM::isLogicalImmediate(MCE->getValue(), 64);
}
+ bool isLogicalImm32Not() const {
+ if (!isImm())
+ return false;
+ const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
+ if (!MCE)
+ return false;
+ int64_t Val = ~MCE->getValue() & 0xFFFFFFFF;
+ return AArch64_AM::isLogicalImmediate(Val, 32);
+ }
+ bool isLogicalImm64Not() const {
+ if (!isImm())
+ return false;
+ const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
+ if (!MCE)
+ return false;
+ return AArch64_AM::isLogicalImmediate(~MCE->getValue(), 64);
+ }
bool isShiftedImm() const { return Kind == k_ShiftedImm; }
bool isAddSubImm() const {
if (!isShiftedImm() && !isImm())
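
The guard added to isLogicalImm32() above accepts a 64-bit parse of a 32-bit logical immediate only when the upper half is a valid zero- or sign-extension, i.e. bits [63:32] are all zero or all one, before masking to 32 bits. A standalone restatement of just that predicate (fitsAs32BitLogicalInput is a hypothetical name):

    #include <cassert>
    #include <cstdint>

    // Mirrors the added check: Val >> 32 must be all-zero or all-one before
    // the value is masked to its low 32 bits. (The arithmetic shift on
    // negative values is what the original relies on, too.)
    static bool fitsAs32BitLogicalInput(int64_t Val) {
      int64_t Hi = Val >> 32;
      return Hi == 0 || Hi == ~0LL;
    }

    int main() {
      assert(fitsAs32BitLogicalInput(0x00000000FFFF0000LL));  // zero-extended
      assert(fitsAs32BitLogicalInput(
          static_cast<int64_t>(0xFFFFFFFF80000000ULL)));      // sign-extended
      assert(!fitsAs32BitLogicalInput(0x00000001FFFF0000LL)); // neither
    }
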
@@ -1348,7 +1381,8 @@ public:
assert(N == 1 && "Invalid number of operands!");
const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
assert(MCE && "Invalid logical immediate operand!");
- uint64_t encoding = AArch64_AM::encodeLogicalImmediate(MCE->getValue(), 32);
+ uint64_t encoding =
+ AArch64_AM::encodeLogicalImmediate(MCE->getValue() & 0xFFFFFFFF, 32);
Inst.addOperand(MCOperand::CreateImm(encoding));
}
@@ -1360,6 +1394,22 @@ public:
Inst.addOperand(MCOperand::CreateImm(encoding));
}
+ void addLogicalImm32NotOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ const MCConstantExpr *MCE = cast<MCConstantExpr>(getImm());
+ int64_t Val = ~MCE->getValue() & 0xFFFFFFFF;
+ uint64_t encoding = AArch64_AM::encodeLogicalImmediate(Val, 32);
+ Inst.addOperand(MCOperand::CreateImm(encoding));
+ }
+
+ void addLogicalImm64NotOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ const MCConstantExpr *MCE = cast<MCConstantExpr>(getImm());
+ uint64_t encoding =
+ AArch64_AM::encodeLogicalImmediate(~MCE->getValue(), 64);
+ Inst.addOperand(MCOperand::CreateImm(encoding));
+ }
+
void addSIMDImmType10Operands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
@@ -1523,9 +1573,9 @@ public:
void print(raw_ostream &OS) const override;
- static AArch64Operand *CreateToken(StringRef Str, bool IsSuffix, SMLoc S,
- MCContext &Ctx) {
- AArch64Operand *Op = new AArch64Operand(k_Token, Ctx);
+ static std::unique_ptr<AArch64Operand>
+ CreateToken(StringRef Str, bool IsSuffix, SMLoc S, MCContext &Ctx) {
+ auto Op = make_unique<AArch64Operand>(k_Token, Ctx);
Op->Tok.Data = Str.data();
Op->Tok.Length = Str.size();
Op->Tok.IsSuffix = IsSuffix;
@@ -1534,9 +1584,9 @@ public:
return Op;
}
- static AArch64Operand *CreateReg(unsigned RegNum, bool isVector, SMLoc S,
- SMLoc E, MCContext &Ctx) {
- AArch64Operand *Op = new AArch64Operand(k_Register, Ctx);
+ static std::unique_ptr<AArch64Operand>
+ CreateReg(unsigned RegNum, bool isVector, SMLoc S, SMLoc E, MCContext &Ctx) {
+ auto Op = make_unique<AArch64Operand>(k_Register, Ctx);
Op->Reg.RegNum = RegNum;
Op->Reg.isVector = isVector;
Op->StartLoc = S;
@@ -1544,10 +1594,10 @@ public:
return Op;
}
- static AArch64Operand *CreateVectorList(unsigned RegNum, unsigned Count,
- unsigned NumElements, char ElementKind,
- SMLoc S, SMLoc E, MCContext &Ctx) {
- AArch64Operand *Op = new AArch64Operand(k_VectorList, Ctx);
+ static std::unique_ptr<AArch64Operand>
+ CreateVectorList(unsigned RegNum, unsigned Count, unsigned NumElements,
+ char ElementKind, SMLoc S, SMLoc E, MCContext &Ctx) {
+ auto Op = make_unique<AArch64Operand>(k_VectorList, Ctx);
Op->VectorList.RegNum = RegNum;
Op->VectorList.Count = Count;
Op->VectorList.NumElements = NumElements;
@@ -1557,28 +1607,29 @@ public:
return Op;
}
- static AArch64Operand *CreateVectorIndex(unsigned Idx, SMLoc S, SMLoc E,
- MCContext &Ctx) {
- AArch64Operand *Op = new AArch64Operand(k_VectorIndex, Ctx);
+ static std::unique_ptr<AArch64Operand>
+ CreateVectorIndex(unsigned Idx, SMLoc S, SMLoc E, MCContext &Ctx) {
+ auto Op = make_unique<AArch64Operand>(k_VectorIndex, Ctx);
Op->VectorIndex.Val = Idx;
Op->StartLoc = S;
Op->EndLoc = E;
return Op;
}
- static AArch64Operand *CreateImm(const MCExpr *Val, SMLoc S, SMLoc E,
- MCContext &Ctx) {
- AArch64Operand *Op = new AArch64Operand(k_Immediate, Ctx);
+ static std::unique_ptr<AArch64Operand> CreateImm(const MCExpr *Val, SMLoc S,
+ SMLoc E, MCContext &Ctx) {
+ auto Op = make_unique<AArch64Operand>(k_Immediate, Ctx);
Op->Imm.Val = Val;
Op->StartLoc = S;
Op->EndLoc = E;
return Op;
}
- static AArch64Operand *CreateShiftedImm(const MCExpr *Val,
- unsigned ShiftAmount, SMLoc S,
- SMLoc E, MCContext &Ctx) {
- AArch64Operand *Op = new AArch64Operand(k_ShiftedImm, Ctx);
+ static std::unique_ptr<AArch64Operand> CreateShiftedImm(const MCExpr *Val,
+ unsigned ShiftAmount,
+ SMLoc S, SMLoc E,
+ MCContext &Ctx) {
+ auto Op = make_unique<AArch64Operand>(k_ShiftedImm, Ctx);
Op->ShiftedImm .Val = Val;
Op->ShiftedImm.ShiftAmount = ShiftAmount;
Op->StartLoc = S;
@@ -1586,34 +1637,36 @@ public:
return Op;
}
- static AArch64Operand *CreateCondCode(AArch64CC::CondCode Code, SMLoc S,
- SMLoc E, MCContext &Ctx) {
- AArch64Operand *Op = new AArch64Operand(k_CondCode, Ctx);
+ static std::unique_ptr<AArch64Operand>
+ CreateCondCode(AArch64CC::CondCode Code, SMLoc S, SMLoc E, MCContext &Ctx) {
+ auto Op = make_unique<AArch64Operand>(k_CondCode, Ctx);
Op->CondCode.Code = Code;
Op->StartLoc = S;
Op->EndLoc = E;
return Op;
}
- static AArch64Operand *CreateFPImm(unsigned Val, SMLoc S, MCContext &Ctx) {
- AArch64Operand *Op = new AArch64Operand(k_FPImm, Ctx);
+ static std::unique_ptr<AArch64Operand> CreateFPImm(unsigned Val, SMLoc S,
+ MCContext &Ctx) {
+ auto Op = make_unique<AArch64Operand>(k_FPImm, Ctx);
Op->FPImm.Val = Val;
Op->StartLoc = S;
Op->EndLoc = S;
return Op;
}
- static AArch64Operand *CreateBarrier(unsigned Val, SMLoc S, MCContext &Ctx) {
- AArch64Operand *Op = new AArch64Operand(k_Barrier, Ctx);
+ static std::unique_ptr<AArch64Operand> CreateBarrier(unsigned Val, SMLoc S,
+ MCContext &Ctx) {
+ auto Op = make_unique<AArch64Operand>(k_Barrier, Ctx);
Op->Barrier.Val = Val;
Op->StartLoc = S;
Op->EndLoc = S;
return Op;
}
- static AArch64Operand *CreateSysReg(StringRef Str, SMLoc S,
- uint64_t FeatureBits, MCContext &Ctx) {
- AArch64Operand *Op = new AArch64Operand(k_SysReg, Ctx);
+ static std::unique_ptr<AArch64Operand>
+ CreateSysReg(StringRef Str, SMLoc S, uint64_t FeatureBits, MCContext &Ctx) {
+ auto Op = make_unique<AArch64Operand>(k_SysReg, Ctx);
Op->SysReg.Data = Str.data();
Op->SysReg.Length = Str.size();
Op->SysReg.FeatureBits = FeatureBits;
@@ -1622,27 +1675,28 @@ public:
return Op;
}
- static AArch64Operand *CreateSysCR(unsigned Val, SMLoc S, SMLoc E,
- MCContext &Ctx) {
- AArch64Operand *Op = new AArch64Operand(k_SysCR, Ctx);
+ static std::unique_ptr<AArch64Operand> CreateSysCR(unsigned Val, SMLoc S,
+ SMLoc E, MCContext &Ctx) {
+ auto Op = make_unique<AArch64Operand>(k_SysCR, Ctx);
Op->SysCRImm.Val = Val;
Op->StartLoc = S;
Op->EndLoc = E;
return Op;
}
- static AArch64Operand *CreatePrefetch(unsigned Val, SMLoc S, MCContext &Ctx) {
- AArch64Operand *Op = new AArch64Operand(k_Prefetch, Ctx);
+ static std::unique_ptr<AArch64Operand> CreatePrefetch(unsigned Val, SMLoc S,
+ MCContext &Ctx) {
+ auto Op = make_unique<AArch64Operand>(k_Prefetch, Ctx);
Op->Prefetch.Val = Val;
Op->StartLoc = S;
Op->EndLoc = S;
return Op;
}
- static AArch64Operand *CreateShiftExtend(AArch64_AM::ShiftExtendType ShOp,
- unsigned Val, bool HasExplicitAmount,
- SMLoc S, SMLoc E, MCContext &Ctx) {
- AArch64Operand *Op = new AArch64Operand(k_ShiftExtend, Ctx);
+ static std::unique_ptr<AArch64Operand>
+ CreateShiftExtend(AArch64_AM::ShiftExtendType ShOp, unsigned Val,
+ bool HasExplicitAmount, SMLoc S, SMLoc E, MCContext &Ctx) {
+ auto Op = make_unique<AArch64Operand>(k_ShiftExtend, Ctx);
Op->ShiftExtend.Type = ShOp;
Op->ShiftExtend.Amount = Val;
Op->ShiftExtend.HasExplicitAmount = HasExplicitAmount;
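
All of these Create* factories now return std::unique_ptr, which is why the manual delete calls disappear from MatchAndEmitInstruction further down: overwriting a slot in the operand vector releases the old operand automatically. A minimal sketch of that ownership pattern; Operand and createToken are illustrative, not the LLVM classes:

    #include <memory>
    #include <vector>

    struct Operand { int Kind; };

    // Illustrative factory in the style of AArch64Operand::CreateToken.
    static std::unique_ptr<Operand> createToken(int K) {
      return std::unique_ptr<Operand>(new Operand{K});
    }

    int main() {
      std::vector<std::unique_ptr<Operand>> Operands;
      Operands.push_back(createToken(1));
      Operands[0] = createToken(2); // old operand destroyed; no manual delete
      return Operands[0]->Kind == 2 ? 0 : 1;
    }
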
@@ -1816,6 +1870,26 @@ bool AArch64AsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
return (RegNo == (unsigned)-1);
}
+// Matches a register name or register alias previously defined by '.req'
+unsigned AArch64AsmParser::matchRegisterNameAlias(StringRef Name,
+ bool isVector) {
+ unsigned RegNum = isVector ? matchVectorRegName(Name)
+ : MatchRegisterName(Name);
+
+ if (RegNum == 0) {
+ // Check for aliases registered via .req. Canonicalize to lower case.
+ // That's more consistent since register names are case insensitive, and
+ // it's how the original entry was passed in from MC/MCParser/AsmParser.
+ auto Entry = RegisterReqs.find(Name.lower());
+ if (Entry == RegisterReqs.end())
+ return 0;
+ // set RegNum if the match is the right kind of register
+ if (isVector == Entry->getValue().first)
+ RegNum = Entry->getValue().second;
+ }
+ return RegNum;
+}
+
/// tryParseRegister - Try to parse a register name. The token must be an
/// Identifier when called, and if it is a register name the token is eaten and
/// the register is added to the operand list.
@@ -1824,7 +1898,7 @@ int AArch64AsmParser::tryParseRegister() {
assert(Tok.is(AsmToken::Identifier) && "Token is not an Identifier");
std::string lowerCase = Tok.getString().lower();
- unsigned RegNum = MatchRegisterName(lowerCase);
+ unsigned RegNum = matchRegisterNameAlias(lowerCase, false);
// Also handle a few aliases of registers.
if (RegNum == 0)
RegNum = StringSwitch<unsigned>(lowerCase)
@@ -1854,7 +1928,8 @@ int AArch64AsmParser::tryMatchVectorRegister(StringRef &Kind, bool expected) {
// a '.'.
size_t Start = 0, Next = Name.find('.');
StringRef Head = Name.slice(Start, Next);
- unsigned RegNum = matchVectorRegName(Head);
+ unsigned RegNum = matchRegisterNameAlias(Head, true);
+
if (RegNum) {
if (Next != StringRef::npos) {
Kind = Name.slice(Next, StringRef::npos);
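
For reference, the vector syntax being split here: a name like "v2.4s" divides at the first '.' into a register head and an element-kind suffix. A standalone illustration with std::string standing in for StringRef:

    #include <cassert>
    #include <string>

    int main() {
      std::string Name = "v2.4s";
      size_t Next = Name.find('.');
      std::string Head = Name.substr(0, Next); // "v2"  -> register lookup
      std::string Kind = Next == std::string::npos
                             ? std::string()
                             : Name.substr(Next); // ".4s" -> element kind
      assert(Head == "v2" && Kind == ".4s");
    }
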
@@ -2183,8 +2258,11 @@ bool AArch64AsmParser::parseCondCode(OperandVector &Operands,
return TokError("invalid condition code");
Parser.Lex(); // Eat identifier token.
- if (invertCondCode)
+ if (invertCondCode) {
+ if (CC == AArch64CC::AL || CC == AArch64CC::NV)
+ return TokError("condition codes AL and NV are invalid for this instruction");
CC = AArch64CC::getInvertedCondCode(AArch64CC::CondCode(CC));
+ }
Operands.push_back(
AArch64Operand::CreateCondCode(CC, S, getLoc(), getContext()));
@@ -2849,7 +2927,7 @@ AArch64AsmParser::tryParseGPR64sp0Operand(OperandVector &Operands) {
if (!Tok.is(AsmToken::Identifier))
return MatchOperand_NoMatch;
- unsigned RegNum = MatchRegisterName(Tok.getString().lower());
+ unsigned RegNum = matchRegisterNameAlias(Tok.getString().lower(), false);
MCContext &Ctx = getContext();
const MCRegisterInfo *RI = Ctx.getRegisterInfo();
@@ -3000,6 +3078,43 @@ bool AArch64AsmParser::parseOperand(OperandVector &Operands, bool isCondCode,
Operands.push_back(AArch64Operand::CreateImm(ImmVal, S, E, getContext()));
return false;
}
+ case AsmToken::Equal: {
+ SMLoc Loc = Parser.getTok().getLoc();
+ if (Mnemonic != "ldr") // only parse for ldr pseudo (e.g. ldr r0, =val)
+ return Error(Loc, "unexpected token in operand");
+ Parser.Lex(); // Eat '='
+ const MCExpr *SubExprVal;
+ if (getParser().parseExpression(SubExprVal))
+ return true;
+
+ MCContext& Ctx = getContext();
+ E = SMLoc::getFromPointer(Loc.getPointer() - 1);
+ // If the op is an imm and can be fit into a mov, then replace ldr with mov.
+ if (isa<MCConstantExpr>(SubExprVal) && Operands.size() >= 2 &&
+ static_cast<AArch64Operand &>(*Operands[1]).isReg()) {
+ bool IsXReg = AArch64MCRegisterClasses[AArch64::GPR64allRegClassID].contains(
+ Operands[1]->getReg());
+ uint64_t Imm = (cast<MCConstantExpr>(SubExprVal))->getValue();
+ uint32_t ShiftAmt = 0, MaxShiftAmt = IsXReg ? 48 : 16;
+ while(Imm > 0xFFFF && countTrailingZeros(Imm) >= 16) {
+ ShiftAmt += 16;
+ Imm >>= 16;
+ }
+ if (ShiftAmt <= MaxShiftAmt && Imm <= 0xFFFF) {
+ Operands[0] = AArch64Operand::CreateToken("movz", false, Loc, Ctx);
+ Operands.push_back(AArch64Operand::CreateImm(
+ MCConstantExpr::Create(Imm, Ctx), S, E, Ctx));
+ if (ShiftAmt)
+ Operands.push_back(AArch64Operand::CreateShiftExtend(AArch64_AM::LSL,
+ ShiftAmt, true, S, E, Ctx));
+ return false;
+ }
+ }
+ // If it is a label or an imm that cannot fit in a movz, put it into CP.
+ const MCExpr *CPLoc = getTargetStreamer().addConstantPoolEntry(SubExprVal);
+ Operands.push_back(AArch64Operand::CreateImm(CPLoc, S, E, Ctx));
+ return false;
+ }
}
}
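
The folding loop above strips 16-bit-aligned trailing zero chunks off the immediate and accepts a movz only if what remains fits in 16 bits and the accumulated shift is legal for the register width. A standalone restatement under those rules; foldToMovz and ctz64 are hypothetical helpers, with a GCC/Clang builtin standing in for llvm::countTrailingZeros:

    #include <cassert>
    #include <cstdint>

    static unsigned ctz64(uint64_t X) { return X ? __builtin_ctzll(X) : 64; }

    // On success the instruction becomes "movz <Rd>, #Imm, lsl #ShiftAmt"
    // instead of a literal-pool load.
    static bool foldToMovz(uint64_t Imm, bool IsXReg, uint32_t &ShiftAmt) {
      uint32_t MaxShiftAmt = IsXReg ? 48 : 16;
      ShiftAmt = 0;
      while (Imm > 0xFFFF && ctz64(Imm) >= 16) {
        ShiftAmt += 16;
        Imm >>= 16;
      }
      return ShiftAmt <= MaxShiftAmt && Imm <= 0xFFFF;
    }

    int main() {
      uint32_t Sh;
      assert(foldToMovz(0x12340000ULL, true, Sh) && Sh == 16); // movz, lsl #16
      assert(!foldToMovz(0x1234567890ULL, true, Sh));          // pool instead
    }
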
@@ -3029,6 +3144,15 @@ bool AArch64AsmParser::ParseInstruction(ParseInstructionInfo &Info,
.Case("bnv", "b.nv")
.Default(Name);
+ // First check for the AArch64-specific .req directive.
+ if (Parser.getTok().is(AsmToken::Identifier) &&
+ Parser.getTok().getIdentifier() == ".req") {
+ parseDirectiveReq(Name, NameLoc);
+ // We always return 'error' for this, as we're done with this
+ // statement and don't need to match the instruction.
+ return true;
+ }
+
// Create the leading tokens for the mnemonic, split by '.' characters.
size_t Start = 0, Next = Name.find('.');
StringRef Head = Name.slice(Start, Next);
@@ -3443,8 +3567,7 @@ bool AArch64AsmParser::showMatchError(SMLoc Loc, unsigned ErrCode) {
case Match_MnemonicFail:
return Error(Loc, "unrecognized instruction mnemonic");
default:
- assert(0 && "unexpected error code!");
- return Error(Loc, "invalid instruction format");
+ llvm_unreachable("unexpected error code!");
}
}
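
This and the similar hunks below replace the assert(0)-plus-dead-return idiom with llvm_unreachable, which is annotated noreturn, so the unreachable return statement (and its made-up value) can simply be deleted. A simplified equivalent of the idiom, assuming nothing LLVM-specific:

    #include <cstdio>
    #include <cstdlib>

    // Simplified stand-in for llvm_unreachable: noreturn, so callers need no
    // dummy return after it.
    [[noreturn]] static void unreachableMsg(const char *Msg) {
      std::fprintf(stderr, "UNREACHABLE: %s\n", Msg);
      std::abort();
    }

    static const char *errorText(int Code) {
      switch (Code) {
      case 0: return "unrecognized instruction mnemonic";
      default: unreachableMsg("unexpected error code!"); // no return needed
      }
    }

    int main() { return errorText(0)[0] == 'u' ? 0 : 1; }
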
@@ -3456,23 +3579,23 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
unsigned &ErrorInfo,
bool MatchingInlineAsm) {
assert(!Operands.empty() && "Unexpect empty operand list!");
- AArch64Operand *Op = static_cast<AArch64Operand *>(Operands[0]);
- assert(Op->isToken() && "Leading operand should always be a mnemonic!");
+ AArch64Operand &Op = static_cast<AArch64Operand &>(*Operands[0]);
+ assert(Op.isToken() && "Leading operand should always be a mnemonic!");
- StringRef Tok = Op->getToken();
+ StringRef Tok = Op.getToken();
unsigned NumOperands = Operands.size();
if (NumOperands == 4 && Tok == "lsl") {
- AArch64Operand *Op2 = static_cast<AArch64Operand *>(Operands[2]);
- AArch64Operand *Op3 = static_cast<AArch64Operand *>(Operands[3]);
- if (Op2->isReg() && Op3->isImm()) {
- const MCConstantExpr *Op3CE = dyn_cast<MCConstantExpr>(Op3->getImm());
+ AArch64Operand &Op2 = static_cast<AArch64Operand &>(*Operands[2]);
+ AArch64Operand &Op3 = static_cast<AArch64Operand &>(*Operands[3]);
+ if (Op2.isReg() && Op3.isImm()) {
+ const MCConstantExpr *Op3CE = dyn_cast<MCConstantExpr>(Op3.getImm());
if (Op3CE) {
uint64_t Op3Val = Op3CE->getValue();
uint64_t NewOp3Val = 0;
uint64_t NewOp4Val = 0;
if (AArch64MCRegisterClasses[AArch64::GPR32allRegClassID].contains(
- Op2->getReg())) {
+ Op2.getReg())) {
NewOp3Val = (32 - Op3Val) & 0x1f;
NewOp4Val = 31 - Op3Val;
} else {
@@ -3484,26 +3607,24 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
const MCExpr *NewOp4 = MCConstantExpr::Create(NewOp4Val, getContext());
Operands[0] = AArch64Operand::CreateToken(
- "ubfm", false, Op->getStartLoc(), getContext());
- Operands[3] = AArch64Operand::CreateImm(NewOp3, Op3->getStartLoc(),
- Op3->getEndLoc(), getContext());
+ "ubfm", false, Op.getStartLoc(), getContext());
Operands.push_back(AArch64Operand::CreateImm(
- NewOp4, Op3->getStartLoc(), Op3->getEndLoc(), getContext()));
- delete Op3;
- delete Op;
+ NewOp4, Op3.getStartLoc(), Op3.getEndLoc(), getContext()));
+ Operands[3] = AArch64Operand::CreateImm(NewOp3, Op3.getStartLoc(),
+ Op3.getEndLoc(), getContext());
}
}
} else if (NumOperands == 5) {
// FIXME: Horrible hack to handle the BFI -> BFM, SBFIZ->SBFM, and
// UBFIZ -> UBFM aliases.
if (Tok == "bfi" || Tok == "sbfiz" || Tok == "ubfiz") {
- AArch64Operand *Op1 = static_cast<AArch64Operand *>(Operands[1]);
- AArch64Operand *Op3 = static_cast<AArch64Operand *>(Operands[3]);
- AArch64Operand *Op4 = static_cast<AArch64Operand *>(Operands[4]);
+ AArch64Operand &Op1 = static_cast<AArch64Operand &>(*Operands[1]);
+ AArch64Operand &Op3 = static_cast<AArch64Operand &>(*Operands[3]);
+ AArch64Operand &Op4 = static_cast<AArch64Operand &>(*Operands[4]);
- if (Op1->isReg() && Op3->isImm() && Op4->isImm()) {
- const MCConstantExpr *Op3CE = dyn_cast<MCConstantExpr>(Op3->getImm());
- const MCConstantExpr *Op4CE = dyn_cast<MCConstantExpr>(Op4->getImm());
+ if (Op1.isReg() && Op3.isImm() && Op4.isImm()) {
+ const MCConstantExpr *Op3CE = dyn_cast<MCConstantExpr>(Op3.getImm());
+ const MCConstantExpr *Op4CE = dyn_cast<MCConstantExpr>(Op4.getImm());
if (Op3CE && Op4CE) {
uint64_t Op3Val = Op3CE->getValue();
@@ -3511,21 +3632,21 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
uint64_t RegWidth = 0;
if (AArch64MCRegisterClasses[AArch64::GPR64allRegClassID].contains(
- Op1->getReg()))
+ Op1.getReg()))
RegWidth = 64;
else
RegWidth = 32;
if (Op3Val >= RegWidth)
- return Error(Op3->getStartLoc(),
+ return Error(Op3.getStartLoc(),
"expected integer in range [0, 31]");
if (Op4Val < 1 || Op4Val > RegWidth)
- return Error(Op4->getStartLoc(),
+ return Error(Op4.getStartLoc(),
"expected integer in range [1, 32]");
uint64_t NewOp3Val = 0;
if (AArch64MCRegisterClasses[AArch64::GPR32allRegClassID].contains(
- Op1->getReg()))
+ Op1.getReg()))
NewOp3Val = (32 - Op3Val) & 0x1f;
else
NewOp3Val = (64 - Op3Val) & 0x3f;
@@ -3533,7 +3654,7 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
uint64_t NewOp4Val = Op4Val - 1;
if (NewOp3Val != 0 && NewOp4Val >= NewOp3Val)
- return Error(Op4->getStartLoc(),
+ return Error(Op4.getStartLoc(),
"requested insert overflows register");
const MCExpr *NewOp3 =
@@ -3541,24 +3662,20 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
const MCExpr *NewOp4 =
MCConstantExpr::Create(NewOp4Val, getContext());
Operands[3] = AArch64Operand::CreateImm(
- NewOp3, Op3->getStartLoc(), Op3->getEndLoc(), getContext());
+ NewOp3, Op3.getStartLoc(), Op3.getEndLoc(), getContext());
Operands[4] = AArch64Operand::CreateImm(
- NewOp4, Op4->getStartLoc(), Op4->getEndLoc(), getContext());
+ NewOp4, Op4.getStartLoc(), Op4.getEndLoc(), getContext());
if (Tok == "bfi")
Operands[0] = AArch64Operand::CreateToken(
- "bfm", false, Op->getStartLoc(), getContext());
+ "bfm", false, Op.getStartLoc(), getContext());
else if (Tok == "sbfiz")
Operands[0] = AArch64Operand::CreateToken(
- "sbfm", false, Op->getStartLoc(), getContext());
+ "sbfm", false, Op.getStartLoc(), getContext());
else if (Tok == "ubfiz")
Operands[0] = AArch64Operand::CreateToken(
- "ubfm", false, Op->getStartLoc(), getContext());
+ "ubfm", false, Op.getStartLoc(), getContext());
else
llvm_unreachable("No valid mnemonic for alias?");
-
- delete Op;
- delete Op3;
- delete Op4;
}
}
@@ -3566,13 +3683,13 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
// UBFX -> UBFM aliases.
} else if (NumOperands == 5 &&
(Tok == "bfxil" || Tok == "sbfx" || Tok == "ubfx")) {
- AArch64Operand *Op1 = static_cast<AArch64Operand *>(Operands[1]);
- AArch64Operand *Op3 = static_cast<AArch64Operand *>(Operands[3]);
- AArch64Operand *Op4 = static_cast<AArch64Operand *>(Operands[4]);
+ AArch64Operand &Op1 = static_cast<AArch64Operand &>(*Operands[1]);
+ AArch64Operand &Op3 = static_cast<AArch64Operand &>(*Operands[3]);
+ AArch64Operand &Op4 = static_cast<AArch64Operand &>(*Operands[4]);
- if (Op1->isReg() && Op3->isImm() && Op4->isImm()) {
- const MCConstantExpr *Op3CE = dyn_cast<MCConstantExpr>(Op3->getImm());
- const MCConstantExpr *Op4CE = dyn_cast<MCConstantExpr>(Op4->getImm());
+ if (Op1.isReg() && Op3.isImm() && Op4.isImm()) {
+ const MCConstantExpr *Op3CE = dyn_cast<MCConstantExpr>(Op3.getImm());
+ const MCConstantExpr *Op4CE = dyn_cast<MCConstantExpr>(Op4.getImm());
if (Op3CE && Op4CE) {
uint64_t Op3Val = Op3CE->getValue();
@@ -3580,42 +3697,39 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
uint64_t RegWidth = 0;
if (AArch64MCRegisterClasses[AArch64::GPR64allRegClassID].contains(
- Op1->getReg()))
+ Op1.getReg()))
RegWidth = 64;
else
RegWidth = 32;
if (Op3Val >= RegWidth)
- return Error(Op3->getStartLoc(),
+ return Error(Op3.getStartLoc(),
"expected integer in range [0, 31]");
if (Op4Val < 1 || Op4Val > RegWidth)
- return Error(Op4->getStartLoc(),
+ return Error(Op4.getStartLoc(),
"expected integer in range [1, 32]");
uint64_t NewOp4Val = Op3Val + Op4Val - 1;
if (NewOp4Val >= RegWidth || NewOp4Val < Op3Val)
- return Error(Op4->getStartLoc(),
+ return Error(Op4.getStartLoc(),
"requested extract overflows register");
const MCExpr *NewOp4 =
MCConstantExpr::Create(NewOp4Val, getContext());
Operands[4] = AArch64Operand::CreateImm(
- NewOp4, Op4->getStartLoc(), Op4->getEndLoc(), getContext());
+ NewOp4, Op4.getStartLoc(), Op4.getEndLoc(), getContext());
if (Tok == "bfxil")
Operands[0] = AArch64Operand::CreateToken(
- "bfm", false, Op->getStartLoc(), getContext());
+ "bfm", false, Op.getStartLoc(), getContext());
else if (Tok == "sbfx")
Operands[0] = AArch64Operand::CreateToken(
- "sbfm", false, Op->getStartLoc(), getContext());
+ "sbfm", false, Op.getStartLoc(), getContext());
else if (Tok == "ubfx")
Operands[0] = AArch64Operand::CreateToken(
- "ubfm", false, Op->getStartLoc(), getContext());
+ "ubfm", false, Op.getStartLoc(), getContext());
else
llvm_unreachable("No valid mnemonic for alias?");
-
- delete Op;
- delete Op4;
}
}
}
@@ -3626,63 +3740,58 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
if (NumOperands == 3 && (Tok == "sxtw" || Tok == "uxtw")) {
// The source register can be Wn here, but the matcher expects a
// GPR64. Twiddle it here if necessary.
- AArch64Operand *Op = static_cast<AArch64Operand *>(Operands[2]);
- if (Op->isReg()) {
- unsigned Reg = getXRegFromWReg(Op->getReg());
- Operands[2] = AArch64Operand::CreateReg(Reg, false, Op->getStartLoc(),
- Op->getEndLoc(), getContext());
- delete Op;
+ AArch64Operand &Op = static_cast<AArch64Operand &>(*Operands[2]);
+ if (Op.isReg()) {
+ unsigned Reg = getXRegFromWReg(Op.getReg());
+ Operands[2] = AArch64Operand::CreateReg(Reg, false, Op.getStartLoc(),
+ Op.getEndLoc(), getContext());
}
}
// FIXME: Likewise for sxt[bh] with a Xd dst operand
else if (NumOperands == 3 && (Tok == "sxtb" || Tok == "sxth")) {
- AArch64Operand *Op = static_cast<AArch64Operand *>(Operands[1]);
- if (Op->isReg() &&
+ AArch64Operand &Op = static_cast<AArch64Operand &>(*Operands[1]);
+ if (Op.isReg() &&
AArch64MCRegisterClasses[AArch64::GPR64allRegClassID].contains(
- Op->getReg())) {
+ Op.getReg())) {
// The source register can be Wn here, but the matcher expects a
// GPR64. Twiddle it here if necessary.
- AArch64Operand *Op = static_cast<AArch64Operand *>(Operands[2]);
- if (Op->isReg()) {
- unsigned Reg = getXRegFromWReg(Op->getReg());
- Operands[2] = AArch64Operand::CreateReg(Reg, false, Op->getStartLoc(),
- Op->getEndLoc(), getContext());
- delete Op;
+ AArch64Operand &Op = static_cast<AArch64Operand &>(*Operands[2]);
+ if (Op.isReg()) {
+ unsigned Reg = getXRegFromWReg(Op.getReg());
+ Operands[2] = AArch64Operand::CreateReg(Reg, false, Op.getStartLoc(),
+ Op.getEndLoc(), getContext());
}
}
}
// FIXME: Likewise for uxt[bh] with a Xd dst operand
else if (NumOperands == 3 && (Tok == "uxtb" || Tok == "uxth")) {
- AArch64Operand *Op = static_cast<AArch64Operand *>(Operands[1]);
- if (Op->isReg() &&
+ AArch64Operand &Op = static_cast<AArch64Operand &>(*Operands[1]);
+ if (Op.isReg() &&
AArch64MCRegisterClasses[AArch64::GPR64allRegClassID].contains(
- Op->getReg())) {
+ Op.getReg())) {
// The source register can be Wn here, but the matcher expects a
// GPR32. Twiddle it here if necessary.
- AArch64Operand *Op = static_cast<AArch64Operand *>(Operands[1]);
- if (Op->isReg()) {
- unsigned Reg = getWRegFromXReg(Op->getReg());
- Operands[1] = AArch64Operand::CreateReg(Reg, false, Op->getStartLoc(),
- Op->getEndLoc(), getContext());
- delete Op;
+ AArch64Operand &Op = static_cast<AArch64Operand &>(*Operands[1]);
+ if (Op.isReg()) {
+ unsigned Reg = getWRegFromXReg(Op.getReg());
+ Operands[1] = AArch64Operand::CreateReg(Reg, false, Op.getStartLoc(),
+ Op.getEndLoc(), getContext());
}
}
}
// Yet another horrible hack to handle FMOV Rd, #0.0 using [WX]ZR.
if (NumOperands == 3 && Tok == "fmov") {
- AArch64Operand *RegOp = static_cast<AArch64Operand *>(Operands[1]);
- AArch64Operand *ImmOp = static_cast<AArch64Operand *>(Operands[2]);
- if (RegOp->isReg() && ImmOp->isFPImm() &&
- ImmOp->getFPImm() == (unsigned)-1) {
+ AArch64Operand &RegOp = static_cast<AArch64Operand &>(*Operands[1]);
+ AArch64Operand &ImmOp = static_cast<AArch64Operand &>(*Operands[2]);
+ if (RegOp.isReg() && ImmOp.isFPImm() && ImmOp.getFPImm() == (unsigned)-1) {
unsigned zreg =
AArch64MCRegisterClasses[AArch64::FPR32RegClassID].contains(
- RegOp->getReg())
+ RegOp.getReg())
? AArch64::WZR
: AArch64::XZR;
- Operands[2] = AArch64Operand::CreateReg(zreg, false, Op->getStartLoc(),
- Op->getEndLoc(), getContext());
- delete ImmOp;
+ Operands[2] = AArch64Operand::CreateReg(zreg, false, Op.getStartLoc(),
+ Op.getEndLoc(), getContext());
}
}
@@ -3735,14 +3844,14 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
if (ErrorInfo >= Operands.size())
return Error(IDLoc, "too few operands for instruction");
- ErrorLoc = ((AArch64Operand *)Operands[ErrorInfo])->getStartLoc();
+ ErrorLoc = ((AArch64Operand &)*Operands[ErrorInfo]).getStartLoc();
if (ErrorLoc == SMLoc())
ErrorLoc = IDLoc;
}
// If the match failed on a suffix token operand, tweak the diagnostic
// accordingly.
- if (((AArch64Operand *)Operands[ErrorInfo])->isToken() &&
- ((AArch64Operand *)Operands[ErrorInfo])->isTokenSuffix())
+ if (((AArch64Operand &)*Operands[ErrorInfo]).isToken() &&
+ ((AArch64Operand &)*Operands[ErrorInfo]).isTokenSuffix())
MatchResult = Match_InvalidSuffix;
return showMatchError(ErrorLoc, MatchResult);
@@ -3794,9 +3903,11 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
case Match_InvalidLabel:
case Match_MSR:
case Match_MRS: {
+ if (ErrorInfo >= Operands.size())
+ return Error(IDLoc, "too few operands for instruction");
// Any time we get here, there's nothing fancy to do. Just get the
// operand SMLoc and display the diagnostic.
- SMLoc ErrorLoc = ((AArch64Operand *)Operands[ErrorInfo])->getStartLoc();
+ SMLoc ErrorLoc = ((AArch64Operand &)*Operands[ErrorInfo]).getStartLoc();
if (ErrorLoc == SMLoc())
ErrorLoc = IDLoc;
return showMatchError(ErrorLoc, MatchResult);
@@ -3819,6 +3930,10 @@ bool AArch64AsmParser::ParseDirective(AsmToken DirectiveID) {
return parseDirectiveWord(8, Loc);
if (IDVal == ".tlsdesccall")
return parseDirectiveTLSDescCall(Loc);
+ if (IDVal == ".ltorg" || IDVal == ".pool")
+ return parseDirectiveLtorg(Loc);
+ if (IDVal == ".unreq")
+ return parseDirectiveUnreq(DirectiveID.getLoc());
return parseDirectiveLOH(IDVal, Loc);
}
@@ -3920,6 +4035,66 @@ bool AArch64AsmParser::parseDirectiveLOH(StringRef IDVal, SMLoc Loc) {
return false;
}
+/// parseDirectiveLtorg
+/// ::= .ltorg | .pool
+bool AArch64AsmParser::parseDirectiveLtorg(SMLoc L) {
+ getTargetStreamer().emitCurrentConstantPool();
+ return false;
+}
+
+/// parseDirectiveReq
+/// ::= name .req registername
+bool AArch64AsmParser::parseDirectiveReq(StringRef Name, SMLoc L) {
+ Parser.Lex(); // Eat the '.req' token.
+ SMLoc SRegLoc = getLoc();
+ unsigned RegNum = tryParseRegister();
+ bool IsVector = false;
+
+ if (RegNum == static_cast<unsigned>(-1)) {
+ StringRef Kind;
+ RegNum = tryMatchVectorRegister(Kind, false);
+ if (!Kind.empty()) {
+ Error(SRegLoc, "vector register without type specifier expected");
+ return false;
+ }
+ IsVector = true;
+ }
+
+ if (RegNum == static_cast<unsigned>(-1)) {
+ Parser.eatToEndOfStatement();
+ Error(SRegLoc, "register name or alias expected");
+ return false;
+ }
+
+ // Shouldn't be anything else.
+ if (Parser.getTok().isNot(AsmToken::EndOfStatement)) {
+ Error(Parser.getTok().getLoc(), "unexpected input in .req directive");
+ Parser.eatToEndOfStatement();
+ return false;
+ }
+
+ Parser.Lex(); // Consume the EndOfStatement
+
+ auto pair = std::make_pair(IsVector, RegNum);
+ if (RegisterReqs.GetOrCreateValue(Name, pair).getValue() != pair)
+ Warning(L, "ignoring redefinition of register alias '" + Name + "'");
+
+ return true;
+}
+
+/// parseDirectiveUnreq
+/// ::= .unreq registername
+bool AArch64AsmParser::parseDirectiveUnreq(SMLoc L) {
+ if (Parser.getTok().isNot(AsmToken::Identifier)) {
+ Error(Parser.getTok().getLoc(), "unexpected input in .unreq directive.");
+ Parser.eatToEndOfStatement();
+ return false;
+ }
+ RegisterReqs.erase(Parser.getTok().getIdentifier().lower());
+ Parser.Lex(); // Eat the identifier.
+ return false;
+}
+
bool
AArch64AsmParser::classifySymbolRef(const MCExpr *Expr,
AArch64MCExpr::VariantKind &ELFRefKind,
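
The .req bookkeeping added above keys aliases by lower-cased name and pairs each with a flag recording whether it names a vector register; a redefinition that doesn't match the first one is ignored with a warning. A sketch of that policy, with std::map standing in for llvm::StringMap and defineAlias as a hypothetical wrapper:

    #include <cassert>
    #include <map>
    #include <string>
    #include <utility>

    using ReqMap = std::map<std::string, std::pair<bool, unsigned>>;

    // Mirrors GetOrCreateValue: insert if absent, otherwise keep the existing
    // entry; returns false when a conflicting redefinition was ignored.
    static bool defineAlias(ReqMap &Reqs, const std::string &Name,
                            std::pair<bool, unsigned> Reg) {
      auto It = Reqs.insert(std::make_pair(Name, Reg)).first;
      return It->second == Reg;
    }

    int main() {
      ReqMap Reqs;
      assert(defineAlias(Reqs, "foo", {false, 19}));  // foo .req x19
      assert(!defineAlias(Reqs, "foo", {false, 20})); // redefinition -> warn
      Reqs.erase("foo");                              // .unreq foo
      assert(Reqs.empty());
    }
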
@@ -3986,9 +4161,9 @@ extern "C" void LLVMInitializeAArch64AsmParser() {
// Define this matcher function after the auto-generated include so we
// have the match class enum definitions.
-unsigned AArch64AsmParser::validateTargetOperandClass(MCParsedAsmOperand *AsmOp,
+unsigned AArch64AsmParser::validateTargetOperandClass(MCParsedAsmOperand &AsmOp,
unsigned Kind) {
- AArch64Operand *Op = static_cast<AArch64Operand *>(AsmOp);
+ AArch64Operand &Op = static_cast<AArch64Operand &>(AsmOp);
// If the kind is a token for a literal immediate, check if our asm
// operand matches. This is for InstAliases which have a fixed-value
// immediate in the syntax.
@@ -4036,9 +4211,9 @@ unsigned AArch64AsmParser::validateTargetOperandClass(MCParsedAsmOperand *AsmOp,
ExpectedVal = 8;
break;
}
- if (!Op->isImm())
+ if (!Op.isImm())
return Match_InvalidOperand;
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Op->getImm());
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Op.getImm());
if (!CE)
return Match_InvalidOperand;
if (CE->getValue() == ExpectedVal)
diff --git a/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.cpp b/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.cpp
index 2466368..2057c51 100644
--- a/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.cpp
+++ b/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.cpp
@@ -37,8 +37,7 @@ getVariant(uint64_t LLVMDisassembler_VariantKind) {
case LLVMDisassembler_VariantKind_ARM64_TLVP:
case LLVMDisassembler_VariantKind_ARM64_TLVOFF:
default:
- assert(0 && "bad LLVMDisassembler_VariantKind");
- return MCSymbolRefExpr::VK_None;
+ llvm_unreachable("bad LLVMDisassembler_VariantKind");
}
}
diff --git a/lib/Target/AArch64/Disassembler/CMakeLists.txt b/lib/Target/AArch64/Disassembler/CMakeLists.txt
index be4ccad..d64c05b 100644
--- a/lib/Target/AArch64/Disassembler/CMakeLists.txt
+++ b/lib/Target/AArch64/Disassembler/CMakeLists.txt
@@ -4,11 +4,5 @@ add_llvm_library(LLVMAArch64Disassembler
AArch64Disassembler.cpp
AArch64ExternalSymbolizer.cpp
)
-# workaround for hanging compilation on MSVC8, 9 and 10
-#if( MSVC_VERSION EQUAL 1400 OR MSVC_VERSION EQUAL 1500 OR MSVC_VERSION EQUAL 1600 )
-#set_property(
-# SOURCE ARMDisassembler.cpp
-# PROPERTY COMPILE_FLAGS "/Od"
-# )
-#endif()
+
add_dependencies(LLVMAArch64Disassembler AArch64CommonTableGen)
diff --git a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp
index f484a5b..8a21f06 100644
--- a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp
+++ b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp
@@ -918,7 +918,7 @@ void AArch64InstPrinter::printPostIncOperand(const MCInst *MI, unsigned OpNo,
else
O << getRegisterName(Reg);
} else
- assert(0 && "unknown operand kind in printPostIncOperand64");
+ llvm_unreachable("unknown operand kind in printPostIncOperand64");
}
void AArch64InstPrinter::printVRegOperand(const MCInst *MI, unsigned OpNo,
@@ -1109,7 +1109,7 @@ static unsigned getNextVectorRegister(unsigned Reg, unsigned Stride = 1) {
while (Stride--) {
switch (Reg) {
default:
- assert(0 && "Vector register expected!");
+ llvm_unreachable("Vector register expected!");
case AArch64::Q0: Reg = AArch64::Q1; break;
case AArch64::Q1: Reg = AArch64::Q2; break;
case AArch64::Q2: Reg = AArch64::Q3; break;
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
index d8900d4..a917616 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
@@ -86,7 +86,7 @@ public:
static unsigned getFixupKindNumBytes(unsigned Kind) {
switch (Kind) {
default:
- assert(0 && "Unknown fixup kind!");
+ llvm_unreachable("Unknown fixup kind!");
case AArch64::fixup_aarch64_tlsdesc_call:
return 0;
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp
index dc4a8bf..1763b40 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp
@@ -96,4 +96,6 @@ AArch64MCAsmInfoELF::AArch64MCAsmInfoELF(StringRef TT) {
ExceptionsType = ExceptionHandling::DwarfCFI;
UseIntegratedAssembler = true;
+
+ HasIdentDirective = true;
}
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp
index 464a18c..f051357 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp
@@ -218,13 +218,9 @@ AArch64MCCodeEmitter::getMachineOpValue(const MCInst &MI, const MCOperand &MO,
const MCSubtargetInfo &STI) const {
if (MO.isReg())
return Ctx.getRegisterInfo()->getEncodingValue(MO.getReg());
- else {
- assert(MO.isImm() && "did not expect relocated expression");
- return static_cast<unsigned>(MO.getImm());
- }
- assert(0 && "Unable to encode MCOperand!");
- return 0;
+ assert(MO.isImm() && "did not expect relocated expression");
+ return static_cast<unsigned>(MO.getImm());
}
template<unsigned FixupKind> uint32_t
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp
index 85c3ec7..42a6787 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp
@@ -81,37 +81,8 @@ void AArch64MCExpr::PrintImpl(raw_ostream &OS) const {
OS << *Expr;
}
-// FIXME: This basically copies MCObjectStreamer::AddValueSymbols. Perhaps
-// that method should be made public?
-// FIXME: really do above: now that two backends are using it.
-static void AddValueSymbolsImpl(const MCExpr *Value, MCAssembler *Asm) {
- switch (Value->getKind()) {
- case MCExpr::Target:
- llvm_unreachable("Can't handle nested target expr!");
- break;
-
- case MCExpr::Constant:
- break;
-
- case MCExpr::Binary: {
- const MCBinaryExpr *BE = cast<MCBinaryExpr>(Value);
- AddValueSymbolsImpl(BE->getLHS(), Asm);
- AddValueSymbolsImpl(BE->getRHS(), Asm);
- break;
- }
-
- case MCExpr::SymbolRef:
- Asm->getOrCreateSymbolData(cast<MCSymbolRefExpr>(Value)->getSymbol());
- break;
-
- case MCExpr::Unary:
- AddValueSymbolsImpl(cast<MCUnaryExpr>(Value)->getSubExpr(), Asm);
- break;
- }
-}
-
-void AArch64MCExpr::AddValueSymbols(MCAssembler *Asm) const {
- AddValueSymbolsImpl(getSubExpr(), Asm);
+void AArch64MCExpr::visitUsedExpr(MCStreamer &Streamer) const {
+ Streamer.visitUsedExpr(*getSubExpr());
}
const MCSection *AArch64MCExpr::FindAssociatedSection() const {
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h b/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h
index e869ed0..5422f9d 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h
@@ -147,7 +147,7 @@ public:
void PrintImpl(raw_ostream &OS) const override;
- void AddValueSymbols(MCAssembler *) const override;
+ void visitUsedExpr(MCStreamer &Streamer) const override;
const MCSection *FindAssociatedSection() const override;
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp
index 5c86189..ba95366 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp
@@ -75,7 +75,7 @@ bool AArch64MachObjectWriter::getAArch64FixupKindMachOInfo(
Log2Size = llvm::Log2_32(4);
switch (Sym->getKind()) {
default:
- assert(0 && "Unexpected symbol reference variant kind!");
+ llvm_unreachable("Unexpected symbol reference variant kind!");
case MCSymbolRefExpr::VK_PAGEOFF:
RelocType = unsigned(MachO::ARM64_RELOC_PAGEOFF12);
return true;
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp
new file mode 100644
index 0000000..f9aeb35
--- /dev/null
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp
@@ -0,0 +1,40 @@
+//===- AArch64TargetStreamer.cpp - AArch64TargetStreamer class -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the AArch64TargetStreamer class.
+//
+//===----------------------------------------------------------------------===//
+#include "llvm/ADT/MapVector.h"
+#include "llvm/MC/ConstantPools.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCStreamer.h"
+
+using namespace llvm;
+
+//
+// AArch64TargetStreamer Implementation
+//
+AArch64TargetStreamer::AArch64TargetStreamer(MCStreamer &S)
+ : MCTargetStreamer(S), ConstantPools(new AssemblerConstantPools()) {}
+
+AArch64TargetStreamer::~AArch64TargetStreamer() {}
+
+// The constant pool handling is shared by all AArch64TargetStreamer
+// implementations.
+const MCExpr *AArch64TargetStreamer::addConstantPoolEntry(const MCExpr *Expr) {
+ return ConstantPools->addEntry(Streamer, Expr);
+}
+
+void AArch64TargetStreamer::emitCurrentConstantPool() {
+ ConstantPools->emitForCurrentSection(Streamer);
+}
+
+// finish() - write out any non-empty assembler constant pools.
+void AArch64TargetStreamer::finish() { ConstantPools->emitAll(Streamer); }
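
For context on what this streamer manages: ldr-pseudo literals accumulate per section and are flushed either by an explicit .ltorg/.pool or by finish() at the end of assembly. A much-simplified stand-in for AssemblerConstantPools (SimpleConstantPool is illustrative only):

    #include <cassert>
    #include <cstdint>
    #include <vector>

    struct SimpleConstantPool {
      std::vector<int64_t> Pending;
      // addEntry() queues a literal and returns a handle the load refers to.
      size_t addEntry(int64_t V) {
        Pending.push_back(V);
        return Pending.size() - 1;
      }
      // emit() flushes the queue (.ltorg/.pool, or finish() at end of file).
      void emit(std::vector<int64_t> &Section) {
        Section.insert(Section.end(), Pending.begin(), Pending.end());
        Pending.clear();
      }
    };

    int main() {
      SimpleConstantPool Pool;
      std::vector<int64_t> Section;
      Pool.addEntry(0x1234567890LL); // ldr x0, =0x1234567890
      Pool.emit(Section);            // .ltorg
      assert(Section.size() == 1 && Pool.Pending.empty());
    }
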
diff --git a/lib/Target/AArch64/MCTargetDesc/Android.mk b/lib/Target/AArch64/MCTargetDesc/Android.mk
index e9d2323..a23c0e5 100644
--- a/lib/Target/AArch64/MCTargetDesc/Android.mk
+++ b/lib/Target/AArch64/MCTargetDesc/Android.mk
@@ -14,7 +14,8 @@ aarch64_mc_desc_SRC_FILES := \
AArch64MCAsmInfo.cpp \
AArch64MCCodeEmitter.cpp \
AArch64MCExpr.cpp \
- AArch64MCTargetDesc.cpp
+ AArch64MCTargetDesc.cpp \
+ AArch64TargetStreamer.cpp
# For the host
# =====================================================
diff --git a/lib/Target/AArch64/MCTargetDesc/CMakeLists.txt b/lib/Target/AArch64/MCTargetDesc/CMakeLists.txt
index 7d5bced..6d8be5e 100644
--- a/lib/Target/AArch64/MCTargetDesc/CMakeLists.txt
+++ b/lib/Target/AArch64/MCTargetDesc/CMakeLists.txt
@@ -7,6 +7,7 @@ add_llvm_library(LLVMAArch64Desc
AArch64MCExpr.cpp
AArch64MCTargetDesc.cpp
AArch64MachObjectWriter.cpp
+ AArch64TargetStreamer.cpp
)
add_dependencies(LLVMAArch64Desc AArch64CommonTableGen)
diff --git a/lib/Target/AArch64/Utils/AArch64BaseInfo.h b/lib/Target/AArch64/Utils/AArch64BaseInfo.h
index 9e4c389..9d2ce21 100644
--- a/lib/Target/AArch64/Utils/AArch64BaseInfo.h
+++ b/lib/Target/AArch64/Utils/AArch64BaseInfo.h
@@ -233,23 +233,9 @@ inline static const char *getCondCodeName(CondCode Code) {
}
inline static CondCode getInvertedCondCode(CondCode Code) {
- switch (Code) {
- default: llvm_unreachable("Unknown condition code");
- case EQ: return NE;
- case NE: return EQ;
- case HS: return LO;
- case LO: return HS;
- case MI: return PL;
- case PL: return MI;
- case VS: return VC;
- case VC: return VS;
- case HI: return LS;
- case LS: return HI;
- case GE: return LT;
- case LT: return GE;
- case GT: return LE;
- case LE: return GT;
- }
+  // To invert a condition, only its low bit needs to be flipped:
+
+ return static_cast<CondCode>(static_cast<unsigned>(Code) ^ 0x1);
}
/// Given a condition code, return NZCV flags that would satisfy that condition.
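
The XOR above works because the AArch64 condition encoding places each condition next to its inverse (EQ=0/NE=1, HS=2/LO=3, ..., GT=12/LE=13), so flipping bit 0 is exactly inversion. A standalone check of that property:

    #include <cassert>

    enum CondCode { EQ = 0, NE, HS, LO, MI, PL, VS, VC, HI, LS, GE, LT, GT, LE };

    static CondCode invert(CondCode C) {
      return static_cast<CondCode>(static_cast<unsigned>(C) ^ 0x1);
    }

    int main() {
      assert(invert(EQ) == NE && invert(NE) == EQ);
      assert(invert(HS) == LO && invert(LO) == HS);
      assert(invert(GE) == LT && invert(GT) == LE);
    }
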
diff --git a/lib/Target/ARM/A15SDOptimizer.cpp b/lib/Target/ARM/A15SDOptimizer.cpp
index 94faf6f..92eaf9e 100644
--- a/lib/Target/ARM/A15SDOptimizer.cpp
+++ b/lib/Target/ARM/A15SDOptimizer.cpp
@@ -321,8 +321,7 @@ unsigned A15SDOptimizer::optimizeSDPattern(MachineInstr *MI) {
return optimizeAllLanesPattern(MI, MI->getOperand(0).getReg());
}
- assert(0 && "Unhandled update pattern!");
- return 0;
+ llvm_unreachable("Unhandled update pattern!");
}
// Return true if this MachineInstr inserts a scalar (SPR) value into
diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp
index 55e9fe5..28d2610 100644
--- a/lib/Target/ARM/ARMAsmPrinter.cpp
+++ b/lib/Target/ARM/ARMAsmPrinter.cpp
@@ -82,7 +82,8 @@ void ARMAsmPrinter::EmitXXStructor(const Constant *CV) {
const GlobalValue *GV = dyn_cast<GlobalValue>(CV->stripPointerCasts());
assert(GV && "C++ constructor pointer was not a GlobalValue!");
- const MCExpr *E = MCSymbolRefExpr::Create(getSymbol(GV),
+ const MCExpr *E = MCSymbolRefExpr::Create(GetARMGVSymbol(GV,
+ ARMII::MO_NO_FLAG),
(Subtarget->isTargetELF()
? MCSymbolRefExpr::VK_ARM_TARGET1
: MCSymbolRefExpr::VK_None),
@@ -164,7 +165,7 @@ void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum,
else if ((Modifier && strcmp(Modifier, "hi16") == 0) ||
(TF & ARMII::MO_HI16))
O << ":upper16:";
- O << *getSymbol(GV);
+ O << *GetARMGVSymbol(GV, TF);
printOffset(MO.getOffset(), O);
if (TF == ARMII::MO_PLT)
@@ -730,6 +731,32 @@ void ARMAsmPrinter::emitAttributes() {
if (Subtarget->hasDivideInARMMode() && !Subtarget->hasV8Ops())
ATS.emitAttribute(ARMBuildAttrs::DIV_use, ARMBuildAttrs::AllowDIVExt);
+ if (MMI) {
+ if (const Module *SourceModule = MMI->getModule()) {
+ // ABI_PCS_wchar_t to indicate wchar_t width
+ // FIXME: There is no way to emit value 0 (wchar_t prohibited).
+ if (auto WCharWidthValue = cast_or_null<ConstantInt>(
+ SourceModule->getModuleFlag("wchar_size"))) {
+ int WCharWidth = WCharWidthValue->getZExtValue();
+ assert((WCharWidth == 2 || WCharWidth == 4) &&
+ "wchar_t width must be 2 or 4 bytes");
+ ATS.emitAttribute(ARMBuildAttrs::ABI_PCS_wchar_t, WCharWidth);
+ }
+
+ // ABI_enum_size to indicate enum width
+ // FIXME: There is no way to emit value 0 (enums prohibited) or value 3
+ // (all enums contain a value needing 32 bits to encode).
+ if (auto EnumWidthValue = cast_or_null<ConstantInt>(
+ SourceModule->getModuleFlag("min_enum_size"))) {
+ int EnumWidth = EnumWidthValue->getZExtValue();
+ assert((EnumWidth == 1 || EnumWidth == 4) &&
+ "Minimum enum width must be 1 or 4 bytes");
+ int EnumBuildAttr = EnumWidth == 1 ? 1 : 2;
+ ATS.emitAttribute(ARMBuildAttrs::ABI_enum_size, EnumBuildAttr);
+ }
+ }
+ }
+
if (Subtarget->hasTrustZone() && Subtarget->hasVirtualization())
ATS.emitAttribute(ARMBuildAttrs::Virtualization_use,
ARMBuildAttrs::AllowTZVirtualization);
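
The mapping encoded above: the wchar_size module flag (2 or 4) is emitted verbatim as ABI_PCS_wchar_t, while min_enum_size maps 1 byte to attribute value 1 and 4 bytes to value 2, per the AEABI enum-size encoding. Restated as a tiny checked function (enumBuildAttr is a hypothetical name):

    #include <cassert>

    static int enumBuildAttr(int EnumWidth) {
      assert(EnumWidth == 1 || EnumWidth == 4);
      return EnumWidth == 1 ? 1 : 2; // 1 = smallest container, 2 = int-sized
    }

    int main() {
      assert(enumBuildAttr(1) == 1);
      assert(enumBuildAttr(4) == 2);
    }
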
@@ -768,23 +795,41 @@ getModifierVariantKind(ARMCP::ARMCPModifier Modifier) {
MCSymbol *ARMAsmPrinter::GetARMGVSymbol(const GlobalValue *GV,
unsigned char TargetFlags) {
- bool isIndirect = Subtarget->isTargetMachO() &&
- (TargetFlags & ARMII::MO_NONLAZY) &&
- Subtarget->GVIsIndirectSymbol(GV, TM.getRelocationModel());
- if (!isIndirect)
- return getSymbol(GV);
+ if (Subtarget->isTargetMachO()) {
+ bool IsIndirect = (TargetFlags & ARMII::MO_NONLAZY) &&
+ Subtarget->GVIsIndirectSymbol(GV, TM.getRelocationModel());
+
+ if (!IsIndirect)
+ return getSymbol(GV);
- // FIXME: Remove this when Darwin transition to @GOT like syntax.
- MCSymbol *MCSym = getSymbolWithGlobalValueBase(GV, "$non_lazy_ptr");
- MachineModuleInfoMachO &MMIMachO =
- MMI->getObjFileInfo<MachineModuleInfoMachO>();
- MachineModuleInfoImpl::StubValueTy &StubSym =
- GV->hasHiddenVisibility() ? MMIMachO.getHiddenGVStubEntry(MCSym) :
- MMIMachO.getGVStubEntry(MCSym);
- if (!StubSym.getPointer())
- StubSym = MachineModuleInfoImpl::
- StubValueTy(getSymbol(GV), !GV->hasInternalLinkage());
- return MCSym;
+ // FIXME: Remove this when Darwin transition to @GOT like syntax.
+ MCSymbol *MCSym = getSymbolWithGlobalValueBase(GV, "$non_lazy_ptr");
+ MachineModuleInfoMachO &MMIMachO =
+ MMI->getObjFileInfo<MachineModuleInfoMachO>();
+ MachineModuleInfoImpl::StubValueTy &StubSym =
+ GV->hasHiddenVisibility() ? MMIMachO.getHiddenGVStubEntry(MCSym)
+ : MMIMachO.getGVStubEntry(MCSym);
+ if (!StubSym.getPointer())
+ StubSym = MachineModuleInfoImpl::StubValueTy(getSymbol(GV),
+ !GV->hasInternalLinkage());
+ return MCSym;
+ } else if (Subtarget->isTargetCOFF()) {
+ assert(Subtarget->isTargetWindows() &&
+ "Windows is the only supported COFF target");
+
+ bool IsIndirect = (TargetFlags & ARMII::MO_DLLIMPORT);
+ if (!IsIndirect)
+ return getSymbol(GV);
+
+ SmallString<128> Name;
+ Name = "__imp_";
+ getNameWithPrefix(Name, GV);
+
+ return OutContext.GetOrCreateSymbol(Name);
+ } else if (Subtarget->isTargetELF()) {
+ return getSymbol(GV);
+ }
+ llvm_unreachable("unexpected target");
}
void ARMAsmPrinter::
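
The COFF branch in the hunk above implements the usual dllimport indirection: imported globals are referenced through an __imp_-prefixed import-table pointer rather than by name. A sketch of just the name mangling (armGVSymbolName is illustrative, not the AsmPrinter API):

    #include <cassert>
    #include <string>

    // dllimport references go through the import-table pointer symbol.
    static std::string armGVSymbolName(const std::string &Name,
                                       bool DLLImport) {
      return DLLImport ? "__imp_" + Name : Name;
    }

    int main() {
      assert(armGVSymbolName("foo", true) == "__imp_foo");
      assert(armGVSymbolName("foo", false) == "foo");
    }
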
@@ -928,7 +973,7 @@ void ARMAsmPrinter::EmitJump2Table(const MachineInstr *MI) {
for (unsigned i = 0, e = JTBBs.size(); i != e; ++i) {
MachineBasicBlock *MBB = JTBBs[i];
const MCExpr *MBBSymbolExpr = MCSymbolRefExpr::Create(MBB->getSymbol(),
- OutContext);
+ OutContext);
// If this isn't a TBB or TBH, the entries are direct branch instructions.
if (OffsetWidth == 4) {
EmitToStreamer(OutStreamer, MCInstBuilder(ARM::t2B)
@@ -1225,8 +1270,10 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
// Add 's' bit operand (always reg0 for this)
.addReg(0));
- const GlobalValue *GV = MI->getOperand(0).getGlobal();
- MCSymbol *GVSym = getSymbol(GV);
+ const MachineOperand &Op = MI->getOperand(0);
+ const GlobalValue *GV = Op.getGlobal();
+ const unsigned TF = Op.getTargetFlags();
+ MCSymbol *GVSym = GetARMGVSymbol(GV, TF);
const MCExpr *GVSymExpr = MCSymbolRefExpr::Create(GVSym, OutContext);
EmitToStreamer(OutStreamer, MCInstBuilder(ARM::Bcc)
.addExpr(GVSymExpr)
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index bc266e8..0288db9 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -32,6 +32,7 @@
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCExpr.h"
#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
@@ -102,14 +103,15 @@ ARMBaseInstrInfo::ARMBaseInstrInfo(const ARMSubtarget& STI)
// Use a ScoreboardHazardRecognizer for prepass ARM scheduling. TargetInstrImpl
// currently defaults to no prepass hazard recognizer.
-ScheduleHazardRecognizer *ARMBaseInstrInfo::
-CreateTargetHazardRecognizer(const TargetMachine *TM,
- const ScheduleDAG *DAG) const {
+ScheduleHazardRecognizer *
+ARMBaseInstrInfo::CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI,
+ const ScheduleDAG *DAG) const {
if (usePreRAHazardRecognizer()) {
- const InstrItineraryData *II = TM->getInstrItineraryData();
+ const InstrItineraryData *II =
+ &static_cast<const ARMSubtarget *>(STI)->getInstrItineraryData();
return new ScoreboardHazardRecognizer(II, DAG, "pre-RA-sched");
}
- return TargetInstrInfo::CreateTargetHazardRecognizer(TM, DAG);
+ return TargetInstrInfo::CreateTargetHazardRecognizer(STI, DAG);
}
ScheduleHazardRecognizer *ARMBaseInstrInfo::
@@ -1885,7 +1887,8 @@ bool llvm::tryFoldSPUpdateIntoPushPop(const ARMSubtarget &Subtarget,
unsigned NumBytes) {
// This optimisation potentially adds lots of load and store
// micro-operations, it's only really a great benefit to code-size.
- if (!Subtarget.isMinSize())
+ if (!MF.getFunction()->getAttributes().hasAttribute(
+ AttributeSet::FunctionIndex, Attribute::MinSize))
return false;
// If only one register is pushed/popped, LLVM can use an LDR/STR
@@ -4358,6 +4361,29 @@ breakPartialRegDependency(MachineBasicBlock::iterator MI,
MI->addRegisterKilled(DReg, TRI, true);
}
+void ARMBaseInstrInfo::getUnconditionalBranch(
+ MCInst &Branch, const MCSymbolRefExpr *BranchTarget) const {
+ if (Subtarget.isThumb())
+ Branch.setOpcode(ARM::tB);
+ else if (Subtarget.isThumb2())
+ Branch.setOpcode(ARM::t2B);
+ else
+ Branch.setOpcode(ARM::Bcc);
+
+ Branch.addOperand(MCOperand::CreateExpr(BranchTarget));
+ Branch.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ Branch.addOperand(MCOperand::CreateReg(0));
+}
+
+void ARMBaseInstrInfo::getTrap(MCInst &MI) const {
+ if (Subtarget.isThumb())
+ MI.setOpcode(ARM::tTRAP);
+ else if (Subtarget.useNaClTrap())
+ MI.setOpcode(ARM::TRAPNaCl);
+ else
+ MI.setOpcode(ARM::TRAP);
+}
+
bool ARMBaseInstrInfo::hasNOP() const {
return (Subtarget.getFeatureBits() & ARM::HasV6T2Ops) != 0;
}
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h
index 4b3e740..b8d6758 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -50,7 +50,7 @@ public:
const ARMSubtarget &getSubtarget() const { return Subtarget; }
ScheduleHazardRecognizer *
- CreateTargetHazardRecognizer(const TargetMachine *TM,
+ CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI,
const ScheduleDAG *DAG) const override;
ScheduleHazardRecognizer *
@@ -229,6 +229,13 @@ public:
const TargetRegisterInfo*) const override;
void breakPartialRegDependency(MachineBasicBlock::iterator, unsigned,
const TargetRegisterInfo *TRI) const override;
+
+ void
+ getUnconditionalBranch(MCInst &Branch,
+ const MCSymbolRefExpr *BranchTarget) const override;
+
+ void getTrap(MCInst &MI) const override;
+
/// Get the number of addresses by LDM or VLDM or zero for unknown.
unsigned getNumLDMAddresses(const MachineInstr *MI) const;
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index a2eee9f..cdd91c7 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -45,9 +45,12 @@ using namespace llvm;
ARMBaseRegisterInfo::ARMBaseRegisterInfo(const ARMSubtarget &sti)
: ARMGenRegisterInfo(ARM::LR, 0, 0, ARM::PC), STI(sti), BasePtr(ARM::R6) {
- if (STI.isTargetMachO())
- FramePtr = ARM::R7;
- else if (STI.isTargetWindows())
+ if (STI.isTargetMachO()) {
+ if (STI.isTargetDarwin() || STI.isThumb1Only())
+ FramePtr = ARM::R7;
+ else
+ FramePtr = ARM::R11;
+ } else if (STI.isTargetWindows())
FramePtr = ARM::R11;
else // ARM EABI
FramePtr = STI.isThumb() ? ARM::R7 : ARM::R11;
diff --git a/lib/Target/ARM/ARMCodeEmitter.cpp b/lib/Target/ARM/ARMCodeEmitter.cpp
index 2fd7edd..5fb6ebf 100644
--- a/lib/Target/ARM/ARMCodeEmitter.cpp
+++ b/lib/Target/ARM/ARMCodeEmitter.cpp
@@ -15,6 +15,7 @@
#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMConstantPoolValue.h"
+#include "ARMMachineFunctionInfo.h"
#include "ARMRelocations.h"
#include "ARMSubtarget.h"
#include "ARMTargetMachine.h"
diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
index 6045738..51d3dbb 100644
--- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp
+++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -927,10 +927,16 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
}
case ARM::tTPsoft:
case ARM::TPsoft: {
- MachineInstrBuilder MIB =
- BuildMI(MBB, MBBI, MI.getDebugLoc(),
- TII->get(Opcode == ARM::tTPsoft ? ARM::tBL : ARM::BL))
- .addExternalSymbol("__aeabi_read_tp", 0);
+ MachineInstrBuilder MIB;
+ if (Opcode == ARM::tTPsoft)
+ MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(),
+                  TII->get(ARM::tBL))
+ .addImm((unsigned)ARMCC::AL).addReg(0)
+ .addExternalSymbol("__aeabi_read_tp", 0);
+ else
+ MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(),
+                  TII->get(ARM::BL))
+ .addExternalSymbol("__aeabi_read_tp", 0);
MIB->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
TransferImpOps(MI, MIB, MIB);
diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp
index 6f8fb1a..e2d90cd 100644
--- a/lib/Target/ARM/ARMFastISel.cpp
+++ b/lib/Target/ARM/ARMFastISel.cpp
@@ -590,7 +590,7 @@ unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, MVT VT) {
// Use movw+movt when possible, it avoids constant pool entries.
// Non-darwin targets only support static movt relocations in FastISel.
- if (Subtarget->useMovt() &&
+ if (Subtarget->useMovt(*FuncInfo.MF) &&
(Subtarget->isTargetMachO() || RelocM == Reloc::Static)) {
unsigned Opc;
unsigned char TF = 0;
diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp
index 0caf4bf..a67b360 100644
--- a/lib/Target/ARM/ARMFrameLowering.cpp
+++ b/lib/Target/ARM/ARMFrameLowering.cpp
@@ -39,6 +39,10 @@ static MachineBasicBlock::iterator
skipAlignedDPRCS2Spills(MachineBasicBlock::iterator MI,
unsigned NumAlignedDPRCS2Regs);
+ARMFrameLowering::ARMFrameLowering(const ARMSubtarget &sti)
+ : TargetFrameLowering(StackGrowsDown, sti.getStackAlignment(), 0, 4),
+ STI(sti) {}
+
/// hasFP - Return true if the specified function should have a dedicated frame
/// pointer register. This is true if the function has variable sized allocas
/// or if frame pointer elimination is disabled.
@@ -220,7 +224,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
case ARM::R10:
case ARM::R11:
case ARM::R12:
- if (STI.isTargetMachO()) {
+ if (STI.isTargetDarwin()) {
GPRCS2Size += 4;
break;
}
@@ -380,7 +384,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
case ARM::R10:
case ARM::R11:
case ARM::R12:
- if (STI.isTargetMachO())
+ if (STI.isTargetDarwin())
break;
// fallthrough
case ARM::R0:
@@ -445,7 +449,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
case ARM::R10:
case ARM::R11:
case ARM::R12:
- if (STI.isTargetMachO()) {
+ if (STI.isTargetDarwin()) {
unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
unsigned Offset = MFI->getObjectOffset(FI);
unsigned CFIIndex = MMI.addFrameInst(
@@ -810,7 +814,7 @@ void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB,
unsigned LastReg = 0;
for (; i != 0; --i) {
unsigned Reg = CSI[i-1].getReg();
- if (!(Func)(Reg, STI.isTargetMachO())) continue;
+ if (!(Func)(Reg, STI.isTargetDarwin())) continue;
// D-registers in the aligned area DPRCS2 are NOT spilled here.
if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs)
@@ -888,7 +892,7 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
bool DeleteRet = false;
for (; i != 0; --i) {
unsigned Reg = CSI[i-1].getReg();
- if (!(Func)(Reg, STI.isTargetMachO())) continue;
+ if (!(Func)(Reg, STI.isTargetDarwin())) continue;
// The aligned reloads from area DPRCS2 are not inserted here.
if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs)
@@ -1438,7 +1442,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
if (Spilled) {
NumGPRSpills++;
- if (!STI.isTargetMachO()) {
+ if (!STI.isTargetDarwin()) {
if (Reg == ARM::LR)
LRSpilled = true;
CS1Spilled = true;
@@ -1460,7 +1464,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
break;
}
} else {
- if (!STI.isTargetMachO()) {
+ if (!STI.isTargetDarwin()) {
UnspilledCS1GPRs.push_back(Reg);
continue;
}
diff --git a/lib/Target/ARM/ARMFrameLowering.h b/lib/Target/ARM/ARMFrameLowering.h
index 981d320..709afbc 100644
--- a/lib/Target/ARM/ARMFrameLowering.h
+++ b/lib/Target/ARM/ARMFrameLowering.h
@@ -14,7 +14,6 @@
#ifndef ARM_FRAMEINFO_H
#define ARM_FRAMEINFO_H
-#include "ARMSubtarget.h"
#include "llvm/Target/TargetFrameLowering.h"
namespace llvm {
@@ -25,10 +24,7 @@ protected:
const ARMSubtarget &STI;
public:
- explicit ARMFrameLowering(const ARMSubtarget &sti)
- : TargetFrameLowering(StackGrowsDown, sti.getStackAlignment(), 0, 4),
- STI(sti) {
- }
+ explicit ARMFrameLowering(const ARMSubtarget &sti);
/// emitProlog/emitEpilog - These methods insert prolog and epilog code into
/// the function.
diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp
index 08d598d..38547cf 100644
--- a/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -60,22 +60,17 @@ enum AddrMode2Type {
};
class ARMDAGToDAGISel : public SelectionDAGISel {
- ARMBaseTargetMachine &TM;
-
/// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
/// make the right decision when generating code for different targets.
const ARMSubtarget *Subtarget;
public:
- explicit ARMDAGToDAGISel(ARMBaseTargetMachine &tm,
- CodeGenOpt::Level OptLevel)
- : SelectionDAGISel(tm, OptLevel), TM(tm),
- Subtarget(&TM.getSubtarget<ARMSubtarget>()) {
- }
+ explicit ARMDAGToDAGISel(ARMBaseTargetMachine &tm, CodeGenOpt::Level OptLevel)
+ : SelectionDAGISel(tm, OptLevel) {}
bool runOnMachineFunction(MachineFunction &MF) override {
// Reset the subtarget each time through.
- Subtarget = &TM.getSubtarget<ARMSubtarget>();
+ Subtarget = &MF.getTarget().getSubtarget<ARMSubtarget>();
SelectionDAGISel::runOnMachineFunction(MF);
return true;
}
@@ -429,8 +424,8 @@ bool ARMDAGToDAGISel::hasNoVMLxHazardUse(SDNode *N) const {
if (Use->getOpcode() == ISD::CopyToReg)
return true;
if (Use->isMachineOpcode()) {
- const ARMBaseInstrInfo *TII =
- static_cast<const ARMBaseInstrInfo*>(TM.getInstrInfo());
+ const ARMBaseInstrInfo *TII = static_cast<const ARMBaseInstrInfo *>(
+ CurDAG->getTarget().getInstrInfo());
const MCInstrDesc &MCID = TII->get(Use->getMachineOpcode());
if (MCID.mayStore())
@@ -2444,7 +2439,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
case ISD::Constant: {
unsigned Val = cast<ConstantSDNode>(N)->getZExtValue();
bool UseCP = true;
- if (Subtarget->useMovt())
+ if (Subtarget->useMovt(*MF))
// Thumb2-aware targets have the MOVT instruction, so all immediates can
// be done with MOV + MOVT, at worst.
UseCP = false;
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 00d07e8..4bfa5a8 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -155,16 +155,16 @@ void ARMTargetLowering::addQRTypeForNEON(MVT VT) {
addTypeForNEON(VT, MVT::v2f64, MVT::v4i32);
}
-static TargetLoweringObjectFile *createTLOF(TargetMachine &TM) {
- if (TM.getSubtarget<ARMSubtarget>().isTargetMachO())
+static TargetLoweringObjectFile *createTLOF(const Triple &TT) {
+ if (TT.isOSBinFormatMachO())
return new TargetLoweringObjectFileMachO();
- if (TM.getSubtarget<ARMSubtarget>().isTargetWindows())
+ if (TT.isOSWindows())
return new TargetLoweringObjectFileCOFF();
return new ARMElfTargetObjectFile();
}
ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
- : TargetLowering(TM, createTLOF(TM)) {
+ : TargetLowering(TM, createTLOF(Triple(TM.getTargetTriple()))) {
Subtarget = &TM.getSubtarget<ARMSubtarget>();
RegInfo = TM.getRegisterInfo();
Itins = TM.getInstrItineraryData();
@@ -710,7 +710,11 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setExceptionSelectorRegister(ARM::R1);
}
- setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
+ if (Subtarget->getTargetTriple().isWindowsItaniumEnvironment())
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
+ else
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
+
// ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use
// the default expansion.
if (Subtarget->hasAnyDataBarrier() && !Subtarget->isThumb1Only()) {
@@ -983,6 +987,8 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
case ARMISD::PRELOAD: return "ARMISD::PRELOAD";
+  case ARMISD::WIN__CHKSTK: return "ARMISD::WIN__CHKSTK";
+
case ARMISD::VCEQ: return "ARMISD::VCEQ";
case ARMISD::VCEQZ: return "ARMISD::VCEQZ";
case ARMISD::VCGE: return "ARMISD::VCGE";
@@ -1199,7 +1205,7 @@ ARMTargetLowering::getEffectiveCallingConv(CallingConv::ID CC,
case CallingConv::C:
if (!Subtarget->isAAPCS_ABI())
return CallingConv::ARM_APCS;
- else if (Subtarget->hasVFP2() &&
+ else if (Subtarget->hasVFP2() && !Subtarget->isThumb1Only() &&
getTargetMachine().Options.FloatABIType == FloatABI::Hard &&
!isVarArg)
return CallingConv::ARM_AAPCS_VFP;
@@ -1207,10 +1213,10 @@ ARMTargetLowering::getEffectiveCallingConv(CallingConv::ID CC,
return CallingConv::ARM_AAPCS;
case CallingConv::Fast:
if (!Subtarget->isAAPCS_ABI()) {
- if (Subtarget->hasVFP2() && !isVarArg)
+ if (Subtarget->hasVFP2() && !Subtarget->isThumb1Only() && !isVarArg)
return CallingConv::Fast;
return CallingConv::ARM_APCS;
- } else if (Subtarget->hasVFP2() && !isVarArg)
+ } else if (Subtarget->hasVFP2() && !Subtarget->isThumb1Only() && !isVarArg)
return CallingConv::ARM_AAPCS_VFP;
else
return CallingConv::ARM_AAPCS;
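
Each added !Subtarget->isThumb1Only() guards the same requirement. A compact
restatement of the CallingConv::C path (a sketch only; the Fast path omits the
float-ABI check):

    // AAPCS-VFP register passing needs VFP2, a core whose instruction set can
    // actually reach the FP registers (not Thumb1-only), a hard-float ABI, and
    // a non-variadic call; otherwise plain AAPCS is used.
    static bool useAAPCSVFP(bool HasVFP2, bool IsThumb1Only, bool HardFloat,
                            bool IsVarArg) {
      return HasVFP2 && !IsThumb1Only && HardFloat && !IsVarArg;
    }
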
@@ -1598,8 +1604,9 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
if (EnableARMLongCalls) {
- assert (getTargetMachine().getRelocationModel() == Reloc::Static
- && "long-calls with non-static relocation model!");
+ assert((Subtarget->isTargetWindows() ||
+ getTargetMachine().getRelocationModel() == Reloc::Static) &&
+ "long-calls with non-static relocation model!");
// Handle a global address or an external symbol. If it's not one of
// those, the target's already in a register, so we don't need to do
// anything extra.
@@ -1647,6 +1654,19 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
assert(Subtarget->isTargetMachO() && "WrapperPIC use on non-MachO?");
Callee = DAG.getNode(ARMISD::WrapperPIC, dl, getPointerTy(),
DAG.getTargetGlobalAddress(GV, dl, getPointerTy()));
+ } else if (Subtarget->isTargetCOFF()) {
+ assert(Subtarget->isTargetWindows() &&
+ "Windows is the only supported COFF target");
+ unsigned TargetFlags = GV->hasDLLImportStorageClass()
+ ? ARMII::MO_DLLIMPORT
+ : ARMII::MO_NO_FLAG;
+ Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), /*Offset=*/0,
+ TargetFlags);
+ if (GV->hasDLLImportStorageClass())
+ Callee = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
+ DAG.getNode(ARMISD::Wrapper, dl, getPointerTy(),
+ Callee), MachinePointerInfo::getGOT(),
+ false, false, false, 0);
} else {
// On ELF targets for PIC code, direct calls should go through the PLT
unsigned OpFlags = 0;
@@ -1688,7 +1708,8 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// FIXME: handle tail calls differently.
unsigned CallOpc;
- bool HasMinSizeAttr = Subtarget->isMinSize();
+ bool HasMinSizeAttr = MF.getFunction()->getAttributes().hasAttribute(
+ AttributeSet::FunctionIndex, Attribute::MinSize);
if (Subtarget->isThumb()) {
if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps())
CallOpc = ARMISD::CALL_NOLINK;
@@ -2326,7 +2347,8 @@ ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(dl).setChain(Chain)
.setCallee(CallingConv::C, Type::getInt32Ty(*DAG.getContext()),
- DAG.getExternalSymbol("__tls_get_addr", PtrVT), &Args, 0);
+ DAG.getExternalSymbol("__tls_get_addr", PtrVT), std::move(Args),
+ 0);
std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
return CallResult.first;
@@ -2434,7 +2456,7 @@ SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
// If we have T2 ops, we can materialize the address directly via movt/movw
// pair. This is always cheaper.
- if (Subtarget->useMovt()) {
+ if (Subtarget->useMovt(DAG.getMachineFunction())) {
++NumMovwMovt;
// FIXME: Once remat is capable of dealing with instructions with register
// operands, expand this into two nodes.
@@ -2456,7 +2478,7 @@ SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
Reloc::Model RelocM = getTargetMachine().getRelocationModel();
- if (Subtarget->useMovt())
+ if (Subtarget->useMovt(DAG.getMachineFunction()))
++NumMovwMovt;
// FIXME: Once remat is capable of dealing with instructions with register
@@ -2476,18 +2498,27 @@ SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
SDValue ARMTargetLowering::LowerGlobalAddressWindows(SDValue Op,
SelectionDAG &DAG) const {
assert(Subtarget->isTargetWindows() && "non-Windows COFF is not supported");
- assert(Subtarget->useMovt() && "Windows on ARM expects to use movw/movt");
+ assert(Subtarget->useMovt(DAG.getMachineFunction()) &&
+ "Windows on ARM expects to use movw/movt");
const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
+ const ARMII::TOF TargetFlags =
+ (GV->hasDLLImportStorageClass() ? ARMII::MO_DLLIMPORT : ARMII::MO_NO_FLAG);
EVT PtrVT = getPointerTy();
+ SDValue Result;
SDLoc DL(Op);
++NumMovwMovt;
// FIXME: Once remat is capable of dealing with instructions with register
// operands, expand this into two nodes.
- return DAG.getNode(ARMISD::Wrapper, DL, PtrVT,
- DAG.getTargetGlobalAddress(GV, DL, PtrVT));
+ Result = DAG.getNode(ARMISD::Wrapper, DL, PtrVT,
+ DAG.getTargetGlobalAddress(GV, DL, PtrVT, /*Offset=*/0,
+ TargetFlags));
+ if (GV->hasDLLImportStorageClass())
+ Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
+ MachinePointerInfo::getGOT(), false, false, false, 0);
+ return Result;
}
SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op,
@@ -2535,6 +2566,11 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
SDLoc dl(Op);
switch (IntNo) {
default: return SDValue(); // Don't custom lower most intrinsics.
+ case Intrinsic::arm_rbit: {
+ assert(Op.getOperand(0).getValueType() == MVT::i32 &&
+ "RBIT intrinsic must have i32 type!");
+ return DAG.getNode(ARMISD::RBIT, dl, MVT::i32, Op.getOperand(0));
+ }
case Intrinsic::arm_thread_pointer: {
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
return DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
@@ -4492,6 +4528,11 @@ static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef,
BitMask <<= 8;
ImmMask <<= 1;
}
+
+ if (DAG.getTargetLoweringInfo().isBigEndian())
+    // Swap the mask bits for the higher and lower 32-bit words.
+ Imm = ((Imm & 0xf) << 4) | ((Imm & 0xf0) >> 4);
+
// Op=1, Cmode=1110.
OpCmode = 0x1e;
VT = is128Bits ? MVT::v2i64 : MVT::v1i64;
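
Each bit of the 8-bit modified-immediate mask selects one byte of the 64-bit
element, so exchanging the low and high nibbles exchanges which 32-bit word
every byte lands in. The same transform in isolation (illustrative sketch):

    #include <cstdint>

    // Swap the mask bits describing the low and high 32-bit words of a
    // 64-bit NEON modified-immediate element (one mask bit per byte).
    uint8_t swapImmWords(uint8_t Imm) {
      return static_cast<uint8_t>(((Imm & 0x0f) << 4) | ((Imm & 0xf0) >> 4));
    }
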
@@ -6078,7 +6119,7 @@ SDValue ARMTargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const {
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(dl).setChain(DAG.getEntryNode())
.setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()), Callee,
- &Args, 0)
+ std::move(Args), 0)
.setDiscardResult();
std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
@@ -6213,6 +6254,10 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::FSINCOS: return LowerFSINCOS(Op, DAG);
case ISD::SDIVREM:
case ISD::UDIVREM: return LowerDivRem(Op, DAG);
+ case ISD::DYNAMIC_STACKALLOC:
+ if (Subtarget->getTargetTriple().isWindowsItaniumEnvironment())
+ return LowerDYNAMIC_STACKALLOC(Op, DAG);
+ llvm_unreachable("Don't know how to custom lower this!");
}
}
@@ -7112,6 +7157,73 @@ ARMTargetLowering::EmitStructByval(MachineInstr *MI,
}
MachineBasicBlock *
+ARMTargetLowering::EmitLowered__chkstk(MachineInstr *MI,
+ MachineBasicBlock *MBB) const {
+ const TargetMachine &TM = getTargetMachine();
+ const TargetInstrInfo &TII = *TM.getInstrInfo();
+ DebugLoc DL = MI->getDebugLoc();
+
+ assert(Subtarget->isTargetWindows() &&
+ "__chkstk is only supported on Windows");
+ assert(Subtarget->isThumb2() && "Windows on ARM requires Thumb-2 mode");
+
+  // __chkstk takes the number of words to allocate on the stack in R4, and
+  // returns the stack adjustment in number of bytes in R4. It will not
+  // clobber any other registers (other than the obvious LR).
+  //
+  // Although, technically, IP should be considered a register which may be
+  // clobbered, the call itself will not touch it. Windows on ARM is a pure
+  // Thumb-2 environment, so there is no interworking required. As a result, we
+  // do not expect a veneer to be emitted by the linker, clobbering IP.
+  //
+  // Each module receives its own copy of __chkstk, so no import thunk is
+  // required, again ensuring that IP is not clobbered.
+  //
+  // Finally, although some linkers may theoretically provide a trampoline for
+  // out-of-range calls (which is quite common due to the 32M range limitation
+  // of Thumb branches), we can generate the long-call version via
+  // -mcmodel=large, alleviating the need for the trampoline, which may clobber
+  // IP.
+
+ switch (TM.getCodeModel()) {
+ case CodeModel::Small:
+ case CodeModel::Medium:
+ case CodeModel::Default:
+ case CodeModel::Kernel:
+ BuildMI(*MBB, MI, DL, TII.get(ARM::tBL))
+ .addImm((unsigned)ARMCC::AL).addReg(0)
+ .addExternalSymbol("__chkstk")
+ .addReg(ARM::R4, RegState::Implicit | RegState::Kill)
+ .addReg(ARM::R4, RegState::Implicit | RegState::Define)
+ .addReg(ARM::R12, RegState::Implicit | RegState::Define | RegState::Dead);
+ break;
+ case CodeModel::Large:
+ case CodeModel::JITDefault: {
+ MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
+ unsigned Reg = MRI.createVirtualRegister(&ARM::rGPRRegClass);
+
+ BuildMI(*MBB, MI, DL, TII.get(ARM::t2MOVi32imm), Reg)
+ .addExternalSymbol("__chkstk");
+ BuildMI(*MBB, MI, DL, TII.get(ARM::tBLXr))
+ .addImm((unsigned)ARMCC::AL).addReg(0)
+ .addReg(Reg, RegState::Kill)
+ .addReg(ARM::R4, RegState::Implicit | RegState::Kill)
+ .addReg(ARM::R4, RegState::Implicit | RegState::Define)
+ .addReg(ARM::R12, RegState::Implicit | RegState::Define | RegState::Dead);
+ break;
+ }
+ }
+
+ AddDefaultCC(AddDefaultPred(BuildMI(*MBB, MI, DL, TII.get(ARM::t2SUBrr),
+ ARM::SP)
+ .addReg(ARM::SP, RegState::Define)
+ .addReg(ARM::R4, RegState::Kill)));
+
+ MI->eraseFromParent();
+ return MBB;
+}
+
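A plain C++ model of the register protocol described in the comment above;
this is a restatement for readability, not compiler code:

    #include <cstdint>

    // R4 carries the allocation size in words on entry and the adjustment in
    // bytes on return; the stack is touched at 4K strides so the OS can
    // commit guard pages in order.
    uint32_t chkstkModel(uint32_t WordsInR4) {
      uint32_t Bytes = WordsInR4 * 4;
      for (uint32_t Probe = 4096; Probe < Bytes; Probe += 4096) {
        // probe the page at SP - Probe (elided)
      }
      return Bytes; // placed back in R4; the caller then does SP -= R4
    }
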
+MachineBasicBlock *
ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
MachineBasicBlock *BB) const {
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
@@ -7360,6 +7472,8 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
case ARM::COPY_STRUCT_BYVAL_I32:
++NumLoopByVals;
return EmitStructByval(MI, BB);
+ case ARM::WIN__CHKSTK:
+ return EmitLowered__chkstk(MI, BB);
}
}
@@ -8315,6 +8429,8 @@ static SDValue PerformVMOVRRDCombine(SDNode *N,
std::min(4U, LD->getAlignment() / 2));
DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewLD2.getValue(1));
+ if (DCI.DAG.getTargetLoweringInfo().isBigEndian())
+    std::swap(NewLD1, NewLD2);
SDValue Result = DCI.CombineTo(N, NewLD1, NewLD2);
DCI.RemoveFromWorklist(LD);
DAG.DeleteNode(LD);
@@ -8382,7 +8498,8 @@ static SDValue PerformSTORECombine(SDNode *N,
SDLoc DL(St);
SDValue WideVec = DAG.getNode(ISD::BITCAST, DL, WideVecVT, StVal);
SmallVector<int, 8> ShuffleVec(NumElems * SizeRatio, -1);
- for (unsigned i = 0; i < NumElems; ++i) ShuffleVec[i] = i * SizeRatio;
+ for (unsigned i = 0; i < NumElems; ++i)
+ ShuffleVec[i] = TLI.isBigEndian() ? (i+1) * SizeRatio - 1 : i * SizeRatio;
// Can't shuffle using an illegal type.
if (!TLI.isTypeLegal(WideVecVT)) return SDValue();
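
A worked instance of the index change above: with NumElems = 4 and
SizeRatio = 2, the shuffle picks sub-elements {0, 2, 4, 6} in little-endian
mode but {1, 3, 5, 7} in big-endian mode, because the truncated value lives in
the high half of each wide element there. As a one-line helper (illustrative
only):

    unsigned truncShuffleIndex(unsigned I, unsigned SizeRatio, bool BigEndian) {
      return BigEndian ? (I + 1) * SizeRatio - 1 : I * SizeRatio;
    }
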
@@ -10471,13 +10588,39 @@ SDValue ARMTargetLowering::LowerDivRem(SDValue Op, SelectionDAG &DAG) const {
SDLoc dl(Op);
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(dl).setChain(InChain)
- .setCallee(getLibcallCallingConv(LC), RetTy, Callee, &Args, 0)
+ .setCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args), 0)
.setInRegister().setSExtResult(isSigned).setZExtResult(!isSigned);
std::pair<SDValue, SDValue> CallInfo = LowerCallTo(CLI);
return CallInfo.first;
}
+SDValue
+ARMTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const {
+ assert(Subtarget->isTargetWindows() && "unsupported target platform");
+ SDLoc DL(Op);
+
+ // Get the inputs.
+ SDValue Chain = Op.getOperand(0);
+ SDValue Size = Op.getOperand(1);
+
+ SDValue Words = DAG.getNode(ISD::SRL, DL, MVT::i32, Size,
+ DAG.getConstant(2, MVT::i32));
+
+ SDValue Flag;
+ Chain = DAG.getCopyToReg(Chain, DL, ARM::R4, Words, Flag);
+ Flag = Chain.getValue(1);
+
+ SDVTList NodeTys = DAG.getVTList(MVT::i32, MVT::Glue);
+ Chain = DAG.getNode(ARMISD::WIN__CHKSTK, DL, NodeTys, Chain, Flag);
+
+ SDValue NewSP = DAG.getCopyFromReg(Chain, DL, ARM::SP, MVT::i32);
+ Chain = NewSP.getValue(1);
+
+ SDValue Ops[2] = { NewSP, Chain };
+ return DAG.getMergeValues(Ops, DL);
+}
+
bool
ARMTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
// The ARM target isn't yet aware of offsets.
@@ -10635,14 +10778,20 @@ bool ARMTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
bool ARMTargetLowering::shouldExpandAtomicInIR(Instruction *Inst) const {
// Loads and stores less than 64-bits are already atomic; ones above that
// are doomed anyway, so defer to the default libcall and blame the OS when
- // things go wrong:
- if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
- return SI->getValueOperand()->getType()->getPrimitiveSizeInBits() == 64;
- else if (LoadInst *LI = dyn_cast<LoadInst>(Inst))
- return LI->getType()->getPrimitiveSizeInBits() == 64;
-
- // For the real atomic operations, we have ldrex/strex up to 64 bits.
- return Inst->getType()->getPrimitiveSizeInBits() <= 64;
+  // things go wrong. Cortex-M doesn't have ldrexd/strexd though, so don't emit
+ // anything for those.
+ bool IsMClass = Subtarget->isMClass();
+ if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
+ unsigned Size = SI->getValueOperand()->getType()->getPrimitiveSizeInBits();
+ return Size == 64 && !IsMClass;
+ } else if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
+ return LI->getType()->getPrimitiveSizeInBits() == 64 && !IsMClass;
+ }
+
+  // For the real atomic operations, we have ldrex/strex up to 32 bits,
+  // and up to 64 bits on the non-M profiles.
+ unsigned AtomicLimit = IsMClass ? 32 : 64;
+ return Inst->getType()->getPrimitiveSizeInBits() <= AtomicLimit;
}
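
A condensed restatement of the policy the rewritten function implements (a
sketch; shouldExpand is an illustrative name, not an API in the patch):

    // Plain atomic loads/stores expand only at exactly 64 bits, and never on
    // M-class cores, which lack ldrexd/strexd; every other atomic operation
    // expands up to 32 bits on M-class and up to 64 bits elsewhere.
    static bool shouldExpand(unsigned Bits, bool IsLoadOrStore, bool IsMClass) {
      if (IsLoadOrStore)
        return Bits == 64 && !IsMClass;
      return Bits <= (IsMClass ? 32u : 64u);
    }
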
Value *ARMTargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h
index c15305c..1ace0f3 100644
--- a/lib/Target/ARM/ARMISelLowering.h
+++ b/lib/Target/ARM/ARMISelLowering.h
@@ -95,6 +95,8 @@ namespace llvm {
PRELOAD, // Preload
+ WIN__CHKSTK, // Windows' __chkstk call to do stack probing.
+
VCEQ, // Vector compare equal.
VCEQZ, // Vector compare equal to zero.
VCGE, // Vector compare greater than or equal.
@@ -470,6 +472,7 @@ namespace llvm {
const ARMSubtarget *ST) const;
SDValue LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerDivRem(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
unsigned getRegisterByName(const char* RegName, EVT VT) const override;
@@ -578,6 +581,9 @@ namespace llvm {
MachineBasicBlock *EmitStructByval(MachineInstr *MI,
MachineBasicBlock *MBB) const;
+
+ MachineBasicBlock *EmitLowered__chkstk(MachineInstr *MI,
+ MachineBasicBlock *MBB) const;
};
enum NEONModImmType {
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index 718d5da..2bb8976 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -270,8 +270,8 @@ def UseNaClTrap : Predicate<"Subtarget->useNaClTrap()">,
def DontUseNaClTrap : Predicate<"!Subtarget->useNaClTrap()">;
// FIXME: Eventually this will be just "hasV6T2Ops".
-def UseMovt : Predicate<"Subtarget->useMovt()">;
-def DontUseMovt : Predicate<"!Subtarget->useMovt()">;
+def UseMovt : Predicate<"Subtarget->useMovt(*MF)">;
+def DontUseMovt : Predicate<"!Subtarget->useMovt(*MF)">;
def UseFPVMLx : Predicate<"Subtarget->useFPVMLx()">;
def UseMulOps : Predicate<"Subtarget->useMulOps()">;
@@ -493,7 +493,7 @@ def neon_vcvt_imm32 : Operand<i32> {
// rot_imm: An integer that encodes a rotate amount. Must be 8, 16, or 24.
def rot_imm_XFORM: SDNodeXForm<imm, [{
switch (N->getZExtValue()){
- default: assert(0);
+ default: llvm_unreachable(nullptr);
case 0: return CurDAG->getTargetConstant(0, MVT::i32);
case 8: return CurDAG->getTargetConstant(1, MVT::i32);
case 16: return CurDAG->getTargetConstant(2, MVT::i32);
@@ -594,7 +594,7 @@ def so_imm2part : PatLeaf<(imm), [{
/// arm_i32imm - True for +V6T2, or true only if so_imm2part is true.
///
def arm_i32imm : PatLeaf<(imm), [{
- if (Subtarget->useMovt())
+ if (Subtarget->useMovt(*MF))
return true;
return ARM_AM::isSOImmTwoPartVal((unsigned)N->getZExtValue());
}]>;
@@ -3334,8 +3334,8 @@ def SBFX : I<(outs GPRnopc:$Rd),
let Inst{3-0} = Rn;
}
-def UBFX : I<(outs GPR:$Rd),
- (ins GPR:$Rn, imm0_31:$lsb, imm1_32:$width),
+def UBFX : I<(outs GPRnopc:$Rd),
+ (ins GPRnopc:$Rn, imm0_31:$lsb, imm1_32:$width),
AddrMode1, 4, IndexModeNone, DPFrm, IIC_iUNAsi,
"ubfx", "\t$Rd, $Rn, $lsb, $width", "", []>,
Requires<[IsARM, HasV6T2]> {
@@ -4443,7 +4443,7 @@ def instsyncb_opt : Operand<i32> {
let DecoderMethod = "DecodeInstSyncBarrierOption";
}
-// memory barriers protect the atomic sequences
+// Memory barriers protect the atomic sequences
let hasSideEffects = 1 in {
def DMB : AInoP<(outs), (ins memb_opt:$opt), MiscFrm, NoItinerary,
"dmb", "\t$opt", [(int_arm_dmb (i32 imm0_15:$opt))]>,
@@ -4452,7 +4452,6 @@ def DMB : AInoP<(outs), (ins memb_opt:$opt), MiscFrm, NoItinerary,
let Inst{31-4} = 0xf57ff05;
let Inst{3-0} = opt;
}
-}
def DSB : AInoP<(outs), (ins memb_opt:$opt), MiscFrm, NoItinerary,
"dsb", "\t$opt", [(int_arm_dsb (i32 imm0_15:$opt))]>,
@@ -4464,12 +4463,13 @@ def DSB : AInoP<(outs), (ins memb_opt:$opt), MiscFrm, NoItinerary,
// ISB has only full system option
def ISB : AInoP<(outs), (ins instsyncb_opt:$opt), MiscFrm, NoItinerary,
- "isb", "\t$opt", []>,
+ "isb", "\t$opt", [(int_arm_isb (i32 imm0_15:$opt))]>,
Requires<[IsARM, HasDB]> {
bits<4> opt;
let Inst{31-4} = 0xf57ff06;
let Inst{3-0} = opt;
}
+}
let usesCustomInserter = 1, Defs = [CPSR] in {
@@ -5093,6 +5093,19 @@ def MSRi : ABI<0b0011, (outs), (ins msr_mask:$mask, so_imm:$a), NoItinerary,
let Inst{11-0} = a;
}
+// Dynamic stack allocation yields a __chkstk call for Windows targets. These
+// calls are needed to probe the stack when allocating more than 4K bytes in
+// one go. Touching the stack at 4K increments is necessary to ensure that the
+// guard pages used by the OS virtual memory manager are allocated in the
+// correct sequence.
+// The main point of having a separate instruction is the extra unmodelled
+// effects (compared to ordinary calls), such as the stack pointer change.
+
+def win__chkstk : SDNode<"ARMISD::WIN__CHKSTK", SDTNone,
+ [SDNPHasChain, SDNPSideEffect]>;
+let usesCustomInserter = 1, Uses = [R4], Defs = [R4, SP] in
+ def WIN__CHKSTK : PseudoInst<(outs), (ins), NoItinerary, [(win__chkstk)]>;
+
//===----------------------------------------------------------------------===//
// TLS Instructions
//
@@ -5100,9 +5113,11 @@ def MSRi : ABI<0b0011, (outs), (ins msr_mask:$mask, so_imm:$a), NoItinerary,
// __aeabi_read_tp preserves the registers r1-r3.
// This is a pseudo inst so that we can get the encoding right,
// complete with fixup for the aeabi_read_tp function.
+// TPsoft is valid for ARM mode only; for Thumb mode, the tTPsoft pattern
+// is defined in "ARMInstrThumb.td".
let isCall = 1,
Defs = [R0, R12, LR, CPSR], Uses = [SP] in {
- def TPsoft : PseudoInst<(outs), (ins), IIC_Br,
+ def TPsoft : ARMPseudoInst<(outs), (ins), 4, IIC_Br,
[(set R0, ARMthread_pointer)]>, Sched<[WriteBr]>;
}
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td
index b32b5d2..c02bb3b 100644
--- a/lib/Target/ARM/ARMInstrNEON.td
+++ b/lib/Target/ARM/ARMInstrNEON.td
@@ -6372,6 +6372,32 @@ multiclass Lengthen_HalfSingle<string DestLanes, string DestTy, string SrcTy,
dsub_0)>;
}
+// The following class definition is basically a copy of the
+// Lengthen_HalfSingle definition above, but with an additional parameter
+// "RevLanes" to select the correct VREV32dXX instruction, used to convert
+// data loaded by VLD1LN into the proper vector format in big-endian mode.
+multiclass Lengthen_HalfSingle_Big_Endian<string DestLanes, string DestTy, string SrcTy,
+ string InsnLanes, string InsnTy, string RevLanes> {
+ def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
+ (!cast<PatFrag>("extloadv" # SrcTy) addrmode6oneL32:$addr)),
+ (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy)
+ (!cast<Instruction>("VREV32d" # RevLanes)
+ (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
+ dsub_0)>;
+ def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
+ (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6oneL32:$addr)),
+ (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy)
+ (!cast<Instruction>("VREV32d" # RevLanes)
+ (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
+ dsub_0)>;
+ def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
+ (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6oneL32:$addr)),
+ (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # InsnLanes # InsnTy)
+ (!cast<Instruction>("VREV32d" # RevLanes)
+ (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
+ dsub_0)>;
+}
+
// extload, zextload and sextload for a lengthening load followed by another
// lengthening load, to quadruple the initial length.
//
@@ -6406,6 +6432,36 @@ multiclass Lengthen_Double<string DestLanes, string DestTy, string SrcTy,
dsub_0))>;
}
+// The following class definition is basically a copy of the
+// Lengthen_Double definition above, but with an additional parameter
+// "RevLanes" to select the correct VREV32dXX instruction, used to convert
+// data loaded by VLD1LN into the proper vector format in big-endian mode.
+multiclass Lengthen_Double_Big_Endian<string DestLanes, string DestTy, string SrcTy,
+ string Insn1Lanes, string Insn1Ty, string Insn2Lanes,
+ string Insn2Ty, string RevLanes> {
+ def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
+ (!cast<PatFrag>("extloadv" # SrcTy) addrmode6oneL32:$addr)),
+ (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
+ (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
+ (!cast<Instruction>("VREV32d" # RevLanes)
+ (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
+ dsub_0))>;
+ def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
+ (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6oneL32:$addr)),
+ (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
+ (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
+ (!cast<Instruction>("VREV32d" # RevLanes)
+ (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
+ dsub_0))>;
+ def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
+ (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6oneL32:$addr)),
+ (!cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty)
+ (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty)
+ (!cast<Instruction>("VREV32d" # RevLanes)
+ (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
+ dsub_0))>;
+}
+
// extload, zextload and sextload for a lengthening load followed by another
// lengthening load, to quadruple the initial length, but which ends up only
// requiring half the available lanes (a 64-bit outcome instead of a 128-bit).
@@ -6443,33 +6499,102 @@ multiclass Lengthen_HalfDouble<string DestLanes, string DestTy, string SrcTy,
dsub_0)>;
}
+// The following class definition is basically a copy of the
+// Lengthen_HalfDouble definition above, but with an additional VREV16d8
+// instruction to convert data loaded by VLD1LN into the proper vector format
+// in big-endian mode.
+multiclass Lengthen_HalfDouble_Big_Endian<string DestLanes, string DestTy, string SrcTy,
+ string Insn1Lanes, string Insn1Ty, string Insn2Lanes,
+ string Insn2Ty> {
+ def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
+ (!cast<PatFrag>("extloadv" # SrcTy) addrmode6:$addr)),
+ (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
+ (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
+ (!cast<Instruction>("VREV16d8")
+ (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
+ dsub_0)),
+ dsub_0)>;
+ def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
+ (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6:$addr)),
+ (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
+ (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
+ (!cast<Instruction>("VREV16d8")
+ (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
+ dsub_0)),
+ dsub_0)>;
+ def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
+ (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6:$addr)),
+ (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty)
+ (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty)
+ (!cast<Instruction>("VREV16d8")
+ (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
+ dsub_0)),
+ dsub_0)>;
+}
+
defm : Lengthen_Single<"8", "i16", "8">; // v8i8 -> v8i16
defm : Lengthen_Single<"4", "i32", "16">; // v4i16 -> v4i32
defm : Lengthen_Single<"2", "i64", "32">; // v2i32 -> v2i64
-defm : Lengthen_HalfSingle<"4", "i16", "i8", "8", "i16">; // v4i8 -> v4i16
-defm : Lengthen_HalfSingle<"2", "i32", "i16", "4", "i32">; // v2i16 -> v2i32
+let Predicates = [IsLE] in {
+ defm : Lengthen_HalfSingle<"4", "i16", "i8", "8", "i16">; // v4i8 -> v4i16
+ defm : Lengthen_HalfSingle<"2", "i32", "i16", "4", "i32">; // v2i16 -> v2i32
-// Double lengthening - v4i8 -> v4i16 -> v4i32
-defm : Lengthen_Double<"4", "i32", "i8", "8", "i16", "4", "i32">;
-// v2i8 -> v2i16 -> v2i32
-defm : Lengthen_HalfDouble<"2", "i32", "i8", "8", "i16", "4", "i32">;
-// v2i16 -> v2i32 -> v2i64
-defm : Lengthen_Double<"2", "i64", "i16", "4", "i32", "2", "i64">;
+ // Double lengthening - v4i8 -> v4i16 -> v4i32
+ defm : Lengthen_Double<"4", "i32", "i8", "8", "i16", "4", "i32">;
+ // v2i8 -> v2i16 -> v2i32
+ defm : Lengthen_HalfDouble<"2", "i32", "i8", "8", "i16", "4", "i32">;
+ // v2i16 -> v2i32 -> v2i64
+ defm : Lengthen_Double<"2", "i64", "i16", "4", "i32", "2", "i64">;
+}
+
+let Predicates = [IsBE] in {
+ defm : Lengthen_HalfSingle_Big_Endian<"4", "i16", "i8", "8", "i16", "8">; // v4i8 -> v4i16
+ defm : Lengthen_HalfSingle_Big_Endian<"2", "i32", "i16", "4", "i32", "16">; // v2i16 -> v2i32
+
+ // Double lengthening - v4i8 -> v4i16 -> v4i32
+ defm : Lengthen_Double_Big_Endian<"4", "i32", "i8", "8", "i16", "4", "i32", "8">;
+ // v2i8 -> v2i16 -> v2i32
+ defm : Lengthen_HalfDouble_Big_Endian<"2", "i32", "i8", "8", "i16", "4", "i32">;
+ // v2i16 -> v2i32 -> v2i64
+ defm : Lengthen_Double_Big_Endian<"2", "i64", "i16", "4", "i32", "2", "i64", "16">;
+}
// Triple lengthening - v2i8 -> v2i16 -> v2i32 -> v2i64
-def : Pat<(v2i64 (extloadvi8 addrmode6:$addr)),
- (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16
- (VLD1LNd16 addrmode6:$addr,
- (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>;
-def : Pat<(v2i64 (zextloadvi8 addrmode6:$addr)),
- (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16
- (VLD1LNd16 addrmode6:$addr,
- (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>;
-def : Pat<(v2i64 (sextloadvi8 addrmode6:$addr)),
- (VMOVLsv2i64 (EXTRACT_SUBREG (VMOVLsv4i32 (EXTRACT_SUBREG (VMOVLsv8i16
- (VLD1LNd16 addrmode6:$addr,
- (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>;
+let Predicates = [IsLE] in {
+ def : Pat<(v2i64 (extloadvi8 addrmode6:$addr)),
+ (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16
+ (VLD1LNd16 addrmode6:$addr,
+ (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>;
+ def : Pat<(v2i64 (zextloadvi8 addrmode6:$addr)),
+ (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16
+ (VLD1LNd16 addrmode6:$addr,
+ (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>;
+ def : Pat<(v2i64 (sextloadvi8 addrmode6:$addr)),
+ (VMOVLsv2i64 (EXTRACT_SUBREG (VMOVLsv4i32 (EXTRACT_SUBREG (VMOVLsv8i16
+ (VLD1LNd16 addrmode6:$addr,
+ (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>;
+}
+// The following patterns are basically a copy of the patterns above,
+// but with an additional VREV16d8 instruction to convert data
+// loaded by VLD1LN into the proper vector format in big-endian mode.
+let Predicates = [IsBE] in {
+ def : Pat<(v2i64 (extloadvi8 addrmode6:$addr)),
+ (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16
+ (!cast<Instruction>("VREV16d8")
+ (VLD1LNd16 addrmode6:$addr,
+ (f64 (IMPLICIT_DEF)), (i32 0)))), dsub_0)), dsub_0))>;
+ def : Pat<(v2i64 (zextloadvi8 addrmode6:$addr)),
+ (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16
+ (!cast<Instruction>("VREV16d8")
+ (VLD1LNd16 addrmode6:$addr,
+ (f64 (IMPLICIT_DEF)), (i32 0)))), dsub_0)), dsub_0))>;
+ def : Pat<(v2i64 (sextloadvi8 addrmode6:$addr)),
+ (VMOVLsv2i64 (EXTRACT_SUBREG (VMOVLsv4i32 (EXTRACT_SUBREG (VMOVLsv8i16
+ (!cast<Instruction>("VREV16d8")
+ (VLD1LNd16 addrmode6:$addr,
+ (f64 (IMPLICIT_DEF)), (i32 0)))), dsub_0)), dsub_0))>;
+}
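
The reason the VREV instructions appear in every big-endian pattern:
VLD1LNd32/VLD1LNd16 load one element of the stated size, but the VMOVL
widening patterns consume the register as a vector of 8-bit (or 16-bit) lanes,
and in big-endian mode those two views disagree on the byte order within each
element. VREV32.8 (or VREV16.8) converts between them. A byte-level sketch of
what VREV32.8 does to one 32-bit word:

    #include <cstdint>

    // Reverse the four bytes within a 32-bit word, as VREV32.8 does for each
    // word of a D register.
    uint32_t vrev32_8(uint32_t Word) {
      return ((Word & 0x000000ffu) << 24) | ((Word & 0x0000ff00u) << 8) |
             ((Word & 0x00ff0000u) >> 8) | ((Word & 0xff000000u) >> 24);
    }
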
//===----------------------------------------------------------------------===//
// Assembler aliases
diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td
index c30d6ab..85e9351 100644
--- a/lib/Target/ARM/ARMInstrThumb2.td
+++ b/lib/Target/ARM/ARMInstrThumb2.td
@@ -3209,27 +3209,28 @@ def t2MOVCCi32imm
let hasSideEffects = 1 in {
def t2DMB : T2I<(outs), (ins memb_opt:$opt), NoItinerary,
"dmb", "\t$opt", [(int_arm_dmb (i32 imm0_15:$opt))]>,
- Requires<[HasDB]> {
+ Requires<[IsThumb, HasDB]> {
bits<4> opt;
let Inst{31-4} = 0xf3bf8f5;
let Inst{3-0} = opt;
}
-}
def t2DSB : T2I<(outs), (ins memb_opt:$opt), NoItinerary,
"dsb", "\t$opt", [(int_arm_dsb (i32 imm0_15:$opt))]>,
- Requires<[HasDB]> {
+ Requires<[IsThumb, HasDB]> {
bits<4> opt;
let Inst{31-4} = 0xf3bf8f4;
let Inst{3-0} = opt;
}
def t2ISB : T2I<(outs), (ins instsyncb_opt:$opt), NoItinerary,
- "isb", "\t$opt", []>, Requires<[HasDB]> {
+ "isb", "\t$opt", [(int_arm_isb (i32 imm0_15:$opt))]>,
+ Requires<[IsThumb, HasDB]> {
bits<4> opt;
let Inst{31-4} = 0xf3bf8f6;
let Inst{3-0} = opt;
}
+}
class T2I_ldrex<bits<4> opcod, dag oops, dag iops, AddrMode am, int sz,
InstrItinClass itin, string opc, string asm, string cstr,
diff --git a/lib/Target/ARM/ARMJITInfo.cpp b/lib/Target/ARM/ARMJITInfo.cpp
index 8821c2d..6d1114d 100644
--- a/lib/Target/ARM/ARMJITInfo.cpp
+++ b/lib/Target/ARM/ARMJITInfo.cpp
@@ -13,6 +13,7 @@
#include "ARMJITInfo.h"
#include "ARMConstantPoolValue.h"
+#include "ARMMachineFunctionInfo.h"
#include "ARMRelocations.h"
#include "MCTargetDesc/ARMBaseInfo.h"
#include "llvm/CodeGen/JITCodeEmitter.h"
@@ -334,3 +335,10 @@ void ARMJITInfo::relocate(void *Function, MachineRelocation *MR,
}
}
}
+
+void ARMJITInfo::Initialize(const MachineFunction &MF, bool isPIC) {
+ const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ ConstPoolId2AddrMap.resize(AFI->getNumPICLabels());
+ JumpTableId2AddrMap.resize(AFI->getNumJumpTables());
+ IsPIC = isPIC;
+}
diff --git a/lib/Target/ARM/ARMJITInfo.h b/lib/Target/ARM/ARMJITInfo.h
index ee4c863..27e2a20 100644
--- a/lib/Target/ARM/ARMJITInfo.h
+++ b/lib/Target/ARM/ARMJITInfo.h
@@ -14,7 +14,6 @@
#ifndef ARMJITINFO_H
#define ARMJITINFO_H
-#include "ARMMachineFunctionInfo.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineConstantPool.h"
@@ -103,12 +102,7 @@ namespace llvm {
/// Resize constant pool ids to CONSTPOOL_ENTRY addresses map; resize
/// jump table ids to jump table bases map; remember if codegen relocation
/// model is PIC.
- void Initialize(const MachineFunction &MF, bool isPIC) {
- const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
- ConstPoolId2AddrMap.resize(AFI->getNumPICLabels());
- JumpTableId2AddrMap.resize(AFI->getNumJumpTables());
- IsPIC = isPIC;
- }
+ void Initialize(const MachineFunction &MF, bool isPIC);
/// getConstantPoolEntryAddr - The ARM target puts all constant
/// pool entries into constant islands. This returns the address of the
diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
index ee7df54..a03bcdb 100644
--- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -505,7 +505,7 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
// Exception: If the base register is in the input reglist, Thumb1 LDM is
// non-writeback. Check for this.
- if (Opcode == ARM::tLDRi && isThumb1)
+ if (Opcode == ARM::tLDMIA && isThumb1)
for (unsigned I = 0; I < NumRegs; ++I)
if (Base == Regs[I].first) {
Writeback = false;
@@ -519,17 +519,17 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
// Update tLDMIA with writeback if necessary.
Opcode = ARM::tLDMIA_UPD;
- // The base isn't dead after a merged instruction with writeback. Update
- // future uses of the base with the added offset (if possible), or reset
- // the base register as necessary.
- if (!BaseKill)
- UpdateBaseRegUses(MBB, MBBI, dl, Base, NumRegs, Pred, PredReg);
-
MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode));
// Thumb1: we might need to set base writeback when building the MI.
MIB.addReg(Base, getDefRegState(true))
.addReg(Base, getKillRegState(BaseKill));
+
+ // The base isn't dead after a merged instruction with writeback. Update
+ // future uses of the base with the added offset (if possible), or reset
+ // the base register as necessary.
+ if (!BaseKill)
+ UpdateBaseRegUses(MBB, MBBI, dl, Base, NumRegs, Pred, PredReg);
} else {
// No writeback, simply build the MachineInstr.
MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode));
@@ -1734,6 +1734,12 @@ bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
isThumb2 = AFI->isThumb2Function();
isThumb1 = AFI->isThumbFunction() && !isThumb2;
+ // FIXME: Temporarily disabling for Thumb-1 due to miscompiles
+ if (isThumb1) {
+ delete RS;
+ return false;
+ }
+
bool Modified = false;
for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
++MFI) {
diff --git a/lib/Target/ARM/ARMMCInstLower.cpp b/lib/Target/ARM/ARMMCInstLower.cpp
index 48141b1..023f5f8 100644
--- a/lib/Target/ARM/ARMMCInstLower.cpp
+++ b/lib/Target/ARM/ARMMCInstLower.cpp
@@ -34,7 +34,7 @@ MCOperand ARMAsmPrinter::GetSymbolRef(const MachineOperand &MO,
OutContext);
switch (Option) {
default: llvm_unreachable("Unknown target flag on symbol operand");
- case 0:
+ case ARMII::MO_NO_FLAG:
break;
case ARMII::MO_LO16:
Expr = MCSymbolRefExpr::Create(Symbol, MCSymbolRefExpr::VK_None,
diff --git a/lib/Target/ARM/ARMMachineFunctionInfo.cpp b/lib/Target/ARM/ARMMachineFunctionInfo.cpp
index af445e2..892b269 100644
--- a/lib/Target/ARM/ARMMachineFunctionInfo.cpp
+++ b/lib/Target/ARM/ARMMachineFunctionInfo.cpp
@@ -12,3 +12,13 @@
using namespace llvm;
void ARMFunctionInfo::anchor() { }
+
+ARMFunctionInfo::ARMFunctionInfo(MachineFunction &MF)
+ : isThumb(MF.getTarget().getSubtarget<ARMSubtarget>().isThumb()),
+ hasThumb2(MF.getTarget().getSubtarget<ARMSubtarget>().hasThumb2()),
+ StByValParamsPadding(0), ArgRegsSaveSize(0), HasStackFrame(false),
+ RestoreSPFromFP(false), LRSpilledForFarJump(false),
+ FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0),
+ GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0), JumpTableUId(0),
+ PICLabelUId(0), VarArgsFrameIndex(0), HasITBlocks(false),
+ GlobalBaseReg(0) {}
diff --git a/lib/Target/ARM/ARMMachineFunctionInfo.h b/lib/Target/ARM/ARMMachineFunctionInfo.h
index d7ec6eb..44a9e34 100644
--- a/lib/Target/ARM/ARMMachineFunctionInfo.h
+++ b/lib/Target/ARM/ARMMachineFunctionInfo.h
@@ -130,16 +130,7 @@ public:
JumpTableUId(0), PICLabelUId(0),
VarArgsFrameIndex(0), HasITBlocks(false), GlobalBaseReg(0) {}
- explicit ARMFunctionInfo(MachineFunction &MF) :
- isThumb(MF.getTarget().getSubtarget<ARMSubtarget>().isThumb()),
- hasThumb2(MF.getTarget().getSubtarget<ARMSubtarget>().hasThumb2()),
- StByValParamsPadding(0),
- ArgRegsSaveSize(0), HasStackFrame(false), RestoreSPFromFP(false),
- LRSpilledForFarJump(false),
- FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0),
- GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0),
- JumpTableUId(0), PICLabelUId(0),
- VarArgsFrameIndex(0), HasITBlocks(false), GlobalBaseReg(0) {}
+ explicit ARMFunctionInfo(MachineFunction &MF);
bool isThumbFunction() const { return isThumb; }
bool isThumb1OnlyFunction() const { return isThumb && !hasThumb2; }
@@ -220,7 +211,7 @@ public:
void recordCPEClone(unsigned CPIdx, unsigned CPCloneIdx) {
if (!CPEClones.insert(std::make_pair(CPCloneIdx, CPIdx)).second)
- assert(0 && "Duplicate entries!");
+ llvm_unreachable("Duplicate entries!");
}
unsigned getOriginalCPIdx(unsigned CloneIdx) const {
diff --git a/lib/Target/ARM/ARMSelectionDAGInfo.cpp b/lib/Target/ARM/ARMSelectionDAGInfo.cpp
index 008ad64..3dcc0df 100644
--- a/lib/Target/ARM/ARMSelectionDAGInfo.cpp
+++ b/lib/Target/ARM/ARMSelectionDAGInfo.cpp
@@ -18,10 +18,8 @@ using namespace llvm;
#define DEBUG_TYPE "arm-selectiondag-info"
-ARMSelectionDAGInfo::ARMSelectionDAGInfo(const TargetMachine &TM)
- : TargetSelectionDAGInfo(TM),
- Subtarget(&TM.getSubtarget<ARMSubtarget>()) {
-}
+ARMSelectionDAGInfo::ARMSelectionDAGInfo(const DataLayout &DL)
+ : TargetSelectionDAGInfo(&DL) {}
ARMSelectionDAGInfo::~ARMSelectionDAGInfo() {
}
@@ -34,6 +32,7 @@ ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl,
bool isVolatile, bool AlwaysInline,
MachinePointerInfo DstPtrInfo,
MachinePointerInfo SrcPtrInfo) const {
+ const ARMSubtarget &Subtarget = DAG.getTarget().getSubtarget<ARMSubtarget>();
// Do repeated 4-byte loads and stores. To be improved.
// This requires 4-byte alignment.
if ((Align & 3) != 0)
@@ -44,7 +43,7 @@ ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl,
if (!ConstantSize)
return SDValue();
uint64_t SizeVal = ConstantSize->getZExtValue();
- if (!AlwaysInline && SizeVal > Subtarget->getMaxInlineSizeThreshold())
+ if (!AlwaysInline && SizeVal > Subtarget.getMaxInlineSizeThreshold())
return SDValue();
unsigned BytesLeft = SizeVal & 3;
@@ -54,7 +53,7 @@ ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl,
unsigned VTSize = 4;
unsigned i = 0;
// Emit a maximum of 4 loads in Thumb1 since we have fewer registers
- const unsigned MAX_LOADS_IN_LDM = Subtarget->isThumb1Only() ? 4 : 6;
+ const unsigned MAX_LOADS_IN_LDM = Subtarget.isThumb1Only() ? 4 : 6;
SDValue TFOps[6];
SDValue Loads[6];
uint64_t SrcOff = 0, DstOff = 0;
@@ -151,9 +150,10 @@ EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl,
SDValue Src, SDValue Size,
unsigned Align, bool isVolatile,
MachinePointerInfo DstPtrInfo) const {
+ const ARMSubtarget &Subtarget = DAG.getTarget().getSubtarget<ARMSubtarget>();
// Use default for non-AAPCS (or MachO) subtargets
- if (!Subtarget->isAAPCS_ABI() || Subtarget->isTargetMachO() ||
- Subtarget->isTargetWindows())
+ if (!Subtarget.isAAPCS_ABI() || Subtarget.isTargetMachO() ||
+ Subtarget.isTargetWindows())
return SDValue();
const ARMTargetLowering &TLI =
@@ -191,7 +191,7 @@ EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl,
.setCallee(TLI.getLibcallCallingConv(RTLIB::MEMSET),
Type::getVoidTy(*DAG.getContext()),
DAG.getExternalSymbol(TLI.getLibcallName(RTLIB::MEMSET),
- TLI.getPointerTy()), &Args, 0)
+ TLI.getPointerTy()), std::move(Args), 0)
.setDiscardResult();
std::pair<SDValue,SDValue> CallResult = TLI.LowerCallTo(CLI);
diff --git a/lib/Target/ARM/ARMSelectionDAGInfo.h b/lib/Target/ARM/ARMSelectionDAGInfo.h
index 8c2397b..13769dc 100644
--- a/lib/Target/ARM/ARMSelectionDAGInfo.h
+++ b/lib/Target/ARM/ARMSelectionDAGInfo.h
@@ -36,12 +36,8 @@ namespace ARM_AM {
} // end namespace ARM_AM
class ARMSelectionDAGInfo : public TargetSelectionDAGInfo {
- /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
- /// make the right decision when generating code for different targets.
- const ARMSubtarget *Subtarget;
-
public:
- explicit ARMSelectionDAGInfo(const TargetMachine &TM);
+ explicit ARMSelectionDAGInfo(const DataLayout &DL);
~ARMSelectionDAGInfo();
SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl,
diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp
index 5b204f6..0eb24ef 100644
--- a/lib/Target/ARM/ARMSubtarget.cpp
+++ b/lib/Target/ARM/ARMSubtarget.cpp
@@ -12,8 +12,15 @@
//===----------------------------------------------------------------------===//
#include "ARMSubtarget.h"
-#include "ARMBaseInstrInfo.h"
-#include "ARMBaseRegisterInfo.h"
+#include "ARMFrameLowering.h"
+#include "ARMISelLowering.h"
+#include "ARMInstrInfo.h"
+#include "ARMJITInfo.h"
+#include "ARMSelectionDAGInfo.h"
+#include "Thumb1FrameLowering.h"
+#include "Thumb1InstrInfo.h"
+#include "Thumb2InstrInfo.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
@@ -76,22 +83,89 @@ IT(cl::desc("IT block support"), cl::Hidden, cl::init(DefaultIT),
"Allow IT blocks based on ARMv7"),
clEnumValEnd));
-ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &CPU,
- const std::string &FS, bool IsLittle,
- const TargetOptions &Options)
- : ARMGenSubtargetInfo(TT, CPU, FS)
- , ARMProcFamily(Others)
- , ARMProcClass(None)
- , stackAlignment(4)
- , CPUString(CPU)
- , IsLittle(IsLittle)
- , TargetTriple(TT)
- , Options(Options)
- , TargetABI(ARM_ABI_UNKNOWN) {
+static std::string computeDataLayout(ARMSubtarget &ST) {
+ std::string Ret = "";
+
+ if (ST.isLittle())
+ // Little endian.
+ Ret += "e";
+ else
+ // Big endian.
+ Ret += "E";
+
+ Ret += DataLayout::getManglingComponent(ST.getTargetTriple());
+
+ // Pointers are 32 bits and aligned to 32 bits.
+ Ret += "-p:32:32";
+
+  // On Thumb, i1, i8, and i16 have natural alignment requirements, but we try
+  // to align them to 32 bits.
+ if (ST.isThumb())
+ Ret += "-i1:8:32-i8:8:32-i16:16:32";
+
+  // ABIs other than APCS have 64-bit integers with natural alignment.
+ if (!ST.isAPCS_ABI())
+ Ret += "-i64:64";
+
+  // We have 64-bit floats. The APCS ABI requires them to be aligned to 32
+  // bits; other ABIs align them to 64 bits. We always try to align to 64 bits.
+ if (ST.isAPCS_ABI())
+ Ret += "-f64:32:64";
+
+  // We have 128-bit and 64-bit vectors. The APCS ABI aligns them to 32 bits,
+  // other ABIs to 64. We always try to give them natural alignment.
+ if (ST.isAPCS_ABI())
+ Ret += "-v64:32:64-v128:32:128";
+ else
+ Ret += "-v128:64:128";
+
+  // On Thumb and APCS, we only try to align aggregates to 32 bits (the default
+  // is 64 bits).
+ if (ST.isThumb() || ST.isAPCS_ABI())
+ Ret += "-a:0:32";
+
+ // Integer registers are 32 bits.
+ Ret += "-n32";
+
+  // The stack is 128-bit aligned on NaCl, 64-bit aligned on AAPCS, and 32-bit
+  // aligned everywhere else.
+ if (ST.isTargetNaCl())
+ Ret += "-S128";
+ else if (ST.isAAPCS_ABI())
+ Ret += "-S64";
+ else
+ Ret += "-S32";
+
+ return Ret;
+}
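
Walking the branches above for a little-endian Thumb AAPCS ELF subtarget, for
example, yields the following string (derived from the code above; shown for
illustration):

    // Result of computeDataLayout for a little-endian Thumb AAPCS ELF target.
    const char *ThumbAAPCSLayout = "e-m:e-p:32:32-i1:8:32-i8:8:32-i16:16:32"
                                   "-i64:64-v128:64:128-a:0:32-n32-S64";
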
+
+/// initializeSubtargetDependencies - Initializes using a CPU and feature string
+/// so that we can use initializer lists for subtarget initialization.
+ARMSubtarget &ARMSubtarget::initializeSubtargetDependencies(StringRef CPU,
+ StringRef FS) {
initializeEnvironment();
resetSubtargetFeatures(CPU, FS);
+ return *this;
}
+ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &CPU,
+ const std::string &FS, TargetMachine &TM,
+ bool IsLittle, const TargetOptions &Options)
+ : ARMGenSubtargetInfo(TT, CPU, FS), ARMProcFamily(Others),
+ ARMProcClass(None), stackAlignment(4), CPUString(CPU), IsLittle(IsLittle),
+ TargetTriple(TT), Options(Options), TargetABI(ARM_ABI_UNKNOWN),
+ DL(computeDataLayout(initializeSubtargetDependencies(CPU, FS))),
+ TSInfo(DL), JITInfo(),
+ InstrInfo(isThumb1Only()
+ ? (ARMBaseInstrInfo *)new Thumb1InstrInfo(*this)
+ : !isThumb()
+ ? (ARMBaseInstrInfo *)new ARMInstrInfo(*this)
+ : (ARMBaseInstrInfo *)new Thumb2InstrInfo(*this)),
+ TLInfo(TM),
+ FrameLowering(!isThumb1Only()
+ ? new ARMFrameLowering(*this)
+ : (ARMFrameLowering *)new Thumb1FrameLowering(*this)) {}
+
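The initializeSubtargetDependencies helper exists so that feature parsing runs
before the DL member is constructed in the initializer list above. The shape
of the idiom in miniature (illustrative types only, not the patch's classes):

    #include <string>

    struct Mini {
      // Parsed is declared (and thus initialized) before DL, so DL's
      // initializer observes a fully parsed object.
      Mini(const char *CPU) : Parsed(initDeps(CPU)), DL(computeDL(*this)) {}
      bool Parsed;
      std::string DL;

    private:
      bool initDeps(const char *CPU) { return CPU && *CPU; }
      static std::string computeDL(const Mini &M) {
        return M.Parsed ? "e-p:32:32" : "";
      }
    };
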
void ARMSubtarget::initializeEnvironment() {
HasV4TOps = false;
HasV5TOps = false;
@@ -106,7 +180,6 @@ void ARMSubtarget::initializeEnvironment() {
HasVFPv4 = false;
HasFPARMv8 = false;
HasNEON = false;
- MinSize = false;
UseNEONForSinglePrecisionFP = false;
UseMulOps = UseFusedMulOps;
SlowFPVMLx = false;
@@ -158,9 +231,6 @@ void ARMSubtarget::resetSubtargetFeatures(const MachineFunction *MF) {
initializeEnvironment();
resetSubtargetFeatures(CPU, FS);
}
-
- MinSize =
- FnAttrs.hasAttribute(AttributeSet::FunctionIndex, Attribute::MinSize);
}
void ARMSubtarget::resetSubtargetFeatures(StringRef CPU, StringRef FS) {
@@ -353,6 +423,17 @@ bool ARMSubtarget::hasSinCos() const {
!getTargetTriple().isOSVersionLT(7, 0);
}
+// Enable the PostMachineScheduler if the target selects it instead of
+// PostRAScheduler. Currently only available on the command line via
+// -misched-postra.
+bool ARMSubtarget::enablePostMachineScheduler() const {
+ return PostRAScheduler;
+}
+
+bool ARMSubtarget::enableAtomicExpandLoadLinked() const {
+ return hasAnyDataBarrier() && !isThumb1Only();
+}
+
bool ARMSubtarget::enablePostRAScheduler(
CodeGenOpt::Level OptLevel,
TargetSubtargetInfo::AntiDepBreakMode& Mode,
@@ -360,3 +441,12 @@ bool ARMSubtarget::enablePostRAScheduler(
Mode = TargetSubtargetInfo::ANTIDEP_NONE;
return PostRAScheduler && OptLevel >= CodeGenOpt::Default;
}
+
+bool ARMSubtarget::useMovt(const MachineFunction &MF) const {
+ // NOTE: Windows on ARM needs to use movw/movt pairs to materialise 32-bit
+ // immediates, as the code is inherently position independent and a literal
+ // pool load may be out of range otherwise.
+ return UseMovt && (isTargetWindows() ||
+ !MF.getFunction()->getAttributes().hasAttribute(
+ AttributeSet::FunctionIndex, Attribute::MinSize));
+}
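A minimal sketch of the decision above, with plain bools standing in for the subtarget and attribute queries (the helper name is illustrative, not from the patch):

// Sketch: Windows always gets movw/movt pairs; elsewhere they are skipped
// when the function carries the minsize attribute.
static bool useMovtSketch(bool UseMovt, bool IsWindows, bool HasMinSize) {
  return UseMovt && (IsWindows || !HasMinSize);
}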
diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h
index 38536b2..8f6c165 100644
--- a/lib/Target/ARM/ARMSubtarget.h
+++ b/lib/Target/ARM/ARMSubtarget.h
@@ -14,8 +14,20 @@
#ifndef ARMSUBTARGET_H
#define ARMSUBTARGET_H
+
+#include "ARMFrameLowering.h"
+#include "ARMISelLowering.h"
+#include "ARMInstrInfo.h"
+#include "ARMJITInfo.h"
+#include "ARMSelectionDAGInfo.h"
+#include "ARMSubtarget.h"
+#include "Thumb1FrameLowering.h"
+#include "Thumb1InstrInfo.h"
+#include "Thumb2InstrInfo.h"
+#include "ARMJITInfo.h"
#include "MCTargetDesc/ARMMCTargetDesc.h"
#include "llvm/ADT/Triple.h"
+#include "llvm/IR/DataLayout.h"
#include "llvm/MC/MCInstrItineraries.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include <string>
@@ -64,10 +76,6 @@ protected:
bool HasFPARMv8;
bool HasNEON;
- /// MinSize - True if the function being compiled has the "minsize" attribute
- /// and should be optimised for size at the expense of speed.
- bool MinSize;
-
/// UseNEONForSinglePrecisionFP - if the NEONFP attribute has been
/// specified. Use the method useNEONForSinglePrecisionFP() to
/// determine if NEON should actually be used.
@@ -236,7 +244,7 @@ protected:
/// of the specified triple.
///
ARMSubtarget(const std::string &TT, const std::string &CPU,
- const std::string &FS, bool IsLittle,
+ const std::string &FS, TargetMachine &TM, bool IsLittle,
const TargetOptions &Options);
/// getMaxInlineSizeThreshold - Returns the maximum memset / memcpy size
@@ -250,7 +258,31 @@ protected:
/// \brief Reset the features for the ARM target.
void resetSubtargetFeatures(const MachineFunction *MF) override;
+
+ /// initializeSubtargetDependencies - Initializes using a CPU and feature string
+ /// so that we can use initializer lists for subtarget initialization.
+ ARMSubtarget &initializeSubtargetDependencies(StringRef CPU, StringRef FS);
+
+ const DataLayout *getDataLayout() const { return &DL; }
+ const ARMSelectionDAGInfo *getSelectionDAGInfo() const { return &TSInfo; }
+ ARMJITInfo *getJITInfo() { return &JITInfo; }
+ const ARMBaseInstrInfo *getInstrInfo() const { return InstrInfo.get(); }
+ const ARMTargetLowering *getTargetLowering() const { return &TLInfo; }
+ const ARMFrameLowering *getFrameLowering() const { return FrameLowering.get(); }
+ const ARMBaseRegisterInfo *getRegisterInfo() const {
+ return &InstrInfo->getRegisterInfo();
+ }
+
private:
+ const DataLayout DL;
+ ARMSelectionDAGInfo TSInfo;
+ ARMJITInfo JITInfo;
+ // One of ARMInstrInfo, Thumb1InstrInfo or Thumb2InstrInfo.
+ std::unique_ptr<ARMBaseInstrInfo> InstrInfo;
+ ARMTargetLowering TLInfo;
+ // Either Thumb1FrameLowering or ARMFrameLowering.
+ std::unique_ptr<ARMFrameLowering> FrameLowering;
+
void initializeEnvironment();
void resetSubtargetFeatures(StringRef CPU, StringRef FS);
public:
@@ -286,7 +318,6 @@ public:
bool hasCrypto() const { return HasCrypto; }
bool hasCRC() const { return HasCRC; }
bool hasVirtualization() const { return HasVirtualization; }
- bool isMinSize() const { return MinSize; }
bool useNEONForSinglePrecisionFP() const {
return hasNEON() && UseNEONForSinglePrecisionFP; }
@@ -382,7 +413,8 @@ public:
bool isR9Reserved() const { return IsR9Reserved; }
- bool useMovt() const { return UseMovt && !isMinSize(); }
+ bool useMovt(const MachineFunction &MF) const;
+
bool supportsTailCall() const { return SupportsTailCall; }
bool allowsUnalignedMem() const { return AllowsUnalignedMem; }
@@ -399,11 +431,17 @@ public:
/// compiler runtime or math libraries.
bool hasSinCos() const;
+ /// True for some subtargets at > -O0.
+ bool enablePostMachineScheduler() const;
+
/// enablePostRAScheduler - True at 'More' optimization.
bool enablePostRAScheduler(CodeGenOpt::Level OptLevel,
TargetSubtargetInfo::AntiDepBreakMode& Mode,
RegClassVector& CriticalPathRCs) const override;
+ // enableAtomicExpandLoadLinked - True if atomic operations should be
+ // expanded into load-linked/store-conditional (ldrex/strex) loops.
+ bool enableAtomicExpandLoadLinked() const override;
+
/// getInstrItins - Return the instruction itineraries based on subtarget
/// selection.
const InstrItineraryData &getInstrItineraryData() const { return InstrItins; }
diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp
index 8876227..d85194b 100644
--- a/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/lib/Target/ARM/ARMTargetMachine.cpp
@@ -28,6 +28,12 @@ DisableA15SDOptimization("disable-a15-sd-optimization", cl::Hidden,
cl::desc("Inhibit optimization of S->D register accesses on A15"),
cl::init(false));
+static cl::opt<bool>
+EnableAtomicTidy("arm-atomic-cfg-tidy", cl::Hidden,
+ cl::desc("Run SimplifyCFG after expanding atomic operations"
+ " to make use of cmpxchg flow-based information"),
+ cl::init(true));
+
extern "C" void LLVMInitializeARMTarget() {
// Register the target.
RegisterTargetMachine<ARMLETargetMachine> X(TheARMLETarget);
@@ -43,12 +49,9 @@ ARMBaseTargetMachine::ARMBaseTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
- CodeGenOpt::Level OL,
- bool isLittle)
- : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
- Subtarget(TT, CPU, FS, isLittle, Options),
- JITInfo(),
- InstrItins(Subtarget.getInstrItineraryData()) {
+ CodeGenOpt::Level OL, bool isLittle)
+ : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
+ Subtarget(TT, CPU, FS, *this, isLittle, Options) {
// Default to triple-appropriate float ABI
if (Options.FloatABIType == FloatABI::Default)
@@ -67,74 +70,11 @@ void ARMBaseTargetMachine::addAnalysisPasses(PassManagerBase &PM) {
void ARMTargetMachine::anchor() { }
-static std::string computeDataLayout(ARMSubtarget &ST) {
- std::string Ret = "";
-
- if (ST.isLittle())
- // Little endian.
- Ret += "e";
- else
- // Big endian.
- Ret += "E";
-
- Ret += DataLayout::getManglingComponent(ST.getTargetTriple());
-
- // Pointers are 32 bits and aligned to 32 bits.
- Ret += "-p:32:32";
-
- // On Thumb, i1, i8 and i16 have natural alignment requirements, but we try
- // to align them to 32 bits.
- if (ST.isThumb())
- Ret += "-i1:8:32-i8:8:32-i16:16:32";
-
- // ABIs other than APCS have 64-bit integers with natural alignment.
- if (!ST.isAPCS_ABI())
- Ret += "-i64:64";
-
- // We have 64-bit floats. The APCS ABI requires them to be aligned to 32
- // bits, other ABIs to 64 bits. We always try to align to 64 bits.
- if (ST.isAPCS_ABI())
- Ret += "-f64:32:64";
-
- // We have 128- and 64-bit vectors. The APCS ABI aligns them to 32 bits,
- // other ABIs to 64. We always try to give them natural alignment.
- if (ST.isAPCS_ABI())
- Ret += "-v64:32:64-v128:32:128";
- else
- Ret += "-v128:64:128";
-
- // On Thumb and APCS, only try to align aggregates to 32 bits (the default is
- // 64 bits).
- if (ST.isThumb() || ST.isAPCS_ABI())
- Ret += "-a:0:32";
-
- // Integer registers are 32 bits.
- Ret += "-n32";
-
- // The stack is 128-bit aligned on NaCl, 64-bit aligned on AAPCS and 32-bit
- // aligned everywhere else.
- if (ST.isTargetNaCl())
- Ret += "-S128";
- else if (ST.isAAPCS_ABI())
- Ret += "-S64";
- else
- Ret += "-S32";
-
- return Ret;
-}
-
-ARMTargetMachine::ARMTargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
- const TargetOptions &Options,
+ARMTargetMachine::ARMTargetMachine(const Target &T, StringRef TT, StringRef CPU,
+ StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
- CodeGenOpt::Level OL,
- bool isLittle)
- : ARMBaseTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, isLittle),
- InstrInfo(Subtarget),
- DL(computeDataLayout(Subtarget)),
- TLInfo(*this),
- TSInfo(*this),
- FrameLowering(Subtarget) {
+ CodeGenOpt::Level OL, bool isLittle)
+ : ARMBaseTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, isLittle) {
initAsmInfo();
if (!Subtarget.hasARMOps())
report_fatal_error("CPU: '" + Subtarget.getCPUString() + "' does not "
@@ -143,21 +83,21 @@ ARMTargetMachine::ARMTargetMachine(const Target &T, StringRef TT,
void ARMLETargetMachine::anchor() { }
-ARMLETargetMachine::
-ARMLETargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS, const TargetOptions &Options,
- Reloc::Model RM, CodeModel::Model CM,
- CodeGenOpt::Level OL)
- : ARMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {}
+ARMLETargetMachine::ARMLETargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL)
+ : ARMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {}
void ARMBETargetMachine::anchor() { }
-ARMBETargetMachine::
-ARMBETargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS, const TargetOptions &Options,
- Reloc::Model RM, CodeModel::Model CM,
- CodeGenOpt::Level OL)
- : ARMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {}
+ARMBETargetMachine::ARMBETargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL)
+ : ARMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {}
void ThumbTargetMachine::anchor() { }
@@ -165,38 +105,29 @@ ThumbTargetMachine::ThumbTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
- CodeGenOpt::Level OL,
- bool isLittle)
- : ARMBaseTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, isLittle),
- InstrInfo(Subtarget.hasThumb2()
- ? ((ARMBaseInstrInfo*)new Thumb2InstrInfo(Subtarget))
- : ((ARMBaseInstrInfo*)new Thumb1InstrInfo(Subtarget))),
- DL(computeDataLayout(Subtarget)),
- TLInfo(*this),
- TSInfo(*this),
- FrameLowering(Subtarget.hasThumb2()
- ? new ARMFrameLowering(Subtarget)
- : (ARMFrameLowering*)new Thumb1FrameLowering(Subtarget)) {
+ CodeGenOpt::Level OL, bool isLittle)
+ : ARMBaseTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL,
+ isLittle) {
initAsmInfo();
}
void ThumbLETargetMachine::anchor() { }
-ThumbLETargetMachine::
-ThumbLETargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS, const TargetOptions &Options,
- Reloc::Model RM, CodeModel::Model CM,
- CodeGenOpt::Level OL)
- : ThumbTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {}
+ThumbLETargetMachine::ThumbLETargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL)
+ : ThumbTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {}
void ThumbBETargetMachine::anchor() { }
-ThumbBETargetMachine::
-ThumbBETargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS, const TargetOptions &Options,
- Reloc::Model RM, CodeModel::Model CM,
- CodeGenOpt::Level OL)
- : ThumbTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {}
+ThumbBETargetMachine::ThumbBETargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL)
+ : ThumbTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {}
namespace {
/// ARM Code Generator Pass Configuration Options.
@@ -213,6 +144,7 @@ public:
return *getARMTargetMachine().getSubtargetImpl();
}
+ void addIRPasses() override;
bool addPreISel() override;
bool addInstSelector() override;
bool addPreRegAlloc() override;
@@ -225,11 +157,21 @@ TargetPassConfig *ARMBaseTargetMachine::createPassConfig(PassManagerBase &PM) {
return new ARMPassConfig(this, PM);
}
-bool ARMPassConfig::addPreISel() {
+void ARMPassConfig::addIRPasses() {
+ addPass(createAtomicExpandLoadLinkedPass(TM));
+
+ // Cmpxchg instructions are often used with a subsequent comparison to
+ // determine whether it succeeded. We can exploit existing control-flow in
+ // ldrex/strex loops to simplify this, but it needs tidying up.
const ARMSubtarget *Subtarget = &getARMSubtarget();
if (Subtarget->hasAnyDataBarrier() && !Subtarget->isThumb1Only())
- addPass(createAtomicExpandLoadLinkedPass(TM));
+ if (TM->getOptLevel() != CodeGenOpt::None && EnableAtomicTidy)
+ addPass(createCFGSimplificationPass());
+ TargetPassConfig::addIRPasses();
+}
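For the motivation behind the tidy-up, consider a strong compare-and-swap. A sketch using the standard GCC/Clang builtin (not part of the patch):

// Sketch: this CAS lowers to an ldrex/strex loop on ARM. The loop already
// branches on whether the exchange succeeded, so a caller's follow-up
// "did it work?" comparison can be folded into that existing control flow,
// which is what the SimplifyCFG run above cleans up.
static bool try_update(int *p, int expected, int desired) {
  return __atomic_compare_exchange_n(p, &expected, desired, /*weak=*/false,
                                     __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
}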
+
+bool ARMPassConfig::addPreISel() {
if (TM->getOptLevel() != CodeGenOpt::None)
addPass(createGlobalMergePass(TM));
diff --git a/lib/Target/ARM/ARMTargetMachine.h b/lib/Target/ARM/ARMTargetMachine.h
index 664c992..b72b1df 100644
--- a/lib/Target/ARM/ARMTargetMachine.h
+++ b/lib/Target/ARM/ARMTargetMachine.h
@@ -14,17 +14,9 @@
#ifndef ARMTARGETMACHINE_H
#define ARMTARGETMACHINE_H
-#include "ARMFrameLowering.h"
-#include "ARMISelLowering.h"
#include "ARMInstrInfo.h"
-#include "ARMJITInfo.h"
-#include "ARMSelectionDAGInfo.h"
#include "ARMSubtarget.h"
-#include "Thumb1FrameLowering.h"
-#include "Thumb1InstrInfo.h"
-#include "Thumb2InstrInfo.h"
#include "llvm/IR/DataLayout.h"
-#include "llvm/MC/MCStreamer.h"
#include "llvm/Target/TargetMachine.h"
namespace llvm {
@@ -32,10 +24,6 @@ namespace llvm {
class ARMBaseTargetMachine : public LLVMTargetMachine {
protected:
ARMSubtarget Subtarget;
-private:
- ARMJITInfo JITInfo;
- InstrItineraryData InstrItins;
-
public:
ARMBaseTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
@@ -44,15 +32,29 @@ public:
CodeGenOpt::Level OL,
bool isLittle);
- ARMJITInfo *getJITInfo() override { return &JITInfo; }
const ARMSubtarget *getSubtargetImpl() const override { return &Subtarget; }
+ const ARMBaseRegisterInfo *getRegisterInfo() const override {
+ return getSubtargetImpl()->getRegisterInfo();
+ }
const ARMTargetLowering *getTargetLowering() const override {
- // Implemented by derived classes
- llvm_unreachable("getTargetLowering not implemented");
+ return getSubtargetImpl()->getTargetLowering();
+ }
+ const ARMSelectionDAGInfo *getSelectionDAGInfo() const override {
+ return getSubtargetImpl()->getSelectionDAGInfo();
+ }
+ const ARMBaseInstrInfo *getInstrInfo() const override {
+ return getSubtargetImpl()->getInstrInfo();
+ }
+ const ARMFrameLowering *getFrameLowering() const override {
+ return getSubtargetImpl()->getFrameLowering();
}
const InstrItineraryData *getInstrItineraryData() const override {
- return &InstrItins;
+ return &getSubtargetImpl()->getInstrItineraryData();
}
+ const DataLayout *getDataLayout() const override {
+ return getSubtargetImpl()->getDataLayout();
+ }
+ ARMJITInfo *getJITInfo() override { return Subtarget.getJITInfo(); }
/// \brief Register ARM analysis passes with a pass manager.
void addAnalysisPasses(PassManagerBase &PM) override;
@@ -67,35 +69,10 @@ public:
///
class ARMTargetMachine : public ARMBaseTargetMachine {
virtual void anchor();
- ARMInstrInfo InstrInfo;
- const DataLayout DL; // Calculates type size & alignment
- ARMTargetLowering TLInfo;
- ARMSelectionDAGInfo TSInfo;
- ARMFrameLowering FrameLowering;
public:
- ARMTargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
- const TargetOptions &Options,
- Reloc::Model RM, CodeModel::Model CM,
- CodeGenOpt::Level OL,
- bool isLittle);
-
- const ARMRegisterInfo *getRegisterInfo() const override {
- return &InstrInfo.getRegisterInfo();
- }
-
- const ARMTargetLowering *getTargetLowering() const override {
- return &TLInfo;
- }
-
- const ARMSelectionDAGInfo *getSelectionDAGInfo() const override {
- return &TSInfo;
- }
- const ARMFrameLowering *getFrameLowering() const override {
- return &FrameLowering;
- }
- const ARMInstrInfo *getInstrInfo() const override { return &InstrInfo; }
- const DataLayout *getDataLayout() const override { return &DL; }
+ ARMTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS,
+ const TargetOptions &Options, Reloc::Model RM,
+ CodeModel::Model CM, CodeGenOpt::Level OL, bool isLittle);
};
/// ARMLETargetMachine - ARM little endian target machine.
@@ -114,10 +91,9 @@ public:
class ARMBETargetMachine : public ARMTargetMachine {
void anchor() override;
public:
- ARMBETargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS, const TargetOptions &Options,
- Reloc::Model RM, CodeModel::Model CM,
- CodeGenOpt::Level OL);
+ ARMBETargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS,
+ const TargetOptions &Options, Reloc::Model RM,
+ CodeModel::Model CM, CodeGenOpt::Level OL);
};
/// ThumbTargetMachine - Thumb target machine.
@@ -126,43 +102,10 @@ public:
///
class ThumbTargetMachine : public ARMBaseTargetMachine {
virtual void anchor();
- // Either Thumb1InstrInfo or Thumb2InstrInfo.
- std::unique_ptr<ARMBaseInstrInfo> InstrInfo;
- const DataLayout DL; // Calculates type size & alignment
- ARMTargetLowering TLInfo;
- ARMSelectionDAGInfo TSInfo;
- // Either Thumb1FrameLowering or ARMFrameLowering.
- std::unique_ptr<ARMFrameLowering> FrameLowering;
public:
- ThumbTargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
- const TargetOptions &Options,
- Reloc::Model RM, CodeModel::Model CM,
- CodeGenOpt::Level OL,
- bool isLittle);
-
- /// returns either Thumb1RegisterInfo or Thumb2RegisterInfo
- const ARMBaseRegisterInfo *getRegisterInfo() const override {
- return &InstrInfo->getRegisterInfo();
- }
-
- const ARMTargetLowering *getTargetLowering() const override {
- return &TLInfo;
- }
-
- const ARMSelectionDAGInfo *getSelectionDAGInfo() const override {
- return &TSInfo;
- }
-
- /// returns either Thumb1InstrInfo or Thumb2InstrInfo
- const ARMBaseInstrInfo *getInstrInfo() const override {
- return InstrInfo.get();
- }
- /// returns either Thumb1FrameLowering or ARMFrameLowering
- const ARMFrameLowering *getFrameLowering() const override {
- return FrameLowering.get();
- }
- const DataLayout *getDataLayout() const override { return &DL; }
+ ThumbTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS,
+ const TargetOptions &Options, Reloc::Model RM,
+ CodeModel::Model CM, CodeGenOpt::Level OL, bool isLittle);
};
/// ThumbLETargetMachine - Thumb little endian target machine.
@@ -170,10 +113,10 @@ public:
class ThumbLETargetMachine : public ThumbTargetMachine {
void anchor() override;
public:
- ThumbLETargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS, const TargetOptions &Options,
- Reloc::Model RM, CodeModel::Model CM,
- CodeGenOpt::Level OL);
+ ThumbLETargetMachine(const Target &T, StringRef TT, StringRef CPU,
+ StringRef FS, const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL);
};
/// ThumbBETargetMachine - Thumb big endian target machine.
diff --git a/lib/Target/ARM/ARMTargetTransformInfo.cpp b/lib/Target/ARM/ARMTargetTransformInfo.cpp
index 57df7da..a2ace62 100644
--- a/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -443,31 +443,58 @@ unsigned ARMTTI::getAddressComputationCost(Type *Ty, bool IsComplex) const {
unsigned ARMTTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
Type *SubTp) const {
- // We only handle costs of reverse shuffles for now.
- if (Kind != SK_Reverse)
+ // We only handle costs of reverse and alternate shuffles for now.
+ if (Kind != SK_Reverse && Kind != SK_Alternate)
return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp);
- static const CostTblEntry<MVT::SimpleValueType> NEONShuffleTbl[] = {
- // Reverse shuffle cost one instruction if we are shuffling within a double
- // word (vrev) or two if we shuffle a quad word (vrev, vext).
- { ISD::VECTOR_SHUFFLE, MVT::v2i32, 1 },
- { ISD::VECTOR_SHUFFLE, MVT::v2f32, 1 },
- { ISD::VECTOR_SHUFFLE, MVT::v2i64, 1 },
- { ISD::VECTOR_SHUFFLE, MVT::v2f64, 1 },
-
- { ISD::VECTOR_SHUFFLE, MVT::v4i32, 2 },
- { ISD::VECTOR_SHUFFLE, MVT::v4f32, 2 },
- { ISD::VECTOR_SHUFFLE, MVT::v8i16, 2 },
- { ISD::VECTOR_SHUFFLE, MVT::v16i8, 2 }
- };
+ if (Kind == SK_Reverse) {
+ static const CostTblEntry<MVT::SimpleValueType> NEONShuffleTbl[] = {
+ // Reverse shuffle cost one instruction if we are shuffling within a
+ // double word (vrev) or two if we shuffle a quad word (vrev, vext).
+ {ISD::VECTOR_SHUFFLE, MVT::v2i32, 1},
+ {ISD::VECTOR_SHUFFLE, MVT::v2f32, 1},
+ {ISD::VECTOR_SHUFFLE, MVT::v2i64, 1},
+ {ISD::VECTOR_SHUFFLE, MVT::v2f64, 1},
- std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Tp);
+ {ISD::VECTOR_SHUFFLE, MVT::v4i32, 2},
+ {ISD::VECTOR_SHUFFLE, MVT::v4f32, 2},
+ {ISD::VECTOR_SHUFFLE, MVT::v8i16, 2},
+ {ISD::VECTOR_SHUFFLE, MVT::v16i8, 2}};
- int Idx = CostTableLookup(NEONShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second);
- if (Idx == -1)
- return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp);
+ std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Tp);
+
+ int Idx = CostTableLookup(NEONShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second);
+ if (Idx == -1)
+ return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp);
- return LT.first * NEONShuffleTbl[Idx].Cost;
+ return LT.first * NEONShuffleTbl[Idx].Cost;
+ }
+ if (Kind == SK_Alternate) {
+ static const CostTblEntry<MVT::SimpleValueType> NEONAltShuffleTbl[] = {
+ // Alt shuffle cost table for ARM. Cost is the number of instructions
+ // required to create the shuffled vector.
+
+ {ISD::VECTOR_SHUFFLE, MVT::v2f32, 1},
+ {ISD::VECTOR_SHUFFLE, MVT::v2i64, 1},
+ {ISD::VECTOR_SHUFFLE, MVT::v2f64, 1},
+ {ISD::VECTOR_SHUFFLE, MVT::v2i32, 1},
+
+ {ISD::VECTOR_SHUFFLE, MVT::v4i32, 2},
+ {ISD::VECTOR_SHUFFLE, MVT::v4f32, 2},
+ {ISD::VECTOR_SHUFFLE, MVT::v4i16, 2},
+
+ {ISD::VECTOR_SHUFFLE, MVT::v8i16, 16},
+
+ {ISD::VECTOR_SHUFFLE, MVT::v16i8, 32}};
+
+ std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Tp);
+ int Idx =
+ CostTableLookup(NEONAltShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second);
+ if (Idx == -1)
+ return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp);
+ return LT.first * NEONAltShuffleTbl[Idx].Cost;
+ }
+ return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp);
}
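Reading the tables above: a reverse of <2 x i32> stays within a double word and costs 1 (one vrev), while <4 x i32> costs 2 (vrev plus vext). A hedged usage sketch, with TTI and Int32Ty assumed to be in scope:

// Sketch: query the cost of reversing a <4 x i32>; with the NEON table above
// this should report 2. The surrounding setup is assumed, not shown.
unsigned Cost = TTI.getShuffleCost(TargetTransformInfo::SK_Reverse,
                                   VectorType::get(Int32Ty, /*NumElements=*/4),
                                   /*Index=*/0, /*SubTp=*/nullptr);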
unsigned ARMTTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index 5cdf394..b62706c 100644
--- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -190,11 +190,11 @@ class ARMAsmParser : public MCTargetAsmParser {
}
int tryParseRegister();
- bool tryParseRegisterWithWriteBack(SmallVectorImpl<MCParsedAsmOperand*> &);
- int tryParseShiftRegister(SmallVectorImpl<MCParsedAsmOperand*> &);
- bool parseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &);
- bool parseMemory(SmallVectorImpl<MCParsedAsmOperand*> &);
- bool parseOperand(SmallVectorImpl<MCParsedAsmOperand*> &, StringRef Mnemonic);
+ bool tryParseRegisterWithWriteBack(OperandVector &);
+ int tryParseShiftRegister(OperandVector &);
+ bool parseRegisterList(OperandVector &);
+ bool parseMemory(OperandVector &);
+ bool parseOperand(OperandVector &, StringRef Mnemonic);
bool parsePrefix(ARMMCExpr::VariantKind &RefKind);
bool parseMemRegOffsetShift(ARM_AM::ShiftOpc &ShiftType,
unsigned &ShiftAmount);
@@ -282,54 +282,42 @@ class ARMAsmParser : public MCTargetAsmParser {
/// }
- OperandMatchResultTy parseITCondCode(SmallVectorImpl<MCParsedAsmOperand*>&);
- OperandMatchResultTy parseCoprocNumOperand(
- SmallVectorImpl<MCParsedAsmOperand*>&);
- OperandMatchResultTy parseCoprocRegOperand(
- SmallVectorImpl<MCParsedAsmOperand*>&);
- OperandMatchResultTy parseCoprocOptionOperand(
- SmallVectorImpl<MCParsedAsmOperand*>&);
- OperandMatchResultTy parseMemBarrierOptOperand(
- SmallVectorImpl<MCParsedAsmOperand*>&);
- OperandMatchResultTy parseInstSyncBarrierOptOperand(
- SmallVectorImpl<MCParsedAsmOperand*>&);
- OperandMatchResultTy parseProcIFlagsOperand(
- SmallVectorImpl<MCParsedAsmOperand*>&);
- OperandMatchResultTy parseMSRMaskOperand(
- SmallVectorImpl<MCParsedAsmOperand*>&);
- OperandMatchResultTy parsePKHImm(SmallVectorImpl<MCParsedAsmOperand*> &O,
- StringRef Op, int Low, int High);
- OperandMatchResultTy parsePKHLSLImm(SmallVectorImpl<MCParsedAsmOperand*> &O) {
+ OperandMatchResultTy parseITCondCode(OperandVector &);
+ OperandMatchResultTy parseCoprocNumOperand(OperandVector &);
+ OperandMatchResultTy parseCoprocRegOperand(OperandVector &);
+ OperandMatchResultTy parseCoprocOptionOperand(OperandVector &);
+ OperandMatchResultTy parseMemBarrierOptOperand(OperandVector &);
+ OperandMatchResultTy parseInstSyncBarrierOptOperand(OperandVector &);
+ OperandMatchResultTy parseProcIFlagsOperand(OperandVector &);
+ OperandMatchResultTy parseMSRMaskOperand(OperandVector &);
+ OperandMatchResultTy parsePKHImm(OperandVector &O, StringRef Op, int Low,
+ int High);
+ OperandMatchResultTy parsePKHLSLImm(OperandVector &O) {
return parsePKHImm(O, "lsl", 0, 31);
}
- OperandMatchResultTy parsePKHASRImm(SmallVectorImpl<MCParsedAsmOperand*> &O) {
+ OperandMatchResultTy parsePKHASRImm(OperandVector &O) {
return parsePKHImm(O, "asr", 1, 32);
}
- OperandMatchResultTy parseSetEndImm(SmallVectorImpl<MCParsedAsmOperand*>&);
- OperandMatchResultTy parseShifterImm(SmallVectorImpl<MCParsedAsmOperand*>&);
- OperandMatchResultTy parseRotImm(SmallVectorImpl<MCParsedAsmOperand*>&);
- OperandMatchResultTy parseBitfield(SmallVectorImpl<MCParsedAsmOperand*>&);
- OperandMatchResultTy parsePostIdxReg(SmallVectorImpl<MCParsedAsmOperand*>&);
- OperandMatchResultTy parseAM3Offset(SmallVectorImpl<MCParsedAsmOperand*>&);
- OperandMatchResultTy parseFPImm(SmallVectorImpl<MCParsedAsmOperand*>&);
- OperandMatchResultTy parseVectorList(SmallVectorImpl<MCParsedAsmOperand*>&);
+ OperandMatchResultTy parseSetEndImm(OperandVector &);
+ OperandMatchResultTy parseShifterImm(OperandVector &);
+ OperandMatchResultTy parseRotImm(OperandVector &);
+ OperandMatchResultTy parseBitfield(OperandVector &);
+ OperandMatchResultTy parsePostIdxReg(OperandVector &);
+ OperandMatchResultTy parseAM3Offset(OperandVector &);
+ OperandMatchResultTy parseFPImm(OperandVector &);
+ OperandMatchResultTy parseVectorList(OperandVector &);
OperandMatchResultTy parseVectorLane(VectorLaneTy &LaneKind, unsigned &Index,
SMLoc &EndLoc);
// Asm Match Converter Methods
- void cvtThumbMultiply(MCInst &Inst,
- const SmallVectorImpl<MCParsedAsmOperand*> &);
- void cvtThumbBranches(MCInst &Inst,
- const SmallVectorImpl<MCParsedAsmOperand*> &);
-
- bool validateInstruction(MCInst &Inst,
- const SmallVectorImpl<MCParsedAsmOperand*> &Ops);
- bool processInstruction(MCInst &Inst,
- const SmallVectorImpl<MCParsedAsmOperand*> &Ops);
- bool shouldOmitCCOutOperand(StringRef Mnemonic,
- SmallVectorImpl<MCParsedAsmOperand*> &Operands);
- bool shouldOmitPredicateOperand(StringRef Mnemonic,
- SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+ void cvtThumbMultiply(MCInst &Inst, const OperandVector &);
+ void cvtThumbBranches(MCInst &Inst, const OperandVector &);
+
+ bool validateInstruction(MCInst &Inst, const OperandVector &Ops);
+ bool processInstruction(MCInst &Inst, const OperandVector &Ops);
+ bool shouldOmitCCOutOperand(StringRef Mnemonic, OperandVector &Operands);
+ bool shouldOmitPredicateOperand(StringRef Mnemonic, OperandVector &Operands);
+
public:
enum ARMMatchResultTy {
Match_RequiresITBlock = FIRST_TARGET_MATCH_RESULT_TY,
@@ -361,19 +349,17 @@ public:
// Implementation of the MCTargetAsmParser interface:
bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
- bool
- ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
- SMLoc NameLoc,
- SmallVectorImpl<MCParsedAsmOperand*> &Operands) override;
+ bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
+ SMLoc NameLoc, OperandVector &Operands) override;
bool ParseDirective(AsmToken DirectiveID) override;
- unsigned validateTargetOperandClass(MCParsedAsmOperand *Op,
+ unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
unsigned Kind) override;
unsigned checkTargetMatchPredicate(MCInst &Inst) override;
bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
- SmallVectorImpl<MCParsedAsmOperand*> &Operands,
- MCStreamer &Out, unsigned &ErrorInfo,
+ OperandVector &Operands, MCStreamer &Out,
+ unsigned &ErrorInfo,
bool MatchingInlineAsm) override;
void onLabelParsed(MCSymbol *Symbol) override;
};
@@ -545,8 +531,8 @@ class ARMOperand : public MCParsedAsmOperand {
struct BitfieldOp Bitfield;
};
- ARMOperand(KindTy K) : MCParsedAsmOperand(), Kind(K) {}
public:
+ ARMOperand(KindTy K) : MCParsedAsmOperand(), Kind(K) {}
ARMOperand(const ARMOperand &o) : MCParsedAsmOperand() {
Kind = o.Kind;
StartLoc = o.StartLoc;
@@ -2481,56 +2467,58 @@ public:
void print(raw_ostream &OS) const override;
- static ARMOperand *CreateITMask(unsigned Mask, SMLoc S) {
- ARMOperand *Op = new ARMOperand(k_ITCondMask);
+ static std::unique_ptr<ARMOperand> CreateITMask(unsigned Mask, SMLoc S) {
+ auto Op = make_unique<ARMOperand>(k_ITCondMask);
Op->ITMask.Mask = Mask;
Op->StartLoc = S;
Op->EndLoc = S;
return Op;
}
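With the factories now returning std::unique_ptr, call sites hand ownership straight to the operand vector. A sketch, with Mask and Loc assumed in scope:

// Sketch: no manual delete anywhere -- the unique_ptr moves into Operands,
// and destroying the vector frees every ARMOperand.
Operands.push_back(ARMOperand::CreateITMask(Mask, Loc));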
- static ARMOperand *CreateCondCode(ARMCC::CondCodes CC, SMLoc S) {
- ARMOperand *Op = new ARMOperand(k_CondCode);
+ static std::unique_ptr<ARMOperand> CreateCondCode(ARMCC::CondCodes CC,
+ SMLoc S) {
+ auto Op = make_unique<ARMOperand>(k_CondCode);
Op->CC.Val = CC;
Op->StartLoc = S;
Op->EndLoc = S;
return Op;
}
- static ARMOperand *CreateCoprocNum(unsigned CopVal, SMLoc S) {
- ARMOperand *Op = new ARMOperand(k_CoprocNum);
+ static std::unique_ptr<ARMOperand> CreateCoprocNum(unsigned CopVal, SMLoc S) {
+ auto Op = make_unique<ARMOperand>(k_CoprocNum);
Op->Cop.Val = CopVal;
Op->StartLoc = S;
Op->EndLoc = S;
return Op;
}
- static ARMOperand *CreateCoprocReg(unsigned CopVal, SMLoc S) {
- ARMOperand *Op = new ARMOperand(k_CoprocReg);
+ static std::unique_ptr<ARMOperand> CreateCoprocReg(unsigned CopVal, SMLoc S) {
+ auto Op = make_unique<ARMOperand>(k_CoprocReg);
Op->Cop.Val = CopVal;
Op->StartLoc = S;
Op->EndLoc = S;
return Op;
}
- static ARMOperand *CreateCoprocOption(unsigned Val, SMLoc S, SMLoc E) {
- ARMOperand *Op = new ARMOperand(k_CoprocOption);
+ static std::unique_ptr<ARMOperand> CreateCoprocOption(unsigned Val, SMLoc S,
+ SMLoc E) {
+ auto Op = make_unique<ARMOperand>(k_CoprocOption);
Op->Cop.Val = Val;
Op->StartLoc = S;
Op->EndLoc = E;
return Op;
}
- static ARMOperand *CreateCCOut(unsigned RegNum, SMLoc S) {
- ARMOperand *Op = new ARMOperand(k_CCOut);
+ static std::unique_ptr<ARMOperand> CreateCCOut(unsigned RegNum, SMLoc S) {
+ auto Op = make_unique<ARMOperand>(k_CCOut);
Op->Reg.RegNum = RegNum;
Op->StartLoc = S;
Op->EndLoc = S;
return Op;
}
- static ARMOperand *CreateToken(StringRef Str, SMLoc S) {
- ARMOperand *Op = new ARMOperand(k_Token);
+ static std::unique_ptr<ARMOperand> CreateToken(StringRef Str, SMLoc S) {
+ auto Op = make_unique<ARMOperand>(k_Token);
Op->Tok.Data = Str.data();
Op->Tok.Length = Str.size();
Op->StartLoc = S;
@@ -2538,20 +2526,20 @@ public:
return Op;
}
- static ARMOperand *CreateReg(unsigned RegNum, SMLoc S, SMLoc E) {
- ARMOperand *Op = new ARMOperand(k_Register);
+ static std::unique_ptr<ARMOperand> CreateReg(unsigned RegNum, SMLoc S,
+ SMLoc E) {
+ auto Op = make_unique<ARMOperand>(k_Register);
Op->Reg.RegNum = RegNum;
Op->StartLoc = S;
Op->EndLoc = E;
return Op;
}
- static ARMOperand *CreateShiftedRegister(ARM_AM::ShiftOpc ShTy,
- unsigned SrcReg,
- unsigned ShiftReg,
- unsigned ShiftImm,
- SMLoc S, SMLoc E) {
- ARMOperand *Op = new ARMOperand(k_ShiftedRegister);
+ static std::unique_ptr<ARMOperand>
+ CreateShiftedRegister(ARM_AM::ShiftOpc ShTy, unsigned SrcReg,
+ unsigned ShiftReg, unsigned ShiftImm, SMLoc S,
+ SMLoc E) {
+ auto Op = make_unique<ARMOperand>(k_ShiftedRegister);
Op->RegShiftedReg.ShiftTy = ShTy;
Op->RegShiftedReg.SrcReg = SrcReg;
Op->RegShiftedReg.ShiftReg = ShiftReg;
@@ -2561,11 +2549,10 @@ public:
return Op;
}
- static ARMOperand *CreateShiftedImmediate(ARM_AM::ShiftOpc ShTy,
- unsigned SrcReg,
- unsigned ShiftImm,
- SMLoc S, SMLoc E) {
- ARMOperand *Op = new ARMOperand(k_ShiftedImmediate);
+ static std::unique_ptr<ARMOperand>
+ CreateShiftedImmediate(ARM_AM::ShiftOpc ShTy, unsigned SrcReg,
+ unsigned ShiftImm, SMLoc S, SMLoc E) {
+ auto Op = make_unique<ARMOperand>(k_ShiftedImmediate);
Op->RegShiftedImm.ShiftTy = ShTy;
Op->RegShiftedImm.SrcReg = SrcReg;
Op->RegShiftedImm.ShiftImm = ShiftImm;
@@ -2574,9 +2561,9 @@ public:
return Op;
}
- static ARMOperand *CreateShifterImm(bool isASR, unsigned Imm,
- SMLoc S, SMLoc E) {
- ARMOperand *Op = new ARMOperand(k_ShifterImmediate);
+ static std::unique_ptr<ARMOperand> CreateShifterImm(bool isASR, unsigned Imm,
+ SMLoc S, SMLoc E) {
+ auto Op = make_unique<ARMOperand>(k_ShifterImmediate);
Op->ShifterImm.isASR = isASR;
Op->ShifterImm.Imm = Imm;
Op->StartLoc = S;
@@ -2584,17 +2571,18 @@ public:
return Op;
}
- static ARMOperand *CreateRotImm(unsigned Imm, SMLoc S, SMLoc E) {
- ARMOperand *Op = new ARMOperand(k_RotateImmediate);
+ static std::unique_ptr<ARMOperand> CreateRotImm(unsigned Imm, SMLoc S,
+ SMLoc E) {
+ auto Op = make_unique<ARMOperand>(k_RotateImmediate);
Op->RotImm.Imm = Imm;
Op->StartLoc = S;
Op->EndLoc = E;
return Op;
}
- static ARMOperand *CreateBitfield(unsigned LSB, unsigned Width,
- SMLoc S, SMLoc E) {
- ARMOperand *Op = new ARMOperand(k_BitfieldDescriptor);
+ static std::unique_ptr<ARMOperand>
+ CreateBitfield(unsigned LSB, unsigned Width, SMLoc S, SMLoc E) {
+ auto Op = make_unique<ARMOperand>(k_BitfieldDescriptor);
Op->Bitfield.LSB = LSB;
Op->Bitfield.Width = Width;
Op->StartLoc = S;
@@ -2602,8 +2590,8 @@ public:
return Op;
}
- static ARMOperand *
- CreateRegList(SmallVectorImpl<std::pair<unsigned, unsigned> > &Regs,
+ static std::unique_ptr<ARMOperand>
+ CreateRegList(SmallVectorImpl<std::pair<unsigned, unsigned>> &Regs,
SMLoc StartLoc, SMLoc EndLoc) {
assert (Regs.size() > 0 && "RegList contains no registers?");
KindTy Kind = k_RegisterList;
@@ -2617,7 +2605,7 @@ public:
// Sort based on the register encoding values.
array_pod_sort(Regs.begin(), Regs.end());
- ARMOperand *Op = new ARMOperand(Kind);
+ auto Op = make_unique<ARMOperand>(Kind);
for (SmallVectorImpl<std::pair<unsigned, unsigned> >::const_iterator
I = Regs.begin(), E = Regs.end(); I != E; ++I)
Op->Registers.push_back(I->second);
@@ -2626,9 +2614,11 @@ public:
return Op;
}
- static ARMOperand *CreateVectorList(unsigned RegNum, unsigned Count,
- bool isDoubleSpaced, SMLoc S, SMLoc E) {
- ARMOperand *Op = new ARMOperand(k_VectorList);
+ static std::unique_ptr<ARMOperand> CreateVectorList(unsigned RegNum,
+ unsigned Count,
+ bool isDoubleSpaced,
+ SMLoc S, SMLoc E) {
+ auto Op = make_unique<ARMOperand>(k_VectorList);
Op->VectorList.RegNum = RegNum;
Op->VectorList.Count = Count;
Op->VectorList.isDoubleSpaced = isDoubleSpaced;
@@ -2637,10 +2627,10 @@ public:
return Op;
}
- static ARMOperand *CreateVectorListAllLanes(unsigned RegNum, unsigned Count,
- bool isDoubleSpaced,
- SMLoc S, SMLoc E) {
- ARMOperand *Op = new ARMOperand(k_VectorListAllLanes);
+ static std::unique_ptr<ARMOperand>
+ CreateVectorListAllLanes(unsigned RegNum, unsigned Count, bool isDoubleSpaced,
+ SMLoc S, SMLoc E) {
+ auto Op = make_unique<ARMOperand>(k_VectorListAllLanes);
Op->VectorList.RegNum = RegNum;
Op->VectorList.Count = Count;
Op->VectorList.isDoubleSpaced = isDoubleSpaced;
@@ -2649,11 +2639,10 @@ public:
return Op;
}
- static ARMOperand *CreateVectorListIndexed(unsigned RegNum, unsigned Count,
- unsigned Index,
- bool isDoubleSpaced,
- SMLoc S, SMLoc E) {
- ARMOperand *Op = new ARMOperand(k_VectorListIndexed);
+ static std::unique_ptr<ARMOperand>
+ CreateVectorListIndexed(unsigned RegNum, unsigned Count, unsigned Index,
+ bool isDoubleSpaced, SMLoc S, SMLoc E) {
+ auto Op = make_unique<ARMOperand>(k_VectorListIndexed);
Op->VectorList.RegNum = RegNum;
Op->VectorList.Count = Count;
Op->VectorList.LaneIndex = Index;
@@ -2663,33 +2652,30 @@ public:
return Op;
}
- static ARMOperand *CreateVectorIndex(unsigned Idx, SMLoc S, SMLoc E,
- MCContext &Ctx) {
- ARMOperand *Op = new ARMOperand(k_VectorIndex);
+ static std::unique_ptr<ARMOperand>
+ CreateVectorIndex(unsigned Idx, SMLoc S, SMLoc E, MCContext &Ctx) {
+ auto Op = make_unique<ARMOperand>(k_VectorIndex);
Op->VectorIndex.Val = Idx;
Op->StartLoc = S;
Op->EndLoc = E;
return Op;
}
- static ARMOperand *CreateImm(const MCExpr *Val, SMLoc S, SMLoc E) {
- ARMOperand *Op = new ARMOperand(k_Immediate);
+ static std::unique_ptr<ARMOperand> CreateImm(const MCExpr *Val, SMLoc S,
+ SMLoc E) {
+ auto Op = make_unique<ARMOperand>(k_Immediate);
Op->Imm.Val = Val;
Op->StartLoc = S;
Op->EndLoc = E;
return Op;
}
- static ARMOperand *CreateMem(unsigned BaseRegNum,
- const MCConstantExpr *OffsetImm,
- unsigned OffsetRegNum,
- ARM_AM::ShiftOpc ShiftType,
- unsigned ShiftImm,
- unsigned Alignment,
- bool isNegative,
- SMLoc S, SMLoc E,
- SMLoc AlignmentLoc = SMLoc()) {
- ARMOperand *Op = new ARMOperand(k_Memory);
+ static std::unique_ptr<ARMOperand>
+ CreateMem(unsigned BaseRegNum, const MCConstantExpr *OffsetImm,
+ unsigned OffsetRegNum, ARM_AM::ShiftOpc ShiftType,
+ unsigned ShiftImm, unsigned Alignment, bool isNegative, SMLoc S,
+ SMLoc E, SMLoc AlignmentLoc = SMLoc()) {
+ auto Op = make_unique<ARMOperand>(k_Memory);
Op->Memory.BaseRegNum = BaseRegNum;
Op->Memory.OffsetImm = OffsetImm;
Op->Memory.OffsetRegNum = OffsetRegNum;
@@ -2703,11 +2689,10 @@ public:
return Op;
}
- static ARMOperand *CreatePostIdxReg(unsigned RegNum, bool isAdd,
- ARM_AM::ShiftOpc ShiftTy,
- unsigned ShiftImm,
- SMLoc S, SMLoc E) {
- ARMOperand *Op = new ARMOperand(k_PostIndexRegister);
+ static std::unique_ptr<ARMOperand>
+ CreatePostIdxReg(unsigned RegNum, bool isAdd, ARM_AM::ShiftOpc ShiftTy,
+ unsigned ShiftImm, SMLoc S, SMLoc E) {
+ auto Op = make_unique<ARMOperand>(k_PostIndexRegister);
Op->PostIdxReg.RegNum = RegNum;
Op->PostIdxReg.isAdd = isAdd;
Op->PostIdxReg.ShiftTy = ShiftTy;
@@ -2717,33 +2702,35 @@ public:
return Op;
}
- static ARMOperand *CreateMemBarrierOpt(ARM_MB::MemBOpt Opt, SMLoc S) {
- ARMOperand *Op = new ARMOperand(k_MemBarrierOpt);
+ static std::unique_ptr<ARMOperand> CreateMemBarrierOpt(ARM_MB::MemBOpt Opt,
+ SMLoc S) {
+ auto Op = make_unique<ARMOperand>(k_MemBarrierOpt);
Op->MBOpt.Val = Opt;
Op->StartLoc = S;
Op->EndLoc = S;
return Op;
}
- static ARMOperand *CreateInstSyncBarrierOpt(ARM_ISB::InstSyncBOpt Opt,
- SMLoc S) {
- ARMOperand *Op = new ARMOperand(k_InstSyncBarrierOpt);
+ static std::unique_ptr<ARMOperand>
+ CreateInstSyncBarrierOpt(ARM_ISB::InstSyncBOpt Opt, SMLoc S) {
+ auto Op = make_unique<ARMOperand>(k_InstSyncBarrierOpt);
Op->ISBOpt.Val = Opt;
Op->StartLoc = S;
Op->EndLoc = S;
return Op;
}
- static ARMOperand *CreateProcIFlags(ARM_PROC::IFlags IFlags, SMLoc S) {
- ARMOperand *Op = new ARMOperand(k_ProcIFlags);
+ static std::unique_ptr<ARMOperand> CreateProcIFlags(ARM_PROC::IFlags IFlags,
+ SMLoc S) {
+ auto Op = make_unique<ARMOperand>(k_ProcIFlags);
Op->IFlags.Val = IFlags;
Op->StartLoc = S;
Op->EndLoc = S;
return Op;
}
- static ARMOperand *CreateMSRMask(unsigned MMask, SMLoc S) {
- ARMOperand *Op = new ARMOperand(k_MSRMask);
+ static std::unique_ptr<ARMOperand> CreateMSRMask(unsigned MMask, SMLoc S) {
+ auto Op = make_unique<ARMOperand>(k_MSRMask);
Op->MMask.Val = MMask;
Op->StartLoc = S;
Op->EndLoc = S;
@@ -2947,8 +2934,7 @@ int ARMAsmParser::tryParseRegister() {
// occurs, return -1. An irrecoverable error is one where tokens have been
// consumed in the process of trying to parse the shifter (i.e., when it is
// indeed a shifter operand, but malformed).
-int ARMAsmParser::tryParseShiftRegister(
- SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+int ARMAsmParser::tryParseShiftRegister(OperandVector &Operands) {
SMLoc S = Parser.getTok().getLoc();
const AsmToken &Tok = Parser.getTok();
if (Tok.isNot(AsmToken::Identifier))
@@ -2972,7 +2958,8 @@ int ARMAsmParser::tryParseShiftRegister(
// The source register for the shift has already been added to the
// operand list, so we need to pop it off and combine it into the shifted
// register operand instead.
- std::unique_ptr<ARMOperand> PrevOp((ARMOperand*)Operands.pop_back_val());
+ std::unique_ptr<ARMOperand> PrevOp(
+ (ARMOperand *)Operands.pop_back_val().release());
if (!PrevOp->isReg())
return Error(PrevOp->getStartLoc(), "shift must be of a register");
int SrcReg = PrevOp->getReg();
@@ -3049,8 +3036,7 @@ int ARMAsmParser::tryParseShiftRegister(
///
/// TODO this is likely to change to allow different register types and or to
/// parse for a specific register type.
-bool ARMAsmParser::
-tryParseRegisterWithWriteBack(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+bool ARMAsmParser::tryParseRegisterWithWriteBack(OperandVector &Operands) {
const AsmToken &RegTok = Parser.getTok();
int RegNo = tryParseRegister();
if (RegNo == -1)
@@ -3096,17 +3082,25 @@ tryParseRegisterWithWriteBack(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
}
/// MatchCoprocessorOperandName - Try to parse a coprocessor-related
-/// instruction with a symbolic operand name. Example: "p1", "p7", "c3",
-/// "c5", ...
+/// instruction with a symbolic operand name.
+/// We accept "crN" syntax for GAS compatibility.
+/// <operand-name> ::= <prefix><number>
+/// If CoprocOp is 'c', then:
+/// <prefix> ::= c | cr
+/// If CoprocOp is 'p', then:
+/// <prefix> ::= p
+/// <number> ::= integer in range [0, 15]
static int MatchCoprocessorOperandName(StringRef Name, char CoprocOp) {
// Use the same layout as the tablegen'erated register name matcher. Ugly,
// but efficient.
+ if (Name.size() < 2 || Name[0] != CoprocOp)
+ return -1;
+ Name = (Name[1] == 'r') ? Name.drop_front(2) : Name.drop_front();
+
switch (Name.size()) {
default: return -1;
- case 2:
- if (Name[0] != CoprocOp)
- return -1;
- switch (Name[1]) {
+ case 1:
+ switch (Name[0]) {
default: return -1;
case '0': return 0;
case '1': return 1;
@@ -3119,10 +3113,10 @@ static int MatchCoprocessorOperandName(StringRef Name, char CoprocOp) {
case '8': return 8;
case '9': return 9;
}
- case 3:
- if (Name[0] != CoprocOp || Name[1] != '1')
+ case 2:
+ if (Name[0] != '1')
return -1;
- switch (Name[2]) {
+ switch (Name[1]) {
default: return -1;
// p10 and p11 are invalid for coproc instructions (reserved for FP/NEON)
case '0': return CoprocOp == 'p'? -1: 10;
@@ -3136,8 +3130,8 @@ static int MatchCoprocessorOperandName(StringRef Name, char CoprocOp) {
}
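A few cases the revised matcher accepts or rejects; the expected values follow directly from the switch above (sketch, assuming the function is visible):

// Sketch: GAS-style "crN" now matches alongside "cN"; p10/p11 stay reserved.
assert(MatchCoprocessorOperandName("c3", 'c') == 3);
assert(MatchCoprocessorOperandName("cr3", 'c') == 3);  // GAS-compatible alias
assert(MatchCoprocessorOperandName("p15", 'p') == 15);
assert(MatchCoprocessorOperandName("p10", 'p') == -1); // reserved for FP/NEON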
/// parseITCondCode - Try to parse a condition code for an IT instruction.
-ARMAsmParser::OperandMatchResultTy ARMAsmParser::
-parseITCondCode(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ARMAsmParser::OperandMatchResultTy
+ARMAsmParser::parseITCondCode(OperandVector &Operands) {
SMLoc S = Parser.getTok().getLoc();
const AsmToken &Tok = Parser.getTok();
if (!Tok.is(AsmToken::Identifier))
@@ -3173,8 +3167,8 @@ parseITCondCode(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
/// parseCoprocNumOperand - Try to parse a coprocessor number operand. The
/// token must be an Identifier when called, and if it is a coprocessor
/// number, the token is eaten and the operand is added to the operand list.
-ARMAsmParser::OperandMatchResultTy ARMAsmParser::
-parseCoprocNumOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ARMAsmParser::OperandMatchResultTy
+ARMAsmParser::parseCoprocNumOperand(OperandVector &Operands) {
SMLoc S = Parser.getTok().getLoc();
const AsmToken &Tok = Parser.getTok();
if (Tok.isNot(AsmToken::Identifier))
@@ -3192,8 +3186,8 @@ parseCoprocNumOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
/// parseCoprocRegOperand - Try to parse a coprocessor register operand. The
/// token must be an Identifier when called, and if it is a coprocessor
/// register, the token is eaten and the operand is added to the operand list.
-ARMAsmParser::OperandMatchResultTy ARMAsmParser::
-parseCoprocRegOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ARMAsmParser::OperandMatchResultTy
+ARMAsmParser::parseCoprocRegOperand(OperandVector &Operands) {
SMLoc S = Parser.getTok().getLoc();
const AsmToken &Tok = Parser.getTok();
if (Tok.isNot(AsmToken::Identifier))
@@ -3210,8 +3204,8 @@ parseCoprocRegOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
/// parseCoprocOptionOperand - Try to parse a coprocessor option operand.
/// coproc_option : '{' imm0_255 '}'
-ARMAsmParser::OperandMatchResultTy ARMAsmParser::
-parseCoprocOptionOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ARMAsmParser::OperandMatchResultTy
+ARMAsmParser::parseCoprocOptionOperand(OperandVector &Operands) {
SMLoc S = Parser.getTok().getLoc();
// If this isn't a '{', this isn't a coprocessor immediate operand.
@@ -3288,8 +3282,7 @@ static unsigned getDRegFromQReg(unsigned QReg) {
}
/// Parse a register list.
-bool ARMAsmParser::
-parseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+bool ARMAsmParser::parseRegisterList(OperandVector &Operands) {
assert(Parser.getTok().is(AsmToken::LCurly) &&
"Token is not a Left Curly Brace");
SMLoc S = Parser.getTok().getLoc();
@@ -3470,8 +3463,8 @@ parseVectorLane(VectorLaneTy &LaneKind, unsigned &Index, SMLoc &EndLoc) {
}
// parse a vector register list
-ARMAsmParser::OperandMatchResultTy ARMAsmParser::
-parseVectorList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ARMAsmParser::OperandMatchResultTy
+ARMAsmParser::parseVectorList(OperandVector &Operands) {
VectorLaneTy LaneKind;
unsigned LaneIndex;
SMLoc S = Parser.getTok().getLoc();
@@ -3721,8 +3714,8 @@ parseVectorList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
}
/// parseMemBarrierOptOperand - Try to parse DSB/DMB data barrier options.
-ARMAsmParser::OperandMatchResultTy ARMAsmParser::
-parseMemBarrierOptOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ARMAsmParser::OperandMatchResultTy
+ARMAsmParser::parseMemBarrierOptOperand(OperandVector &Operands) {
SMLoc S = Parser.getTok().getLoc();
const AsmToken &Tok = Parser.getTok();
unsigned Opt;
@@ -3792,8 +3785,8 @@ parseMemBarrierOptOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
}
/// parseInstSyncBarrierOptOperand - Try to parse ISB inst sync barrier options.
-ARMAsmParser::OperandMatchResultTy ARMAsmParser::
-parseInstSyncBarrierOptOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ARMAsmParser::OperandMatchResultTy
+ARMAsmParser::parseInstSyncBarrierOptOperand(OperandVector &Operands) {
SMLoc S = Parser.getTok().getLoc();
const AsmToken &Tok = Parser.getTok();
unsigned Opt;
@@ -3843,8 +3836,8 @@ parseInstSyncBarrierOptOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
/// parseProcIFlagsOperand - Try to parse iflags from CPS instruction.
-ARMAsmParser::OperandMatchResultTy ARMAsmParser::
-parseProcIFlagsOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ARMAsmParser::OperandMatchResultTy
+ARMAsmParser::parseProcIFlagsOperand(OperandVector &Operands) {
SMLoc S = Parser.getTok().getLoc();
const AsmToken &Tok = Parser.getTok();
if (!Tok.is(AsmToken::Identifier))
@@ -3877,8 +3870,8 @@ parseProcIFlagsOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
}
/// parseMSRMaskOperand - Try to parse mask flags from MSR instruction.
-ARMAsmParser::OperandMatchResultTy ARMAsmParser::
-parseMSRMaskOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ARMAsmParser::OperandMatchResultTy
+ARMAsmParser::parseMSRMaskOperand(OperandVector &Operands) {
SMLoc S = Parser.getTok().getLoc();
const AsmToken &Tok = Parser.getTok();
if (!Tok.is(AsmToken::Identifier))
@@ -4005,9 +3998,9 @@ parseMSRMaskOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
return MatchOperand_Success;
}
-ARMAsmParser::OperandMatchResultTy ARMAsmParser::
-parsePKHImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands, StringRef Op,
- int Low, int High) {
+ARMAsmParser::OperandMatchResultTy
+ARMAsmParser::parsePKHImm(OperandVector &Operands, StringRef Op, int Low,
+ int High) {
const AsmToken &Tok = Parser.getTok();
if (Tok.isNot(AsmToken::Identifier)) {
Error(Parser.getTok().getLoc(), Op + " operand expected.");
@@ -4053,8 +4046,8 @@ parsePKHImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands, StringRef Op,
return MatchOperand_Success;
}
-ARMAsmParser::OperandMatchResultTy ARMAsmParser::
-parseSetEndImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ARMAsmParser::OperandMatchResultTy
+ARMAsmParser::parseSetEndImm(OperandVector &Operands) {
const AsmToken &Tok = Parser.getTok();
SMLoc S = Tok.getLoc();
if (Tok.isNot(AsmToken::Identifier)) {
@@ -4082,8 +4075,8 @@ parseSetEndImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
/// lsl #n 'n' in [0,31]
/// asr #n 'n' in [1,32]
/// n == 32 encoded as n == 0.
-ARMAsmParser::OperandMatchResultTy ARMAsmParser::
-parseShifterImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ARMAsmParser::OperandMatchResultTy
+ARMAsmParser::parseShifterImm(OperandVector &Operands) {
const AsmToken &Tok = Parser.getTok();
SMLoc S = Tok.getLoc();
if (Tok.isNot(AsmToken::Identifier)) {
@@ -4152,8 +4145,8 @@ parseShifterImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
/// parseRotImm - Parse the shifter immediate operand for SXTB/UXTB family
/// of instructions. Legal values are:
/// ror #n 'n' in {0, 8, 16, 24}
-ARMAsmParser::OperandMatchResultTy ARMAsmParser::
-parseRotImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ARMAsmParser::OperandMatchResultTy
+ARMAsmParser::parseRotImm(OperandVector &Operands) {
const AsmToken &Tok = Parser.getTok();
SMLoc S = Tok.getLoc();
if (Tok.isNot(AsmToken::Identifier))
@@ -4198,8 +4191,8 @@ parseRotImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
return MatchOperand_Success;
}
-ARMAsmParser::OperandMatchResultTy ARMAsmParser::
-parseBitfield(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ARMAsmParser::OperandMatchResultTy
+ARMAsmParser::parseBitfield(OperandVector &Operands) {
SMLoc S = Parser.getTok().getLoc();
// The bitfield descriptor is really two operands, the LSB and the width.
if (Parser.getTok().isNot(AsmToken::Hash) &&
@@ -4266,8 +4259,8 @@ parseBitfield(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
return MatchOperand_Success;
}
-ARMAsmParser::OperandMatchResultTy ARMAsmParser::
-parsePostIdxReg(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ARMAsmParser::OperandMatchResultTy
+ARMAsmParser::parsePostIdxReg(OperandVector &Operands) {
// Check for a post-index addressing register operand. Specifically:
// postidx_reg := '+' register {, shift}
// | '-' register {, shift}
@@ -4315,8 +4308,8 @@ parsePostIdxReg(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
return MatchOperand_Success;
}
-ARMAsmParser::OperandMatchResultTy ARMAsmParser::
-parseAM3Offset(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ARMAsmParser::OperandMatchResultTy
+ARMAsmParser::parseAM3Offset(OperandVector &Operands) {
// Check for a post-index addressing register operand. Specifically:
// am3offset := '+' register
// | '-' register
@@ -4388,26 +4381,24 @@ parseAM3Offset(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
/// Convert parsed operands to MCInst. Needed here because this instruction
/// only has two register operands, but multiplication is commutative so
/// assemblers should accept both "mul rD, rN, rD" and "mul rD, rD, rN".
-void ARMAsmParser::
-cvtThumbMultiply(MCInst &Inst,
- const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
- ((ARMOperand*)Operands[3])->addRegOperands(Inst, 1);
- ((ARMOperand*)Operands[1])->addCCOutOperands(Inst, 1);
+void ARMAsmParser::cvtThumbMultiply(MCInst &Inst,
+ const OperandVector &Operands) {
+ ((ARMOperand &)*Operands[3]).addRegOperands(Inst, 1);
+ ((ARMOperand &)*Operands[1]).addCCOutOperands(Inst, 1);
// If we have a three-operand form, make sure to set Rn to be the operand
// that isn't the same as Rd.
unsigned RegOp = 4;
if (Operands.size() == 6 &&
- ((ARMOperand*)Operands[4])->getReg() ==
- ((ARMOperand*)Operands[3])->getReg())
+ ((ARMOperand &)*Operands[4]).getReg() ==
+ ((ARMOperand &)*Operands[3]).getReg())
RegOp = 5;
- ((ARMOperand*)Operands[RegOp])->addRegOperands(Inst, 1);
+ ((ARMOperand &)*Operands[RegOp]).addRegOperands(Inst, 1);
Inst.addOperand(Inst.getOperand(0));
- ((ARMOperand*)Operands[2])->addCondCodeOperands(Inst, 2);
+ ((ARMOperand &)*Operands[2]).addCondCodeOperands(Inst, 2);
}
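The swap of RegOp between 4 and 5 above is just commutativity; a tiny model of the choice, with plain ints standing in for parsed register operands:

// Sketch: pick Rn as whichever source register differs from Rd, mirroring
// the converter above for the three-operand form.
static int pickRn(int Rd, int Src1, int Src2) {
  return (Src1 == Rd) ? Src2 : Src1;
}
// pickRn(0, 1, 0) == 1 for "mul r0, r1, r0"
// pickRn(0, 0, 1) == 1 for "mul r0, r0, r1"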
-void ARMAsmParser::
-cvtThumbBranches(MCInst &Inst,
- const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+void ARMAsmParser::cvtThumbBranches(MCInst &Inst,
+ const OperandVector &Operands) {
int CondOp = -1, ImmOp = -1;
switch(Inst.getOpcode()) {
case ARM::tB:
@@ -4430,7 +4421,7 @@ cvtThumbBranches(MCInst &Inst,
} else {
// outside IT blocks we can only have unconditional branches with AL
// condition code or conditional branches with non-AL condition code
- unsigned Cond = static_cast<ARMOperand*>(Operands[CondOp])->getCondCode();
+ unsigned Cond = static_cast<ARMOperand &>(*Operands[CondOp]).getCondCode();
switch(Inst.getOpcode()) {
case ARM::tB:
case ARM::tBcc:
@@ -4447,27 +4438,26 @@ cvtThumbBranches(MCInst &Inst,
switch(Inst.getOpcode()) {
// classify tB as either t2B or t1B based on range of immediate operand
case ARM::tB: {
- ARMOperand* op = static_cast<ARMOperand*>(Operands[ImmOp]);
- if(!op->isSignedOffset<11, 1>() && isThumbTwo())
+ ARMOperand &op = static_cast<ARMOperand &>(*Operands[ImmOp]);
+ if (!op.isSignedOffset<11, 1>() && isThumbTwo())
Inst.setOpcode(ARM::t2B);
break;
}
// classify tBcc as either t2Bcc or t1Bcc based on range of immediate operand
case ARM::tBcc: {
- ARMOperand* op = static_cast<ARMOperand*>(Operands[ImmOp]);
- if(!op->isSignedOffset<8, 1>() && isThumbTwo())
+ ARMOperand &op = static_cast<ARMOperand &>(*Operands[ImmOp]);
+ if (!op.isSignedOffset<8, 1>() && isThumbTwo())
Inst.setOpcode(ARM::t2Bcc);
break;
}
}
- ((ARMOperand*)Operands[ImmOp])->addImmOperands(Inst, 1);
- ((ARMOperand*)Operands[CondOp])->addCondCodeOperands(Inst, 2);
+ ((ARMOperand &)*Operands[ImmOp]).addImmOperands(Inst, 1);
+ ((ARMOperand &)*Operands[CondOp]).addCondCodeOperands(Inst, 2);
}
/// Parse an ARM memory expression. Return false if successful; otherwise emit
/// an error and return true. The first token must be a '[' when called.
-bool ARMAsmParser::
-parseMemory(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+bool ARMAsmParser::parseMemory(OperandVector &Operands) {
SMLoc S, E;
assert(Parser.getTok().is(AsmToken::LBrac) &&
"Token is not a Left Bracket");
@@ -4717,8 +4707,8 @@ bool ARMAsmParser::parseMemRegOffsetShift(ARM_AM::ShiftOpc &St,
}
/// parseFPImm - A floating point immediate expression operand.
-ARMAsmParser::OperandMatchResultTy ARMAsmParser::
-parseFPImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ARMAsmParser::OperandMatchResultTy
+ARMAsmParser::parseFPImm(OperandVector &Operands) {
// Anything that can accept a floating point constant as an operand
// needs to go through here, as the regular parseExpression is
// integer only.
@@ -4744,12 +4734,12 @@ parseFPImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
// integer constant. Make sure we don't try to parse an FPImm
// for these:
// vmov.i{8|16|32|64} <dreg|qreg>, #imm
- ARMOperand *TyOp = static_cast<ARMOperand*>(Operands[2]);
- bool isVmovf = TyOp->isToken() && (TyOp->getToken() == ".f32" ||
- TyOp->getToken() == ".f64");
- ARMOperand *Mnemonic = static_cast<ARMOperand*>(Operands[0]);
- bool isFconst = Mnemonic->isToken() && (Mnemonic->getToken() == "fconstd" ||
- Mnemonic->getToken() == "fconsts");
+ ARMOperand &TyOp = static_cast<ARMOperand &>(*Operands[2]);
+ bool isVmovf = TyOp.isToken() &&
+ (TyOp.getToken() == ".f32" || TyOp.getToken() == ".f64");
+ ARMOperand &Mnemonic = static_cast<ARMOperand &>(*Operands[0]);
+ bool isFconst = Mnemonic.isToken() && (Mnemonic.getToken() == "fconstd" ||
+ Mnemonic.getToken() == "fconsts");
if (!(isVmovf || isFconst))
return MatchOperand_NoMatch;
@@ -4798,8 +4788,7 @@ parseFPImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
/// Parse an ARM instruction operand. For now this parses the operand regardless
/// of the mnemonic.
-bool ARMAsmParser::parseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
- StringRef Mnemonic) {
+bool ARMAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
SMLoc S, E;
// Check if the current operand has a custom associated parser, if so, try to
@@ -5125,7 +5114,7 @@ getMnemonicAcceptInfo(StringRef Mnemonic, StringRef FullInst,
}
bool ARMAsmParser::shouldOmitCCOutOperand(StringRef Mnemonic,
- SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ OperandVector &Operands) {
// FIXME: This is all horribly hacky. We really need a better way to deal
// with optional operands like this in the matcher table.
@@ -5138,17 +5127,17 @@ bool ARMAsmParser::shouldOmitCCOutOperand(StringRef Mnemonic,
// conditionally adding the cc_out in the first place because we need
// to check the type of the parsed immediate operand.
if (Mnemonic == "mov" && Operands.size() > 4 && !isThumb() &&
- !static_cast<ARMOperand*>(Operands[4])->isARMSOImm() &&
- static_cast<ARMOperand*>(Operands[4])->isImm0_65535Expr() &&
- static_cast<ARMOperand*>(Operands[1])->getReg() == 0)
+ !static_cast<ARMOperand &>(*Operands[4]).isARMSOImm() &&
+ static_cast<ARMOperand &>(*Operands[4]).isImm0_65535Expr() &&
+ static_cast<ARMOperand &>(*Operands[1]).getReg() == 0)
return true;
// Register-register 'add' for thumb does not have a cc_out operand
// when there are only two register operands.
if (isThumb() && Mnemonic == "add" && Operands.size() == 5 &&
- static_cast<ARMOperand*>(Operands[3])->isReg() &&
- static_cast<ARMOperand*>(Operands[4])->isReg() &&
- static_cast<ARMOperand*>(Operands[1])->getReg() == 0)
+ static_cast<ARMOperand &>(*Operands[3]).isReg() &&
+ static_cast<ARMOperand &>(*Operands[4]).isReg() &&
+ static_cast<ARMOperand &>(*Operands[1]).getReg() == 0)
return true;
// Register-register 'add' for thumb does not have a cc_out operand
// when it's an ADD Rdm, SP, {Rdm|#imm0_255} instruction. We do
@@ -5156,13 +5145,12 @@ bool ARMAsmParser::shouldOmitCCOutOperand(StringRef Mnemonic,
// that can handle a different range and has a cc_out operand.
if (((isThumb() && Mnemonic == "add") ||
(isThumbTwo() && Mnemonic == "sub")) &&
- Operands.size() == 6 &&
- static_cast<ARMOperand*>(Operands[3])->isReg() &&
- static_cast<ARMOperand*>(Operands[4])->isReg() &&
- static_cast<ARMOperand*>(Operands[4])->getReg() == ARM::SP &&
- static_cast<ARMOperand*>(Operands[1])->getReg() == 0 &&
- ((Mnemonic == "add" &&static_cast<ARMOperand*>(Operands[5])->isReg()) ||
- static_cast<ARMOperand*>(Operands[5])->isImm0_1020s4()))
+ Operands.size() == 6 && static_cast<ARMOperand &>(*Operands[3]).isReg() &&
+ static_cast<ARMOperand &>(*Operands[4]).isReg() &&
+ static_cast<ARMOperand &>(*Operands[4]).getReg() == ARM::SP &&
+ static_cast<ARMOperand &>(*Operands[1]).getReg() == 0 &&
+ ((Mnemonic == "add" && static_cast<ARMOperand &>(*Operands[5]).isReg()) ||
+ static_cast<ARMOperand &>(*Operands[5]).isImm0_1020s4()))
return true;
// For Thumb2, add/sub immediate does not have a cc_out operand for the
// imm0_4095 variant. That's the least-preferred variant when
@@ -5170,23 +5158,22 @@ bool ARMAsmParser::shouldOmitCCOutOperand(StringRef Mnemonic,
// should remove the cc_out operand, we have to explicitly check that
// it's not one of the other variants. Ugh.
if (isThumbTwo() && (Mnemonic == "add" || Mnemonic == "sub") &&
- Operands.size() == 6 &&
- static_cast<ARMOperand*>(Operands[3])->isReg() &&
- static_cast<ARMOperand*>(Operands[4])->isReg() &&
- static_cast<ARMOperand*>(Operands[5])->isImm()) {
+ Operands.size() == 6 && static_cast<ARMOperand &>(*Operands[3]).isReg() &&
+ static_cast<ARMOperand &>(*Operands[4]).isReg() &&
+ static_cast<ARMOperand &>(*Operands[5]).isImm()) {
// Nest conditions rather than one big 'if' statement for readability.
//
// If both registers are low, we're in an IT block, and the immediate is
// in range, we should use encoding T1 instead, which has a cc_out.
if (inITBlock() &&
- isARMLowRegister(static_cast<ARMOperand*>(Operands[3])->getReg()) &&
- isARMLowRegister(static_cast<ARMOperand*>(Operands[4])->getReg()) &&
- static_cast<ARMOperand*>(Operands[5])->isImm0_7())
+ isARMLowRegister(static_cast<ARMOperand &>(*Operands[3]).getReg()) &&
+ isARMLowRegister(static_cast<ARMOperand &>(*Operands[4]).getReg()) &&
+ static_cast<ARMOperand &>(*Operands[5]).isImm0_7())
return false;
// Check against T3. If the second register is the PC, this is an
// alternate form of ADR, which uses encoding T4, so check for that too.
- if (static_cast<ARMOperand*>(Operands[4])->getReg() != ARM::PC &&
- static_cast<ARMOperand*>(Operands[5])->isT2SOImm())
+ if (static_cast<ARMOperand &>(*Operands[4]).getReg() != ARM::PC &&
+ static_cast<ARMOperand &>(*Operands[5]).isT2SOImm())
return false;
// Otherwise, we use encoding T4, which does not have a cc_out
@@ -5198,35 +5185,34 @@ bool ARMAsmParser::shouldOmitCCOutOperand(StringRef Mnemonic,
// if we have a "mul" mnemonic in Thumb mode, check if we'll be able to
// use the 16-bit encoding or not.
if (isThumbTwo() && Mnemonic == "mul" && Operands.size() == 6 &&
- static_cast<ARMOperand*>(Operands[1])->getReg() == 0 &&
- static_cast<ARMOperand*>(Operands[3])->isReg() &&
- static_cast<ARMOperand*>(Operands[4])->isReg() &&
- static_cast<ARMOperand*>(Operands[5])->isReg() &&
+ static_cast<ARMOperand &>(*Operands[1]).getReg() == 0 &&
+ static_cast<ARMOperand &>(*Operands[3]).isReg() &&
+ static_cast<ARMOperand &>(*Operands[4]).isReg() &&
+ static_cast<ARMOperand &>(*Operands[5]).isReg() &&
// If the registers aren't low regs, the destination reg isn't the
// same as one of the source regs, or the cc_out operand is zero
// outside of an IT block, we have to use the 32-bit encoding, so
// remove the cc_out operand.
- (!isARMLowRegister(static_cast<ARMOperand*>(Operands[3])->getReg()) ||
- !isARMLowRegister(static_cast<ARMOperand*>(Operands[4])->getReg()) ||
- !isARMLowRegister(static_cast<ARMOperand*>(Operands[5])->getReg()) ||
- !inITBlock() ||
- (static_cast<ARMOperand*>(Operands[3])->getReg() !=
- static_cast<ARMOperand*>(Operands[5])->getReg() &&
- static_cast<ARMOperand*>(Operands[3])->getReg() !=
- static_cast<ARMOperand*>(Operands[4])->getReg())))
+ (!isARMLowRegister(static_cast<ARMOperand &>(*Operands[3]).getReg()) ||
+ !isARMLowRegister(static_cast<ARMOperand &>(*Operands[4]).getReg()) ||
+ !isARMLowRegister(static_cast<ARMOperand &>(*Operands[5]).getReg()) ||
+ !inITBlock() || (static_cast<ARMOperand &>(*Operands[3]).getReg() !=
+ static_cast<ARMOperand &>(*Operands[5]).getReg() &&
+ static_cast<ARMOperand &>(*Operands[3]).getReg() !=
+ static_cast<ARMOperand &>(*Operands[4]).getReg())))
return true;
// Also check the 'mul' syntax variant that doesn't specify an explicit
// destination register.
if (isThumbTwo() && Mnemonic == "mul" && Operands.size() == 5 &&
- static_cast<ARMOperand*>(Operands[1])->getReg() == 0 &&
- static_cast<ARMOperand*>(Operands[3])->isReg() &&
- static_cast<ARMOperand*>(Operands[4])->isReg() &&
+ static_cast<ARMOperand &>(*Operands[1]).getReg() == 0 &&
+ static_cast<ARMOperand &>(*Operands[3]).isReg() &&
+ static_cast<ARMOperand &>(*Operands[4]).isReg() &&
// If the registers aren't low regs or the cc_out operand is zero
// outside of an IT block, we have to use the 32-bit encoding, so
// remove the cc_out operand.
- (!isARMLowRegister(static_cast<ARMOperand*>(Operands[3])->getReg()) ||
- !isARMLowRegister(static_cast<ARMOperand*>(Operands[4])->getReg()) ||
+ (!isARMLowRegister(static_cast<ARMOperand &>(*Operands[3]).getReg()) ||
+ !isARMLowRegister(static_cast<ARMOperand &>(*Operands[4]).getReg()) ||
!inITBlock()))
return true;
@@ -5239,32 +5225,32 @@ bool ARMAsmParser::shouldOmitCCOutOperand(StringRef Mnemonic,
// anyway.
if (isThumb() && (Mnemonic == "add" || Mnemonic == "sub") &&
(Operands.size() == 5 || Operands.size() == 6) &&
- static_cast<ARMOperand*>(Operands[3])->isReg() &&
- static_cast<ARMOperand*>(Operands[3])->getReg() == ARM::SP &&
- static_cast<ARMOperand*>(Operands[1])->getReg() == 0 &&
- (static_cast<ARMOperand*>(Operands[4])->isImm() ||
+ static_cast<ARMOperand &>(*Operands[3]).isReg() &&
+ static_cast<ARMOperand &>(*Operands[3]).getReg() == ARM::SP &&
+ static_cast<ARMOperand &>(*Operands[1]).getReg() == 0 &&
+ (static_cast<ARMOperand &>(*Operands[4]).isImm() ||
(Operands.size() == 6 &&
- static_cast<ARMOperand*>(Operands[5])->isImm())))
+ static_cast<ARMOperand &>(*Operands[5]).isImm())))
return true;
return false;
}
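For readers tracing the magic indices above, here is a toy model of the parsed operand layout that shouldOmitCCOutOperand assumes; the Op struct and helper are illustrative, not LLVM types.
#include <string>
#include <vector>
// Assumed layout while a cc_out slot is present:
//   [0] mnemonic token, [1] cc_out register (0 when no 's' was written,
//   CPSR when it was), [2] condition code, [3...] operands as written.
struct Op { std::string Tok; unsigned Reg; };
static bool hasExplicitS(const std::vector<Op> &Operands) {
  return Operands[1].Reg != 0;  // cc_out slot holds CPSR for an 's' suffix
}
// This is why the checks above start at index 3 for the first "real"
// operand, and why removal is always Operands.erase(Operands.begin() + 1).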
-bool ARMAsmParser::shouldOmitPredicateOperand(
- StringRef Mnemonic, SmallVectorImpl<MCParsedAsmOperand *> &Operands) {
+bool ARMAsmParser::shouldOmitPredicateOperand(StringRef Mnemonic,
+ OperandVector &Operands) {
// VRINT{Z, R, X} have a predicate operand in VFP, but not in NEON
unsigned RegIdx = 3;
if ((Mnemonic == "vrintz" || Mnemonic == "vrintx" || Mnemonic == "vrintr") &&
- static_cast<ARMOperand *>(Operands[2])->getToken() == ".f32") {
- if (static_cast<ARMOperand *>(Operands[3])->isToken() &&
- static_cast<ARMOperand *>(Operands[3])->getToken() == ".f32")
+ static_cast<ARMOperand &>(*Operands[2]).getToken() == ".f32") {
+ if (static_cast<ARMOperand &>(*Operands[3]).isToken() &&
+ static_cast<ARMOperand &>(*Operands[3]).getToken() == ".f32")
RegIdx = 4;
- if (static_cast<ARMOperand *>(Operands[RegIdx])->isReg() &&
- (ARMMCRegisterClasses[ARM::DPRRegClassID]
- .contains(static_cast<ARMOperand *>(Operands[RegIdx])->getReg()) ||
- ARMMCRegisterClasses[ARM::QPRRegClassID]
- .contains(static_cast<ARMOperand *>(Operands[RegIdx])->getReg())))
+ if (static_cast<ARMOperand &>(*Operands[RegIdx]).isReg() &&
+ (ARMMCRegisterClasses[ARM::DPRRegClassID].contains(
+ static_cast<ARMOperand &>(*Operands[RegIdx]).getReg()) ||
+ ARMMCRegisterClasses[ARM::QPRRegClassID].contains(
+ static_cast<ARMOperand &>(*Operands[RegIdx]).getReg())))
return true;
}
return false;
@@ -5309,8 +5295,7 @@ static bool RequiresVFPRegListValidation(StringRef Inst,
/// Parse an ARM instruction mnemonic followed by its operands.
bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
- SMLoc NameLoc,
- SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ SMLoc NameLoc, OperandVector &Operands) {
// FIXME: Can this be done via tablegen in some fashion?
bool RequireVFPRegisterListCheck;
bool AcceptSinglePrecisionOnly;
@@ -5489,12 +5474,12 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
Parser.Lex(); // Consume the EndOfStatement
if (RequireVFPRegisterListCheck) {
- ARMOperand *Op = static_cast<ARMOperand*>(Operands.back());
- if (AcceptSinglePrecisionOnly && !Op->isSPRRegList())
- return Error(Op->getStartLoc(),
+ ARMOperand &Op = static_cast<ARMOperand &>(*Operands.back());
+ if (AcceptSinglePrecisionOnly && !Op.isSPRRegList())
+ return Error(Op.getStartLoc(),
"VFP/Neon single precision register expected");
- if (AcceptDoublePrecisionOnly && !Op->isDPRRegList())
- return Error(Op->getStartLoc(),
+ if (AcceptDoublePrecisionOnly && !Op.isDPRRegList())
+ return Error(Op.getStartLoc(),
"VFP/Neon double precision register expected");
}
@@ -5505,20 +5490,14 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
// try to remove a cc_out operand that was explicitly set on the
// mnemonic, of course (CarrySetting == true). Reason number #317 the
// table driven matcher doesn't fit well with the ARM instruction set.
- if (!CarrySetting && shouldOmitCCOutOperand(Mnemonic, Operands)) {
- ARMOperand *Op = static_cast<ARMOperand*>(Operands[1]);
+ if (!CarrySetting && shouldOmitCCOutOperand(Mnemonic, Operands))
Operands.erase(Operands.begin() + 1);
- delete Op;
- }
// Some instructions have the same mnemonic, but don't always
// have a predicate. Distinguish them here and delete the
// predicate if needed.
- if (shouldOmitPredicateOperand(Mnemonic, Operands)) {
- ARMOperand *Op = static_cast<ARMOperand*>(Operands[1]);
+ if (shouldOmitPredicateOperand(Mnemonic, Operands))
Operands.erase(Operands.begin() + 1);
- delete Op;
- }
// ARM mode 'blx' need special handling, as the register operand version
// is predicable, but the label operand version is not. So, we can't rely
@@ -5526,11 +5505,8 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
// a k_CondCode operand in the list. If we're trying to match the label
// version, remove the k_CondCode operand here.
if (!isThumb() && Mnemonic == "blx" && Operands.size() == 3 &&
- static_cast<ARMOperand*>(Operands[2])->isImm()) {
- ARMOperand *Op = static_cast<ARMOperand*>(Operands[1]);
+ static_cast<ARMOperand &>(*Operands[2]).isImm())
Operands.erase(Operands.begin() + 1);
- delete Op;
- }
// Adjust operands of ldrexd/strexd to MCK_GPRPair.
// ldrexd/strexd require even/odd GPR pair. To enforce this constraint,
@@ -5543,53 +5519,50 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
Mnemonic == "stlexd")) {
bool isLoad = (Mnemonic == "ldrexd" || Mnemonic == "ldaexd");
unsigned Idx = isLoad ? 2 : 3;
- ARMOperand* Op1 = static_cast<ARMOperand*>(Operands[Idx]);
- ARMOperand* Op2 = static_cast<ARMOperand*>(Operands[Idx+1]);
+ ARMOperand &Op1 = static_cast<ARMOperand &>(*Operands[Idx]);
+ ARMOperand &Op2 = static_cast<ARMOperand &>(*Operands[Idx + 1]);
const MCRegisterClass& MRC = MRI->getRegClass(ARM::GPRRegClassID);
// Adjust only if Op1 and Op2 are GPRs.
- if (Op1->isReg() && Op2->isReg() && MRC.contains(Op1->getReg()) &&
- MRC.contains(Op2->getReg())) {
- unsigned Reg1 = Op1->getReg();
- unsigned Reg2 = Op2->getReg();
+ if (Op1.isReg() && Op2.isReg() && MRC.contains(Op1.getReg()) &&
+ MRC.contains(Op2.getReg())) {
+ unsigned Reg1 = Op1.getReg();
+ unsigned Reg2 = Op2.getReg();
unsigned Rt = MRI->getEncodingValue(Reg1);
unsigned Rt2 = MRI->getEncodingValue(Reg2);
// Rt2 must be Rt + 1 and Rt must be even.
if (Rt + 1 != Rt2 || (Rt & 1)) {
- Error(Op2->getStartLoc(), isLoad ?
- "destination operands must be sequential" :
- "source operands must be sequential");
+ Error(Op2.getStartLoc(), isLoad
+ ? "destination operands must be sequential"
+ : "source operands must be sequential");
return true;
}
unsigned NewReg = MRI->getMatchingSuperReg(Reg1, ARM::gsub_0,
&(MRI->getRegClass(ARM::GPRPairRegClassID)));
- Operands.erase(Operands.begin() + Idx, Operands.begin() + Idx + 2);
- Operands.insert(Operands.begin() + Idx, ARMOperand::CreateReg(
- NewReg, Op1->getStartLoc(), Op2->getEndLoc()));
- delete Op1;
- delete Op2;
+ Operands[Idx] =
+ ARMOperand::CreateReg(NewReg, Op1.getStartLoc(), Op2.getEndLoc());
+ Operands.erase(Operands.begin() + Idx + 1);
}
}
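The even/odd pair rule enforced here fits in one line; isValidGPRPair below is a hypothetical helper over the encoded register numbers, not an LLVM API.
// ldrexd/strexd want a GPR pair that encodes as an even register followed
// by the next one, e.g. {r0,r1} or {r2,r3}, but never {r1,r2}.
static bool isValidGPRPair(unsigned Rt, unsigned Rt2) {
  return (Rt % 2 == 0) && (Rt2 == Rt + 1);
}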
// GNU Assembler extension (compatibility)
if ((Mnemonic == "ldrd" || Mnemonic == "strd")) {
- ARMOperand *Op2 = static_cast<ARMOperand *>(Operands[2]);
- ARMOperand *Op3 = static_cast<ARMOperand *>(Operands[3]);
- if (Op3->isMem()) {
- assert(Op2->isReg() && "expected register argument");
+ ARMOperand &Op2 = static_cast<ARMOperand &>(*Operands[2]);
+ ARMOperand &Op3 = static_cast<ARMOperand &>(*Operands[3]);
+ if (Op3.isMem()) {
+ assert(Op2.isReg() && "expected register argument");
unsigned SuperReg = MRI->getMatchingSuperReg(
- Op2->getReg(), ARM::gsub_0, &MRI->getRegClass(ARM::GPRPairRegClassID));
+ Op2.getReg(), ARM::gsub_0, &MRI->getRegClass(ARM::GPRPairRegClassID));
assert(SuperReg && "expected register pair");
unsigned PairedReg = MRI->getSubReg(SuperReg, ARM::gsub_1);
- Operands.insert(Operands.begin() + 3,
- ARMOperand::CreateReg(PairedReg,
- Op2->getStartLoc(),
- Op2->getEndLoc()));
+ Operands.insert(
+ Operands.begin() + 3,
+ ARMOperand::CreateReg(PairedReg, Op2.getStartLoc(), Op2.getEndLoc()));
}
}
@@ -5599,19 +5572,13 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
// so the Mnemonic is the original name "subs" and delete the predicate
// operand so it will match the table entry.
if (isThumbTwo() && Mnemonic == "sub" && Operands.size() == 6 &&
- static_cast<ARMOperand*>(Operands[3])->isReg() &&
- static_cast<ARMOperand*>(Operands[3])->getReg() == ARM::PC &&
- static_cast<ARMOperand*>(Operands[4])->isReg() &&
- static_cast<ARMOperand*>(Operands[4])->getReg() == ARM::LR &&
- static_cast<ARMOperand*>(Operands[5])->isImm()) {
- ARMOperand *Op0 = static_cast<ARMOperand*>(Operands[0]);
- Operands.erase(Operands.begin());
- delete Op0;
- Operands.insert(Operands.begin(), ARMOperand::CreateToken(Name, NameLoc));
-
- ARMOperand *Op1 = static_cast<ARMOperand*>(Operands[1]);
+ static_cast<ARMOperand &>(*Operands[3]).isReg() &&
+ static_cast<ARMOperand &>(*Operands[3]).getReg() == ARM::PC &&
+ static_cast<ARMOperand &>(*Operands[4]).isReg() &&
+ static_cast<ARMOperand &>(*Operands[4]).getReg() == ARM::LR &&
+ static_cast<ARMOperand &>(*Operands[5]).isImm()) {
+ Operands.front() = ARMOperand::CreateToken(Name, NameLoc);
Operands.erase(Operands.begin() + 1);
- delete Op1;
}
return false;
}
@@ -5657,9 +5624,8 @@ static bool instIsBreakpoint(const MCInst &Inst) {
}
// FIXME: We would really like to be able to tablegen'erate this.
-bool ARMAsmParser::
-validateInstruction(MCInst &Inst,
- const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+bool ARMAsmParser::validateInstruction(MCInst &Inst,
+ const OperandVector &Operands) {
const MCInstrDesc &MCID = MII.get(Inst.getOpcode());
SMLoc Loc = Operands[0]->getStartLoc();
@@ -5682,7 +5648,7 @@ validateInstruction(MCInst &Inst,
// Find the condition code Operand to get its SMLoc information.
SMLoc CondLoc;
for (unsigned I = 1; I < Operands.size(); ++I)
- if (static_cast<ARMOperand*>(Operands[I])->isCondCode())
+ if (static_cast<ARMOperand &>(*Operands[I]).isCondCode())
CondLoc = Operands[I]->getStartLoc();
return Error(CondLoc, "incorrect condition in IT block; got '" +
StringRef(ARMCondCodeToString(ARMCC::CondCodes(Cond))) +
@@ -5782,8 +5748,8 @@ validateInstruction(MCInst &Inst,
// in the register list.
unsigned Rn = Inst.getOperand(0).getReg();
bool HasWritebackToken =
- (static_cast<ARMOperand*>(Operands[3])->isToken() &&
- static_cast<ARMOperand*>(Operands[3])->getToken() == "!");
+ (static_cast<ARMOperand &>(*Operands[3]).isToken() &&
+ static_cast<ARMOperand &>(*Operands[3]).getToken() == "!");
bool ListContainsBase;
if (checkLowRegisterList(Inst, 3, Rn, 0, ListContainsBase) && !isThumbTwo())
return Error(Operands[3 + HasWritebackToken]->getStartLoc(),
@@ -5843,11 +5809,10 @@ validateInstruction(MCInst &Inst,
// this first statement is always true for the new Inst. Essentially, the
// destination is unconditionally copied into the second source operand
// without checking to see if it matches what we actually parsed.
- if (Operands.size() == 6 &&
- (((ARMOperand*)Operands[3])->getReg() !=
- ((ARMOperand*)Operands[5])->getReg()) &&
- (((ARMOperand*)Operands[3])->getReg() !=
- ((ARMOperand*)Operands[4])->getReg())) {
+ if (Operands.size() == 6 && (((ARMOperand &)*Operands[3]).getReg() !=
+ ((ARMOperand &)*Operands[5]).getReg()) &&
+ (((ARMOperand &)*Operands[3]).getReg() !=
+ ((ARMOperand &)*Operands[4]).getReg())) {
return Error(Operands[3]->getStartLoc(),
"destination register must match source register");
}
@@ -5900,23 +5865,23 @@ validateInstruction(MCInst &Inst,
}
// Final range checking for Thumb unconditional branch instructions.
case ARM::tB:
- if (!(static_cast<ARMOperand*>(Operands[2]))->isSignedOffset<11, 1>())
+ if (!(static_cast<ARMOperand &>(*Operands[2])).isSignedOffset<11, 1>())
return Error(Operands[2]->getStartLoc(), "branch target out of range");
break;
case ARM::t2B: {
int op = (Operands[2]->isImm()) ? 2 : 3;
- if (!(static_cast<ARMOperand*>(Operands[op]))->isSignedOffset<24, 1>())
+ if (!static_cast<ARMOperand &>(*Operands[op]).isSignedOffset<24, 1>())
return Error(Operands[op]->getStartLoc(), "branch target out of range");
break;
}
// Final range checking for Thumb conditional branch instructions.
case ARM::tBcc:
- if (!(static_cast<ARMOperand*>(Operands[2]))->isSignedOffset<8, 1>())
+ if (!static_cast<ARMOperand &>(*Operands[2]).isSignedOffset<8, 1>())
return Error(Operands[2]->getStartLoc(), "branch target out of range");
break;
case ARM::t2Bcc: {
int Op = (Operands[2]->isImm()) ? 2 : 3;
- if (!(static_cast<ARMOperand*>(Operands[Op]))->isSignedOffset<20, 1>())
+ if (!static_cast<ARMOperand &>(*Operands[Op]).isSignedOffset<20, 1>())
return Error(Operands[Op]->getStartLoc(), "branch target out of range");
break;
}
@@ -5931,19 +5896,19 @@ validateInstruction(MCInst &Inst,
// lead to bugs that are difficult to find since this is an easy mistake
// to make.
int i = (Operands[3]->isImm()) ? 3 : 4;
- ARMOperand *Op = static_cast<ARMOperand*>(Operands[i]);
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Op->getImm());
+ ARMOperand &Op = static_cast<ARMOperand &>(*Operands[i]);
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Op.getImm());
if (CE) break;
- const MCExpr *E = dyn_cast<MCExpr>(Op->getImm());
+ const MCExpr *E = dyn_cast<MCExpr>(Op.getImm());
if (!E) break;
const ARMMCExpr *ARM16Expr = dyn_cast<ARMMCExpr>(E);
if (!ARM16Expr || (ARM16Expr->getKind() != ARMMCExpr::VK_ARM_HI16 &&
- ARM16Expr->getKind() != ARMMCExpr::VK_ARM_LO16)) {
- return Error(Op->getStartLoc(),
- "immediate expression for mov requires :lower16: or :upper16");
- break;
- }
- }
+ ARM16Expr->getKind() != ARMMCExpr::VK_ARM_LO16))
+ return Error(
+ Op.getStartLoc(),
+          "immediate expression for mov requires :lower16: or :upper16:");
+ break;
+ }
}
return false;
@@ -6205,9 +6170,8 @@ static unsigned getRealVLDOpcode(unsigned Opc, unsigned &Spacing) {
}
}
-bool ARMAsmParser::
-processInstruction(MCInst &Inst,
- const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+bool ARMAsmParser::processInstruction(MCInst &Inst,
+ const OperandVector &Operands) {
switch (Inst.getOpcode()) {
// Alias for alternate form of 'ldr{,b}t Rt, [Rn], #imm' instruction.
case ARM::LDRT_POST:
@@ -6264,8 +6228,8 @@ processInstruction(MCInst &Inst,
// Select the narrow version if the immediate will fit.
if (Inst.getOperand(1).getImm() > 0 &&
Inst.getOperand(1).getImm() <= 0xff &&
- !(static_cast<ARMOperand*>(Operands[2])->isToken() &&
- static_cast<ARMOperand*>(Operands[2])->getToken() == ".w"))
+ !(static_cast<ARMOperand &>(*Operands[2]).isToken() &&
+ static_cast<ARMOperand &>(*Operands[2]).getToken() == ".w"))
Inst.setOpcode(ARM::tLDRpci);
else
Inst.setOpcode(ARM::t2LDRpci);
@@ -7355,8 +7319,8 @@ processInstruction(MCInst &Inst,
if (isARMLowRegister(Inst.getOperand(0).getReg()) &&
Inst.getOperand(0).getReg() == Inst.getOperand(1).getReg() &&
Inst.getOperand(5).getReg() == (inITBlock() ? 0 : ARM::CPSR) &&
- !(static_cast<ARMOperand*>(Operands[3])->isToken() &&
- static_cast<ARMOperand*>(Operands[3])->getToken() == ".w")) {
+ !(static_cast<ARMOperand &>(*Operands[3]).isToken() &&
+ static_cast<ARMOperand &>(*Operands[3]).getToken() == ".w")) {
unsigned NewOpc;
switch (Inst.getOpcode()) {
default: llvm_unreachable("unexpected opcode");
@@ -7559,7 +7523,7 @@ processInstruction(MCInst &Inst,
case ARM::LDMIA_UPD:
// If this is a load of a single register via a 'pop', then we should use
// a post-indexed LDR instruction instead, per the ARM ARM.
- if (static_cast<ARMOperand*>(Operands[0])->getToken() == "pop" &&
+ if (static_cast<ARMOperand &>(*Operands[0]).getToken() == "pop" &&
Inst.getNumOperands() == 5) {
MCInst TmpInst;
TmpInst.setOpcode(ARM::LDR_POST_IMM);
@@ -7577,7 +7541,7 @@ processInstruction(MCInst &Inst,
case ARM::STMDB_UPD:
// If this is a store of a single register via a 'push', then we should use
// a pre-indexed STR instruction instead, per the ARM ARM.
- if (static_cast<ARMOperand*>(Operands[0])->getToken() == "push" &&
+ if (static_cast<ARMOperand &>(*Operands[0]).getToken() == "push" &&
Inst.getNumOperands() == 5) {
MCInst TmpInst;
TmpInst.setOpcode(ARM::STR_PRE_IMM);
@@ -7593,7 +7557,7 @@ processInstruction(MCInst &Inst,
case ARM::t2ADDri12:
// If the immediate fits for encoding T3 (t2ADDri) and the generic "add"
// mnemonic was used (not "addw"), encoding T3 is preferred.
- if (static_cast<ARMOperand*>(Operands[0])->getToken() != "add" ||
+ if (static_cast<ARMOperand &>(*Operands[0]).getToken() != "add" ||
ARM_AM::getT2SOImmVal(Inst.getOperand(2).getImm()) == -1)
break;
Inst.setOpcode(ARM::t2ADDri);
@@ -7602,7 +7566,7 @@ processInstruction(MCInst &Inst,
case ARM::t2SUBri12:
// If the immediate fits for encoding T3 (t2SUBri) and the generic "sub"
// mnemonic was used (not "subw"), encoding T3 is preferred.
- if (static_cast<ARMOperand*>(Operands[0])->getToken() != "sub" ||
+ if (static_cast<ARMOperand &>(*Operands[0]).getToken() != "sub" ||
ARM_AM::getT2SOImmVal(Inst.getOperand(2).getImm()) == -1)
break;
Inst.setOpcode(ARM::t2SUBri);
@@ -7638,9 +7602,9 @@ processInstruction(MCInst &Inst,
!isARMLowRegister(Inst.getOperand(0).getReg()) ||
(unsigned)Inst.getOperand(2).getImm() > 255 ||
((!inITBlock() && Inst.getOperand(5).getReg() != ARM::CPSR) ||
- (inITBlock() && Inst.getOperand(5).getReg() != 0)) ||
- (static_cast<ARMOperand*>(Operands[3])->isToken() &&
- static_cast<ARMOperand*>(Operands[3])->getToken() == ".w"))
+ (inITBlock() && Inst.getOperand(5).getReg() != 0)) ||
+ (static_cast<ARMOperand &>(*Operands[3]).isToken() &&
+ static_cast<ARMOperand &>(*Operands[3]).getToken() == ".w"))
break;
MCInst TmpInst;
TmpInst.setOpcode(Inst.getOpcode() == ARM::t2ADDri ?
@@ -7661,8 +7625,8 @@ processInstruction(MCInst &Inst,
// 'as' behaviour. Make sure the wide encoding wasn't explicit.
if (Inst.getOperand(0).getReg() != Inst.getOperand(1).getReg() ||
Inst.getOperand(5).getReg() != 0 ||
- (static_cast<ARMOperand*>(Operands[3])->isToken() &&
- static_cast<ARMOperand*>(Operands[3])->getToken() == ".w"))
+ (static_cast<ARMOperand &>(*Operands[3]).isToken() &&
+ static_cast<ARMOperand &>(*Operands[3]).getToken() == ".w"))
break;
MCInst TmpInst;
TmpInst.setOpcode(ARM::tADDhirr);
@@ -7719,8 +7683,8 @@ processInstruction(MCInst &Inst,
// an error in validateInstruction().
unsigned Rn = Inst.getOperand(0).getReg();
bool hasWritebackToken =
- (static_cast<ARMOperand*>(Operands[3])->isToken() &&
- static_cast<ARMOperand*>(Operands[3])->getToken() == "!");
+ (static_cast<ARMOperand &>(*Operands[3]).isToken() &&
+ static_cast<ARMOperand &>(*Operands[3]).getToken() == "!");
bool listContainsBase;
if (checkLowRegisterList(Inst, 3, Rn, 0, listContainsBase) ||
(!listContainsBase && !hasWritebackToken) ||
@@ -7782,10 +7746,10 @@ processInstruction(MCInst &Inst,
if (isARMLowRegister(Inst.getOperand(0).getReg()) &&
(unsigned)Inst.getOperand(1).getImm() <= 255 &&
((!inITBlock() && Inst.getOperand(2).getImm() == ARMCC::AL &&
- Inst.getOperand(4).getReg() == ARM::CPSR) ||
- (inITBlock() && Inst.getOperand(4).getReg() == 0)) &&
- (!static_cast<ARMOperand*>(Operands[2])->isToken() ||
- static_cast<ARMOperand*>(Operands[2])->getToken() != ".w")) {
+ Inst.getOperand(4).getReg() == ARM::CPSR) ||
+ (inITBlock() && Inst.getOperand(4).getReg() == 0)) &&
+ (!static_cast<ARMOperand &>(*Operands[2]).isToken() ||
+ static_cast<ARMOperand &>(*Operands[2]).getToken() != ".w")) {
// The operands aren't in the same order for tMOVi8...
MCInst TmpInst;
TmpInst.setOpcode(ARM::tMOVi8);
@@ -7806,8 +7770,8 @@ processInstruction(MCInst &Inst,
isARMLowRegister(Inst.getOperand(1).getReg()) &&
Inst.getOperand(2).getImm() == ARMCC::AL &&
Inst.getOperand(4).getReg() == ARM::CPSR &&
- (!static_cast<ARMOperand*>(Operands[2])->isToken() ||
- static_cast<ARMOperand*>(Operands[2])->getToken() != ".w")) {
+ (!static_cast<ARMOperand &>(*Operands[2]).isToken() ||
+ static_cast<ARMOperand &>(*Operands[2]).getToken() != ".w")) {
// The operands aren't the same for tMOV[S]r... (no cc_out)
MCInst TmpInst;
TmpInst.setOpcode(Inst.getOperand(4).getReg() ? ARM::tMOVSr : ARM::tMOVr);
@@ -7829,8 +7793,8 @@ processInstruction(MCInst &Inst,
if (isARMLowRegister(Inst.getOperand(0).getReg()) &&
isARMLowRegister(Inst.getOperand(1).getReg()) &&
Inst.getOperand(2).getImm() == 0 &&
- (!static_cast<ARMOperand*>(Operands[2])->isToken() ||
- static_cast<ARMOperand*>(Operands[2])->getToken() != ".w")) {
+ (!static_cast<ARMOperand &>(*Operands[2]).isToken() ||
+ static_cast<ARMOperand &>(*Operands[2]).getToken() != ".w")) {
unsigned NewOpc;
switch (Inst.getOpcode()) {
default: llvm_unreachable("Illegal opcode!");
@@ -7942,9 +7906,10 @@ processInstruction(MCInst &Inst,
isARMLowRegister(Inst.getOperand(2).getReg())) &&
Inst.getOperand(0).getReg() == Inst.getOperand(1).getReg() &&
((!inITBlock() && Inst.getOperand(5).getReg() == ARM::CPSR) ||
- (inITBlock() && Inst.getOperand(5).getReg() != ARM::CPSR)) &&
- (!static_cast<ARMOperand*>(Operands[3])->isToken() ||
- !static_cast<ARMOperand*>(Operands[3])->getToken().equals_lower(".w"))) {
+ (inITBlock() && Inst.getOperand(5).getReg() != ARM::CPSR)) &&
+ (!static_cast<ARMOperand &>(*Operands[3]).isToken() ||
+ !static_cast<ARMOperand &>(*Operands[3]).getToken().equals_lower(
+ ".w"))) {
unsigned NewOpc;
switch (Inst.getOpcode()) {
default: llvm_unreachable("unexpected opcode");
@@ -7981,9 +7946,10 @@ processInstruction(MCInst &Inst,
(Inst.getOperand(0).getReg() == Inst.getOperand(1).getReg() ||
Inst.getOperand(0).getReg() == Inst.getOperand(2).getReg()) &&
((!inITBlock() && Inst.getOperand(5).getReg() == ARM::CPSR) ||
- (inITBlock() && Inst.getOperand(5).getReg() != ARM::CPSR)) &&
- (!static_cast<ARMOperand*>(Operands[3])->isToken() ||
- !static_cast<ARMOperand*>(Operands[3])->getToken().equals_lower(".w"))) {
+ (inITBlock() && Inst.getOperand(5).getReg() != ARM::CPSR)) &&
+ (!static_cast<ARMOperand &>(*Operands[3]).isToken() ||
+ !static_cast<ARMOperand &>(*Operands[3]).getToken().equals_lower(
+ ".w"))) {
unsigned NewOpc;
switch (Inst.getOpcode()) {
default: llvm_unreachable("unexpected opcode");
@@ -8063,11 +8029,10 @@ template <> inline bool IsCPSRDead<MCInst>(MCInst *Instr) {
}
static const char *getSubtargetFeatureName(unsigned Val);
-bool ARMAsmParser::
-MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
- SmallVectorImpl<MCParsedAsmOperand*> &Operands,
- MCStreamer &Out, unsigned &ErrorInfo,
- bool MatchingInlineAsm) {
+bool ARMAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
+ OperandVector &Operands,
+ MCStreamer &Out, unsigned &ErrorInfo,
+ bool MatchingInlineAsm) {
MCInst Inst;
unsigned MatchResult;
@@ -8136,7 +8101,7 @@ MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
if (ErrorInfo >= Operands.size())
return Error(IDLoc, "too few operands for instruction");
- ErrorLoc = ((ARMOperand*)Operands[ErrorInfo])->getStartLoc();
+ ErrorLoc = ((ARMOperand &)*Operands[ErrorInfo]).getStartLoc();
if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc;
}
@@ -8144,7 +8109,7 @@ MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
}
case Match_MnemonicFail:
return Error(IDLoc, "invalid instruction",
- ((ARMOperand*)Operands[0])->getLocRange());
+ ((ARMOperand &)*Operands[0]).getLocRange());
case Match_RequiresNotITBlock:
return Error(IDLoc, "flag setting instruction only valid outside IT block");
case Match_RequiresITBlock:
@@ -8154,12 +8119,12 @@ MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
case Match_RequiresThumb2:
return Error(IDLoc, "instruction variant requires Thumb2");
case Match_ImmRange0_15: {
- SMLoc ErrorLoc = ((ARMOperand*)Operands[ErrorInfo])->getStartLoc();
+ SMLoc ErrorLoc = ((ARMOperand &)*Operands[ErrorInfo]).getStartLoc();
if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc;
return Error(ErrorLoc, "immediate operand must be in the range [0,15]");
}
case Match_ImmRange0_239: {
- SMLoc ErrorLoc = ((ARMOperand*)Operands[ErrorInfo])->getStartLoc();
+ SMLoc ErrorLoc = ((ARMOperand &)*Operands[ErrorInfo]).getStartLoc();
if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc;
return Error(ErrorLoc, "immediate operand must be in the range [0,239]");
}
@@ -8175,7 +8140,7 @@ MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
case Match_DupAlignedMemoryRequires64or128:
case Match_AlignedMemoryRequires64or128or256:
{
- SMLoc ErrorLoc = ((ARMOperand*)Operands[ErrorInfo])->getAlignmentLoc();
+ SMLoc ErrorLoc = ((ARMOperand &)*Operands[ErrorInfo]).getAlignmentLoc();
if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc;
switch (MatchResult) {
default:
@@ -8923,28 +8888,22 @@ bool ARMAsmParser::parseDirectiveRegSave(SMLoc L, bool IsVector) {
}
// RAII object to make sure parsed operands are deleted.
- struct CleanupObject {
- SmallVector<MCParsedAsmOperand *, 1> Operands;
- ~CleanupObject() {
- for (unsigned I = 0, E = Operands.size(); I != E; ++I)
- delete Operands[I];
- }
- } CO;
+ SmallVector<std::unique_ptr<MCParsedAsmOperand>, 1> Operands;
// Parse the register list
- if (parseRegisterList(CO.Operands))
+ if (parseRegisterList(Operands))
return false;
- ARMOperand *Op = (ARMOperand*)CO.Operands[0];
- if (!IsVector && !Op->isRegList()) {
+ ARMOperand &Op = (ARMOperand &)*Operands[0];
+ if (!IsVector && !Op.isRegList()) {
Error(L, ".save expects GPR registers");
return false;
}
- if (IsVector && !Op->isDPRRegList()) {
+ if (IsVector && !Op.isDPRRegList()) {
Error(L, ".vsave expects DPR registers");
return false;
}
- getTargetStreamer().emitRegSave(Op->getRegList(), IsVector);
+ getTargetStreamer().emitRegSave(Op.getRegList(), IsVector);
return false;
}
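A minimal sketch of the ownership model this hunk adopts: a vector of std::unique_ptr frees each parsed operand on erase or scope exit, which is exactly what the deleted CleanupObject did by hand. Operand stands in for MCParsedAsmOperand.
#include <memory>
#include <vector>
struct Operand {};  // stand-in for MCParsedAsmOperand
static void parseAndDiscard() {
  std::vector<std::unique_ptr<Operand>> Operands;
  Operands.emplace_back(new Operand());  // the vector owns each operand
  Operands.erase(Operands.begin());      // erasing frees it automatically
}                                        // scope exit frees the rest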
@@ -9468,23 +9427,23 @@ bool ARMAsmParser::parseDirectiveArchExtension(SMLoc L) {
// Define this matcher function after the auto-generated include so we
// have the match class enum definitions.
-unsigned ARMAsmParser::validateTargetOperandClass(MCParsedAsmOperand *AsmOp,
+unsigned ARMAsmParser::validateTargetOperandClass(MCParsedAsmOperand &AsmOp,
unsigned Kind) {
- ARMOperand *Op = static_cast<ARMOperand*>(AsmOp);
+ ARMOperand &Op = static_cast<ARMOperand &>(AsmOp);
// If the kind is a token for a literal immediate, check if our asm
// operand matches. This is for InstAliases which have a fixed-value
// immediate in the syntax.
switch (Kind) {
default: break;
case MCK__35_0:
- if (Op->isImm())
- if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Op->getImm()))
+ if (Op.isImm())
+ if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Op.getImm()))
if (CE->getValue() == 0)
return Match_Success;
break;
case MCK_ARMSOImm:
- if (Op->isImm()) {
- const MCExpr *SOExpr = Op->getImm();
+ if (Op.isImm()) {
+ const MCExpr *SOExpr = Op.getImm();
int64_t Value;
if (!SOExpr->EvaluateAsAbsolute(Value))
return Match_Success;
@@ -9493,8 +9452,8 @@ unsigned ARMAsmParser::validateTargetOperandClass(MCParsedAsmOperand *AsmOp,
}
break;
case MCK_GPRPair:
- if (Op->isReg() &&
- MRI->getRegClass(ARM::GPRRegClassID).contains(Op->getReg()))
+ if (Op.isReg() &&
+ MRI->getRegClass(ARM::GPRRegClassID).contains(Op.getReg()))
return Match_Success;
break;
}
diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
index e4b785d..228fb57 100644
--- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
+++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
@@ -1092,13 +1092,13 @@ void ARMInstPrinter::printAddrModeImm12Operand(const MCInst *MI, unsigned OpNum,
if (isSub) {
O << ", "
<< markup("<imm:")
- << "#-" << -OffImm
+ << "#-" << formatImm(-OffImm)
<< markup(">");
}
else if (AlwaysPrintImm0 || OffImm > 0) {
O << ", "
<< markup("<imm:")
- << "#" << OffImm
+ << "#" << formatImm(OffImm)
<< markup(">");
}
O << "]" << markup(">");
diff --git a/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h b/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
index 42a1cbb..1686d76 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
@@ -295,7 +295,12 @@ namespace ARMII {
/// MO_OPTION_MASK - Most flags are mutually exclusive; this mask selects
/// just that part of the flag set.
- MO_OPTION_MASK = 0x7f,
+ MO_OPTION_MASK = 0x3f,
+
+ /// MO_DLLIMPORT - On a symbol operand, this represents that the reference
+ /// to the symbol is for an import stub. This is used for DLL import
+ /// storage class indication on Windows.
+ MO_DLLIMPORT = 0x40,
/// MO_NONLAZY - This is an independent flag, on a symbol operand "FOO" it
/// represents a symbol which, if indirect, will get special Darwin mangling
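A small sketch of the resulting flag layout: the mutually exclusive options live in the low six bits, and MO_DLLIMPORT is an independent bit above them. The helper names are illustrative; the constants mirror the patch.
enum : unsigned { OPTION_MASK = 0x3f, DLLIMPORT = 0x40 };
static unsigned getOption(unsigned TargetFlags) {
  return TargetFlags & OPTION_MASK;      // exclusive option field
}
static bool isDLLImport(unsigned TargetFlags) {
  return (TargetFlags & DLLIMPORT) != 0; // independent flag bit
}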
diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
index a4d13ed..7b5d8b0 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
@@ -992,7 +992,8 @@ void ARMTargetELFStreamer::emitLabel(MCSymbol *Symbol) {
return;
const MCSymbolData &SD = Streamer.getOrCreateSymbolData(Symbol);
- if (MCELF::GetType(SD) & (ELF::STT_FUNC << ELF_STT_Shift))
+ unsigned Type = MCELF::GetType(SD);
+ if (Type == ELF_STT_Func || Type == ELF_STT_GnuIFunc)
Streamer.EmitThumbFunc(Symbol);
}
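Why the old test was wrong, in miniature: ELF symbol types are small enum values, not bit flags, so masking with STT_FUNC (2) also matched unrelated types such as STT_TLS (6). The helper below is hypothetical; the constants come from the ELF spec.
enum { SYM_FUNC = 2, SYM_TLS = 6, SYM_GNU_IFUNC = 10 };
static bool isThumbFuncType(unsigned Type) {
  // (Type & SYM_FUNC) would wrongly accept SYM_TLS, since 6 & 2 != 0;
  // compare against the function types instead.
  return Type == SYM_FUNC || Type == SYM_GNU_IFUNC;
}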
@@ -1160,7 +1161,7 @@ void ARMELFStreamer::EmitPersonalityFixup(StringRef Name) {
const MCSymbolRefExpr *PersonalityRef = MCSymbolRefExpr::Create(
PersonalitySym, MCSymbolRefExpr::VK_ARM_NONE, getContext());
- AddValueSymbols(PersonalityRef);
+ visitUsedExpr(*PersonalityRef);
MCDataFragment *DF = getOrCreateDataFragment();
DF->getFixups().push_back(MCFixup::Create(DF->getContents().size(),
PersonalityRef,
@@ -1332,6 +1333,12 @@ MCStreamer *createMCAsmStreamer(MCContext &Ctx, formatted_raw_ostream &OS,
return S;
}
+MCStreamer *createARMNullStreamer(MCContext &Ctx) {
+ MCStreamer *S = llvm::createNullStreamer(Ctx);
+ new ARMTargetStreamer(*S);
+ return S;
+}
+
MCELFStreamer* createARMELFStreamer(MCContext &Context, MCAsmBackend &TAB,
raw_ostream &OS, MCCodeEmitter *Emitter,
bool RelaxAll, bool NoExecStack,
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
index 5b51a52..b8ee555 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
@@ -1047,8 +1047,7 @@ ARMMCCodeEmitter::getHiLo16ImmOpValue(const MCInst &MI, unsigned OpIdx,
// we have a movt or a movw, but that led to misleading results.
// This is now disallowed in the AsmParser in validateInstruction()
// so this should never happen.
- assert(0 && "expression without :upper16: or :lower16:");
- return 0;
+ llvm_unreachable("expression without :upper16: or :lower16:");
}
uint32_t ARMMCCodeEmitter::
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp
index 87ea875..e545e3c 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp
@@ -41,33 +41,6 @@ ARMMCExpr::EvaluateAsRelocatableImpl(MCValue &Res,
return false;
}
-// FIXME: This basically copies MCObjectStreamer::AddValueSymbols. Perhaps
-// that method should be made public?
-static void AddValueSymbols_(const MCExpr *Value, MCAssembler *Asm) {
- switch (Value->getKind()) {
- case MCExpr::Target:
- llvm_unreachable("Can't handle nested target expr!");
-
- case MCExpr::Constant:
- break;
-
- case MCExpr::Binary: {
- const MCBinaryExpr *BE = cast<MCBinaryExpr>(Value);
- AddValueSymbols_(BE->getLHS(), Asm);
- AddValueSymbols_(BE->getRHS(), Asm);
- break;
- }
-
- case MCExpr::SymbolRef:
- Asm->getOrCreateSymbolData(cast<MCSymbolRefExpr>(Value)->getSymbol());
- break;
-
- case MCExpr::Unary:
- AddValueSymbols_(cast<MCUnaryExpr>(Value)->getSubExpr(), Asm);
- break;
- }
-}
-
-void ARMMCExpr::AddValueSymbols(MCAssembler *Asm) const {
- AddValueSymbols_(getSubExpr(), Asm);
+void ARMMCExpr::visitUsedExpr(MCStreamer &Streamer) const {
+ Streamer.visitUsedExpr(*getSubExpr());
}
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h b/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h
index d819139..c5c0b10 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h
@@ -59,7 +59,7 @@ public:
void PrintImpl(raw_ostream &OS) const override;
bool EvaluateAsRelocatableImpl(MCValue &Res,
const MCAsmLayout *Layout) const override;
- void AddValueSymbols(MCAssembler *) const override;
+ void visitUsedExpr(MCStreamer &Streamer) const override;
const MCSection *FindAssociatedSection() const override {
return getSubExpr()->FindAssociatedSection();
}
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
index 04d63a7..2b3855d 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
@@ -427,6 +427,12 @@ extern "C" void LLVMInitializeARMTargetMC() {
TargetRegistry::RegisterAsmStreamer(TheThumbLETarget, createMCAsmStreamer);
TargetRegistry::RegisterAsmStreamer(TheThumbBETarget, createMCAsmStreamer);
+ // Register the null streamer.
+ TargetRegistry::RegisterNullStreamer(TheARMLETarget, createARMNullStreamer);
+ TargetRegistry::RegisterNullStreamer(TheARMBETarget, createARMNullStreamer);
+ TargetRegistry::RegisterNullStreamer(TheThumbLETarget, createARMNullStreamer);
+ TargetRegistry::RegisterNullStreamer(TheThumbBETarget, createARMNullStreamer);
+
// Register the MCInstPrinter.
TargetRegistry::RegisterMCInstPrinter(TheARMLETarget, createARMMCInstPrinter);
TargetRegistry::RegisterMCInstPrinter(TheARMBETarget, createARMMCInstPrinter);
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h
index 8853a8c..5326e56 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h
@@ -51,6 +51,8 @@ MCStreamer *createMCAsmStreamer(MCContext &Ctx, formatted_raw_ostream &OS,
MCInstPrinter *InstPrint, MCCodeEmitter *CE,
MCAsmBackend *TAB, bool ShowInst);
+MCStreamer *createARMNullStreamer(MCContext &Ctx);
+
MCCodeEmitter *createARMLEMCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
const MCSubtargetInfo &STI,
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
index ecfa4e5..186776a 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
@@ -32,6 +32,7 @@ class ARMMachObjectWriter : public MCMachObjectTargetWriter {
const MCFragment *Fragment,
const MCFixup &Fixup,
MCValue Target,
+ unsigned Type,
unsigned Log2Size,
uint64_t &FixedValue);
void RecordARMScatteredHalfRelocation(MachObjectWriter *Writer,
@@ -251,11 +252,11 @@ void ARMMachObjectWriter::RecordARMScatteredRelocation(MachObjectWriter *Writer,
const MCFragment *Fragment,
const MCFixup &Fixup,
MCValue Target,
+ unsigned Type,
unsigned Log2Size,
uint64_t &FixedValue) {
uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset();
unsigned IsPCRel = Writer->isFixupKindPCRel(Asm, Fixup.getKind());
- unsigned Type = MachO::ARM_RELOC_VANILLA;
// See <reloc.h>.
const MCSymbol *A = &Target.getSymA()->getSymbol();
@@ -272,6 +273,7 @@ void ARMMachObjectWriter::RecordARMScatteredRelocation(MachObjectWriter *Writer,
uint32_t Value2 = 0;
if (const MCSymbolRefExpr *B = Target.getSymB()) {
+ assert(Type == MachO::ARM_RELOC_VANILLA && "invalid reloc for 2 symbols");
const MCSymbolData *B_SD = &Asm.getSymbolData(B->getSymbol());
if (!B_SD->getFragment())
@@ -374,7 +376,8 @@ void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer,
return RecordARMScatteredHalfRelocation(Writer, Asm, Layout, Fragment,
Fixup, Target, FixedValue);
return RecordARMScatteredRelocation(Writer, Asm, Layout, Fragment, Fixup,
- Target, Log2Size, FixedValue);
+ Target, RelocType, Log2Size,
+ FixedValue);
}
// Get the symbol data, if any.
@@ -392,7 +395,8 @@ void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer,
Offset += 1 << Log2Size;
if (Offset && SD && !Writer->doesSymbolRequireExternRelocation(SD))
return RecordARMScatteredRelocation(Writer, Asm, Layout, Fragment, Fixup,
- Target, Log2Size, FixedValue);
+ Target, RelocType, Log2Size,
+ FixedValue);
// See <reloc.h>.
uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset();
diff --git a/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp b/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp
index e3cfb05..ad3f1ca 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp
@@ -11,147 +11,12 @@
//
//===----------------------------------------------------------------------===//
#include "llvm/ADT/MapVector.h"
+#include "llvm/MC/ConstantPools.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCStreamer.h"
using namespace llvm;
-
-namespace {
-// A class to keep track of assembler-generated constant pools that are use to
-// implement the ldr-pseudo.
-class ConstantPool {
- typedef SmallVector<std::pair<MCSymbol *, const MCExpr *>, 4> EntryVecTy;
- EntryVecTy Entries;
-
-public:
- // Initialize a new empty constant pool
- ConstantPool() {}
-
- // Add a new entry to the constant pool in the next slot.
- // \param Value is the new entry to put in the constant pool.
- //
- // \returns a MCExpr that references the newly inserted value
- const MCExpr *addEntry(const MCExpr *Value, MCContext &Context);
-
- // Emit the contents of the constant pool using the provided streamer.
- void emitEntries(MCStreamer &Streamer);
-
- // Return true if the constant pool is empty
- bool empty();
-};
-}
-
-namespace llvm {
-class AssemblerConstantPools {
- // Map type used to keep track of per-Section constant pools used by the
- // ldr-pseudo opcode. The map associates a section to its constant pool. The
- // constant pool is a vector of (label, value) pairs. When the ldr
- // pseudo is parsed we insert a new (label, value) pair into the constant pool
- // for the current section and add MCSymbolRefExpr to the new label as
- // an opcode to the ldr. After we have parsed all the user input we
- // output the (label, value) pairs in each constant pool at the end of the
- // section.
- //
- // We use the MapVector for the map type to ensure stable iteration of
- // the sections at the end of the parse. We need to iterate over the
- // sections in a stable order to ensure that we have print the
- // constant pools in a deterministic order when printing an assembly
- // file.
- typedef MapVector<const MCSection *, ConstantPool> ConstantPoolMapTy;
- ConstantPoolMapTy ConstantPools;
-
-public:
- AssemblerConstantPools() {}
- ~AssemblerConstantPools() {}
-
- void emitAll(MCStreamer &Streamer);
- void emitForCurrentSection(MCStreamer &Streamer);
- const MCExpr *addEntry(MCStreamer &Streamer, const MCExpr *Expr);
-
-private:
- ConstantPool *getConstantPool(const MCSection *Section);
- ConstantPool &getOrCreateConstantPool(const MCSection *Section);
-};
-}
-
-//
-// ConstantPool implementation
-//
-// Emit the contents of the constant pool using the provided streamer.
-void ConstantPool::emitEntries(MCStreamer &Streamer) {
- if (Entries.empty())
- return;
- Streamer.EmitCodeAlignment(4); // align to 4-byte address
- Streamer.EmitDataRegion(MCDR_DataRegion);
- for (EntryVecTy::const_iterator I = Entries.begin(), E = Entries.end();
- I != E; ++I) {
- Streamer.EmitLabel(I->first);
- Streamer.EmitValue(I->second, 4);
- }
- Streamer.EmitDataRegion(MCDR_DataRegionEnd);
- Entries.clear();
-}
-
-const MCExpr *ConstantPool::addEntry(const MCExpr *Value, MCContext &Context) {
- MCSymbol *CPEntryLabel = Context.CreateTempSymbol();
-
- Entries.push_back(std::make_pair(CPEntryLabel, Value));
- return MCSymbolRefExpr::Create(CPEntryLabel, Context);
-}
-
-bool ConstantPool::empty() { return Entries.empty(); }
-
-//
-// AssemblerConstantPools implementation
-//
-ConstantPool *
-AssemblerConstantPools::getConstantPool(const MCSection *Section) {
- ConstantPoolMapTy::iterator CP = ConstantPools.find(Section);
- if (CP == ConstantPools.end())
- return nullptr;
-
- return &CP->second;
-}
-
-ConstantPool &
-AssemblerConstantPools::getOrCreateConstantPool(const MCSection *Section) {
- return ConstantPools[Section];
-}
-
-static void emitConstantPool(MCStreamer &Streamer, const MCSection *Section,
- ConstantPool &CP) {
- if (!CP.empty()) {
- Streamer.SwitchSection(Section);
- CP.emitEntries(Streamer);
- }
-}
-
-void AssemblerConstantPools::emitAll(MCStreamer &Streamer) {
- // Dump contents of assembler constant pools.
- for (ConstantPoolMapTy::iterator CPI = ConstantPools.begin(),
- CPE = ConstantPools.end();
- CPI != CPE; ++CPI) {
- const MCSection *Section = CPI->first;
- ConstantPool &CP = CPI->second;
-
- emitConstantPool(Streamer, Section, CP);
- }
-}
-
-void AssemblerConstantPools::emitForCurrentSection(MCStreamer &Streamer) {
- const MCSection *Section = Streamer.getCurrentSection().first;
- if (ConstantPool *CP = getConstantPool(Section)) {
- emitConstantPool(Streamer, Section, *CP);
- }
-}
-
-const MCExpr *AssemblerConstantPools::addEntry(MCStreamer &Streamer,
- const MCExpr *Expr) {
- const MCSection *Section = Streamer.getCurrentSection().first;
- return getOrCreateConstantPool(Section).addEntry(Expr, Streamer.getContext());
-}
-
//
// ARMTargetStreamer Implementation
//
@@ -175,78 +40,34 @@ void ARMTargetStreamer::finish() { ConstantPools->emitAll(Streamer); }
// The remaining callbacks should be handled separately by each
// streamer.
-void ARMTargetStreamer::emitFnStart() {
- llvm_unreachable("unimplemented");
-}
-void ARMTargetStreamer::emitFnEnd() {
- llvm_unreachable("unimplemented");
-}
-void ARMTargetStreamer::emitCantUnwind() {
- llvm_unreachable("unimplemented");
-}
-void ARMTargetStreamer::emitPersonality(const MCSymbol *Personality) {
- llvm_unreachable("unimplemented");
-}
-void ARMTargetStreamer::emitPersonalityIndex(unsigned Index) {
- llvm_unreachable("unimplemented");
-}
-void ARMTargetStreamer::emitHandlerData() {
- llvm_unreachable("unimplemented");
-}
+void ARMTargetStreamer::emitFnStart() {}
+void ARMTargetStreamer::emitFnEnd() {}
+void ARMTargetStreamer::emitCantUnwind() {}
+void ARMTargetStreamer::emitPersonality(const MCSymbol *Personality) {}
+void ARMTargetStreamer::emitPersonalityIndex(unsigned Index) {}
+void ARMTargetStreamer::emitHandlerData() {}
void ARMTargetStreamer::emitSetFP(unsigned FpReg, unsigned SpReg,
- int64_t Offset) {
- llvm_unreachable("unimplemented");
-}
-void ARMTargetStreamer::emitMovSP(unsigned Reg, int64_t Offset) {
- llvm_unreachable("unimplemented");
-}
-void ARMTargetStreamer::emitPad(int64_t Offset) {
- llvm_unreachable("unimplemented");
-}
-void
-ARMTargetStreamer::emitRegSave(const SmallVectorImpl<unsigned> &RegList,
- bool isVector) {
- llvm_unreachable("unimplemented");
-}
-void ARMTargetStreamer::emitUnwindRaw(
- int64_t StackOffset, const SmallVectorImpl<uint8_t> &Opcodes) {
- llvm_unreachable("unimplemented");
-}
-void ARMTargetStreamer::switchVendor(StringRef Vendor) {
- llvm_unreachable("unimplemented");
-}
-void ARMTargetStreamer::emitAttribute(unsigned Attribute, unsigned Value) {
- llvm_unreachable("unimplemented");
-}
+ int64_t Offset) {}
+void ARMTargetStreamer::emitMovSP(unsigned Reg, int64_t Offset) {}
+void ARMTargetStreamer::emitPad(int64_t Offset) {}
+void ARMTargetStreamer::emitRegSave(const SmallVectorImpl<unsigned> &RegList,
+ bool isVector) {}
+void ARMTargetStreamer::emitUnwindRaw(int64_t StackOffset,
+ const SmallVectorImpl<uint8_t> &Opcodes) {
+}
+void ARMTargetStreamer::switchVendor(StringRef Vendor) {}
+void ARMTargetStreamer::emitAttribute(unsigned Attribute, unsigned Value) {}
void ARMTargetStreamer::emitTextAttribute(unsigned Attribute,
- StringRef String) {
- llvm_unreachable("unimplemented");
-}
+ StringRef String) {}
void ARMTargetStreamer::emitIntTextAttribute(unsigned Attribute,
- unsigned IntValue,
- StringRef StringValue) {
- llvm_unreachable("unimplemented");
-}
-void ARMTargetStreamer::emitArch(unsigned Arch) {
- llvm_unreachable("unimplemented");
-}
-void ARMTargetStreamer::emitObjectArch(unsigned Arch) {
- llvm_unreachable("unimplemented");
-}
-void ARMTargetStreamer::emitFPU(unsigned FPU) {
- llvm_unreachable("unimplemented");
-}
-void ARMTargetStreamer::finishAttributeSection() {
- llvm_unreachable("unimplemented");
-}
-void ARMTargetStreamer::emitInst(uint32_t Inst, char Suffix) {
- llvm_unreachable("unimplemented");
-}
-void ARMTargetStreamer::AnnotateTLSDescriptorSequence(
- const MCSymbolRefExpr *SRE) {
- llvm_unreachable("unimplemented");
-}
+ unsigned IntValue,
+ StringRef StringValue) {}
+void ARMTargetStreamer::emitArch(unsigned Arch) {}
+void ARMTargetStreamer::emitObjectArch(unsigned Arch) {}
+void ARMTargetStreamer::emitFPU(unsigned FPU) {}
+void ARMTargetStreamer::finishAttributeSection() {}
+void ARMTargetStreamer::emitInst(uint32_t Inst, char Suffix) {}
+void
+ARMTargetStreamer::AnnotateTLSDescriptorSequence(const MCSymbolRefExpr *SRE) {}
-void ARMTargetStreamer::emitThumbSet(MCSymbol *Symbol, const MCExpr *Value) {
- llvm_unreachable("unimplemented");
-}
+void ARMTargetStreamer::emitThumbSet(MCSymbol *Symbol, const MCExpr *Value) {}
diff --git a/lib/Target/ARM/Thumb1FrameLowering.cpp b/lib/Target/ARM/Thumb1FrameLowering.cpp
index be29dc5..baa97a7 100644
--- a/lib/Target/ARM/Thumb1FrameLowering.cpp
+++ b/lib/Target/ARM/Thumb1FrameLowering.cpp
@@ -21,6 +21,9 @@
using namespace llvm;
+Thumb1FrameLowering::Thumb1FrameLowering(const ARMSubtarget &sti)
+ : ARMFrameLowering(sti) {}
+
bool Thumb1FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const{
const MachineFrameInfo *FFI = MF.getFrameInfo();
unsigned CFSize = FFI->getMaxCallFrameSize();
diff --git a/lib/Target/ARM/Thumb1FrameLowering.h b/lib/Target/ARM/Thumb1FrameLowering.h
index f61874b..a227f8e 100644
--- a/lib/Target/ARM/Thumb1FrameLowering.h
+++ b/lib/Target/ARM/Thumb1FrameLowering.h
@@ -11,11 +11,10 @@
//
//===----------------------------------------------------------------------===//
-#ifndef __THUMB_FRAMEINFO_H_
-#define __THUMB_FRAMEINFO_H_
+#ifndef LLVM_ARM_THUMB1FRAMELOWERING_H
+#define LLVM_ARM_THUMB1FRAMELOWERING_H
#include "ARMFrameLowering.h"
-#include "ARMSubtarget.h"
#include "Thumb1InstrInfo.h"
#include "Thumb1RegisterInfo.h"
#include "llvm/Target/TargetFrameLowering.h"
@@ -24,9 +23,7 @@ namespace llvm {
class Thumb1FrameLowering : public ARMFrameLowering {
public:
- explicit Thumb1FrameLowering(const ARMSubtarget &sti)
- : ARMFrameLowering(sti) {
- }
+ explicit Thumb1FrameLowering(const ARMSubtarget &sti);
/// emitProlog/emitEpilog - These methods insert prolog and epilog code into
/// the function.
diff --git a/lib/Target/ARM/Thumb2SizeReduction.cpp b/lib/Target/ARM/Thumb2SizeReduction.cpp
index 6267ecf..09debe7 100644
--- a/lib/Target/ARM/Thumb2SizeReduction.cpp
+++ b/lib/Target/ARM/Thumb2SizeReduction.cpp
@@ -1010,7 +1010,8 @@ bool Thumb2SizeReduce::runOnMachineFunction(MachineFunction &MF) {
AttributeSet FnAttrs = MF.getFunction()->getAttributes();
OptimizeSize = FnAttrs.hasAttribute(AttributeSet::FunctionIndex,
Attribute::OptimizeForSize);
- MinimizeSize = STI->isMinSize();
+ MinimizeSize =
+ FnAttrs.hasAttribute(AttributeSet::FunctionIndex, Attribute::MinSize);
BlockInfo.clear();
BlockInfo.resize(MF.getNumBlockIDs());
diff --git a/lib/Target/CppBackend/CPPBackend.cpp b/lib/Target/CppBackend/CPPBackend.cpp
index 15b574d..f610fbb 100644
--- a/lib/Target/CppBackend/CPPBackend.cpp
+++ b/lib/Target/CppBackend/CPPBackend.cpp
@@ -1577,6 +1577,10 @@ void CppWriter::printInstruction(const Instruction *I,
nl(Out) << iName << "->setName(\"";
printEscapedString(cxi->getName());
Out << "\");";
+ nl(Out) << iName << "->setVolatile("
+ << (cxi->isVolatile() ? "true" : "false") << ");";
+ nl(Out) << iName << "->setWeak("
+ << (cxi->isWeak() ? "true" : "false") << ");";
break;
}
case Instruction::AtomicRMW: {
@@ -1607,6 +1611,8 @@ void CppWriter::printInstruction(const Instruction *I,
nl(Out) << iName << "->setName(\"";
printEscapedString(rmwi->getName());
Out << "\");";
+ nl(Out) << iName << "->setVolatile("
+ << (rmwi->isVolatile() ? "true" : "false") << ");";
break;
}
case Instruction::LandingPad: {
diff --git a/lib/Target/Hexagon/HexagonFrameLowering.cpp b/lib/Target/Hexagon/HexagonFrameLowering.cpp
index d551ca9..21df12f 100644
--- a/lib/Target/Hexagon/HexagonFrameLowering.cpp
+++ b/lib/Target/Hexagon/HexagonFrameLowering.cpp
@@ -165,8 +165,8 @@ void HexagonFrameLowering::emitEpilogue(MachineFunction &MF,
}
// Replace 'jumpr r31' instruction with dealloc_return for V4 and higher
// versions.
- if (STI.hasV4TOps() && MBBI->getOpcode() == Hexagon::JMPret
- && !DisableDeallocRet) {
+ if (MF.getTarget().getSubtarget<HexagonSubtarget>().hasV4TOps() &&
+ MBBI->getOpcode() == Hexagon::JMPret && !DisableDeallocRet) {
// Check for RESTORE_DEALLOC_RET_JMP_V4 call. Don't emit an extra DEALLOC
// instruction if we encounter it.
MachineBasicBlock::iterator BeforeJMPR =
diff --git a/lib/Target/Hexagon/HexagonFrameLowering.h b/lib/Target/Hexagon/HexagonFrameLowering.h
index 446af16..2d4b0b9 100644
--- a/lib/Target/Hexagon/HexagonFrameLowering.h
+++ b/lib/Target/Hexagon/HexagonFrameLowering.h
@@ -11,20 +11,16 @@
#define HEXAGON_FRAMEINFO_H
#include "Hexagon.h"
-#include "HexagonSubtarget.h"
#include "llvm/Target/TargetFrameLowering.h"
namespace llvm {
class HexagonFrameLowering : public TargetFrameLowering {
private:
- const HexagonSubtarget &STI;
void determineFrameLayout(MachineFunction &MF) const;
public:
- explicit HexagonFrameLowering(const HexagonSubtarget &sti)
- : TargetFrameLowering(StackGrowsDown, 8, 0), STI(sti) {
- }
+ explicit HexagonFrameLowering() : TargetFrameLowering(StackGrowsDown, 8, 0) {}
/// emitProlog/emitEpilog - These methods insert prolog and epilog code into
/// the function.
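With the cached STI member gone, a frame lowering re-derives the subtarget from the MachineFunction at each use, as the HexagonFrameLowering::emitEpilogue hunk above shows. The general shape of that pattern, sketched with illustrative Foo* names:

    void FooFrameLowering::emitEpilogue(MachineFunction &MF,
                                        MachineBasicBlock &MBB) const {
      // No cached subtarget reference: fetch it through the function's
      // target, so one state-free FrameLowering serves every subtarget.
      const FooSubtarget &STI = MF.getTarget().getSubtarget<FooSubtarget>();
      if (STI.hasV4TOps()) {
        // ... subtarget-conditional epilogue code ...
      }
    }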
diff --git a/lib/Target/Hexagon/HexagonISelLowering.cpp b/lib/Target/Hexagon/HexagonISelLowering.cpp
index b8e5d24..a460ea4 100644
--- a/lib/Target/Hexagon/HexagonISelLowering.cpp
+++ b/lib/Target/Hexagon/HexagonISelLowering.cpp
@@ -463,9 +463,10 @@ HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
SmallVector<std::pair<unsigned, SDValue>, 16> RegsToPass;
SmallVector<SDValue, 8> MemOpChains;
+ const HexagonRegisterInfo *QRI = static_cast<const HexagonRegisterInfo *>(
+ DAG.getTarget().getRegisterInfo());
SDValue StackPtr =
- DAG.getCopyFromReg(Chain, dl, TM.getRegisterInfo()->getStackRegister(),
- getPointerTy());
+ DAG.getCopyFromReg(Chain, dl, QRI->getStackRegister(), getPointerTy());
// Walk the register/memloc assignments, inserting copies/loads.
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
@@ -720,7 +721,10 @@ SDValue HexagonTargetLowering::LowerINLINEASM(SDValue Op,
cast<RegisterSDNode>(Node->getOperand(i))->getReg();
// Check whether the register is LR
- if (Reg == TM.getRegisterInfo()->getRARegister()) {
+ const HexagonRegisterInfo *QRI =
+ static_cast<const HexagonRegisterInfo *>(
+ DAG.getTarget().getRegisterInfo());
+ if (Reg == QRI->getRARegister()) {
FuncInfo->setHasClobberLR(true);
break;
}
@@ -812,9 +816,9 @@ HexagonTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
// The Sub result contains the new stack start address, so it
// must be placed in the stack pointer register.
- SDValue CopyChain = DAG.getCopyToReg(Chain, dl,
- TM.getRegisterInfo()->getStackRegister(),
- Sub);
+ const HexagonRegisterInfo *QRI = static_cast<const HexagonRegisterInfo *>(
+ DAG.getTarget().getRegisterInfo());
+ SDValue CopyChain = DAG.getCopyToReg(Chain, dl, QRI->getStackRegister(), Sub);
SDValue Ops[2] = { ArgAdjust, CopyChain };
return DAG.getMergeValues(Ops, dl);
@@ -944,21 +948,6 @@ HexagonTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
}
SDValue
-HexagonTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
- SDValue LHS = Op.getOperand(0);
- SDValue RHS = Op.getOperand(1);
- SDValue CC = Op.getOperand(4);
- SDValue TrueVal = Op.getOperand(2);
- SDValue FalseVal = Op.getOperand(3);
- SDLoc dl(Op);
- SDNode* OpNode = Op.getNode();
- EVT SVT = OpNode->getValueType(0);
-
- SDValue Cond = DAG.getNode(ISD::SETCC, dl, MVT::i1, LHS, RHS, CC);
- return DAG.getNode(ISD::SELECT, dl, SVT, Cond, TrueVal, FalseVal);
-}
-
-SDValue
HexagonTargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const {
EVT ValTy = Op.getValueType();
SDLoc dl(Op);
@@ -975,7 +964,7 @@ HexagonTargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const {
SDValue
HexagonTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const {
- const TargetRegisterInfo *TRI = TM.getRegisterInfo();
+ const TargetRegisterInfo *TRI = DAG.getTarget().getRegisterInfo();
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
MFI->setReturnAddressIsTaken(true);
@@ -1001,7 +990,8 @@ HexagonTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const {
SDValue
HexagonTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
- const HexagonRegisterInfo *TRI = TM.getRegisterInfo();
+ const HexagonRegisterInfo *TRI =
+ static_cast<const HexagonRegisterInfo *>(DAG.getTarget().getRegisterInfo());
MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
MFI->setFrameAddressIsTaken(true);
@@ -1053,429 +1043,422 @@ HexagonTargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const {
// TargetLowering Implementation
//===----------------------------------------------------------------------===//
-HexagonTargetLowering::HexagonTargetLowering(HexagonTargetMachine
- &targetmachine)
- : TargetLowering(targetmachine, new HexagonTargetObjectFile()),
- TM(targetmachine) {
-
- const HexagonRegisterInfo* QRI = TM.getRegisterInfo();
+HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &targetmachine)
+ : TargetLowering(targetmachine, new HexagonTargetObjectFile()),
+ TM(targetmachine) {
- // Set up the register classes.
- addRegisterClass(MVT::i32, &Hexagon::IntRegsRegClass);
- addRegisterClass(MVT::i64, &Hexagon::DoubleRegsRegClass);
-
- if (QRI->Subtarget.hasV5TOps()) {
- addRegisterClass(MVT::f32, &Hexagon::IntRegsRegClass);
- addRegisterClass(MVT::f64, &Hexagon::DoubleRegsRegClass);
- }
+ const HexagonSubtarget &Subtarget = TM.getSubtarget<HexagonSubtarget>();
- addRegisterClass(MVT::i1, &Hexagon::PredRegsRegClass);
+ // Set up the register classes.
+ addRegisterClass(MVT::i32, &Hexagon::IntRegsRegClass);
+ addRegisterClass(MVT::i64, &Hexagon::DoubleRegsRegClass);
- computeRegisterProperties();
+ if (Subtarget.hasV5TOps()) {
+ addRegisterClass(MVT::f32, &Hexagon::IntRegsRegClass);
+ addRegisterClass(MVT::f64, &Hexagon::DoubleRegsRegClass);
+ }
- // Align loop entry
- setPrefLoopAlignment(4);
+ addRegisterClass(MVT::i1, &Hexagon::PredRegsRegClass);
- // Limits for inline expansion of memcpy/memmove
- MaxStoresPerMemcpy = 6;
- MaxStoresPerMemmove = 6;
+ computeRegisterProperties();
- //
- // Library calls for unsupported operations
- //
+ // Align loop entry
+ setPrefLoopAlignment(4);
- setLibcallName(RTLIB::SINTTOFP_I128_F64, "__hexagon_floattidf");
- setLibcallName(RTLIB::SINTTOFP_I128_F32, "__hexagon_floattisf");
-
- setLibcallName(RTLIB::FPTOUINT_F32_I128, "__hexagon_fixunssfti");
- setLibcallName(RTLIB::FPTOUINT_F64_I128, "__hexagon_fixunsdfti");
-
- setLibcallName(RTLIB::FPTOSINT_F32_I128, "__hexagon_fixsfti");
- setLibcallName(RTLIB::FPTOSINT_F64_I128, "__hexagon_fixdfti");
-
- setLibcallName(RTLIB::SDIV_I32, "__hexagon_divsi3");
- setOperationAction(ISD::SDIV, MVT::i32, Expand);
- setLibcallName(RTLIB::SREM_I32, "__hexagon_umodsi3");
- setOperationAction(ISD::SREM, MVT::i32, Expand);
-
- setLibcallName(RTLIB::SDIV_I64, "__hexagon_divdi3");
- setOperationAction(ISD::SDIV, MVT::i64, Expand);
- setLibcallName(RTLIB::SREM_I64, "__hexagon_moddi3");
- setOperationAction(ISD::SREM, MVT::i64, Expand);
-
- setLibcallName(RTLIB::UDIV_I32, "__hexagon_udivsi3");
- setOperationAction(ISD::UDIV, MVT::i32, Expand);
-
- setLibcallName(RTLIB::UDIV_I64, "__hexagon_udivdi3");
- setOperationAction(ISD::UDIV, MVT::i64, Expand);
-
- setLibcallName(RTLIB::UREM_I32, "__hexagon_umodsi3");
- setOperationAction(ISD::UREM, MVT::i32, Expand);
-
- setLibcallName(RTLIB::UREM_I64, "__hexagon_umoddi3");
- setOperationAction(ISD::UREM, MVT::i64, Expand);
-
- setLibcallName(RTLIB::DIV_F32, "__hexagon_divsf3");
- setOperationAction(ISD::FDIV, MVT::f32, Expand);
-
- setLibcallName(RTLIB::DIV_F64, "__hexagon_divdf3");
- setOperationAction(ISD::FDIV, MVT::f64, Expand);
-
- setOperationAction(ISD::FSQRT, MVT::f32, Expand);
- setOperationAction(ISD::FSQRT, MVT::f64, Expand);
- setOperationAction(ISD::FSIN, MVT::f32, Expand);
- setOperationAction(ISD::FSIN, MVT::f64, Expand);
-
- if (QRI->Subtarget.hasV5TOps()) {
- // Hexagon V5 Support.
- setOperationAction(ISD::FADD, MVT::f32, Legal);
- setOperationAction(ISD::FADD, MVT::f64, Legal);
- setOperationAction(ISD::FP_EXTEND, MVT::f32, Legal);
- setCondCodeAction(ISD::SETOEQ, MVT::f32, Legal);
- setCondCodeAction(ISD::SETOEQ, MVT::f64, Legal);
- setCondCodeAction(ISD::SETUEQ, MVT::f32, Legal);
- setCondCodeAction(ISD::SETUEQ, MVT::f64, Legal);
-
- setCondCodeAction(ISD::SETOGE, MVT::f32, Legal);
- setCondCodeAction(ISD::SETOGE, MVT::f64, Legal);
- setCondCodeAction(ISD::SETUGE, MVT::f32, Legal);
- setCondCodeAction(ISD::SETUGE, MVT::f64, Legal);
-
- setCondCodeAction(ISD::SETOGT, MVT::f32, Legal);
- setCondCodeAction(ISD::SETOGT, MVT::f64, Legal);
- setCondCodeAction(ISD::SETUGT, MVT::f32, Legal);
- setCondCodeAction(ISD::SETUGT, MVT::f64, Legal);
-
- setCondCodeAction(ISD::SETOLE, MVT::f32, Legal);
- setCondCodeAction(ISD::SETOLE, MVT::f64, Legal);
- setCondCodeAction(ISD::SETOLT, MVT::f32, Legal);
- setCondCodeAction(ISD::SETOLT, MVT::f64, Legal);
-
- setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
- setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
-
- setOperationAction(ISD::FP_TO_UINT, MVT::i1, Promote);
- setOperationAction(ISD::FP_TO_SINT, MVT::i1, Promote);
- setOperationAction(ISD::UINT_TO_FP, MVT::i1, Promote);
- setOperationAction(ISD::SINT_TO_FP, MVT::i1, Promote);
-
- setOperationAction(ISD::FP_TO_UINT, MVT::i8, Promote);
- setOperationAction(ISD::FP_TO_SINT, MVT::i8, Promote);
- setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
- setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
-
- setOperationAction(ISD::FP_TO_UINT, MVT::i16, Promote);
- setOperationAction(ISD::FP_TO_SINT, MVT::i16, Promote);
- setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
- setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
-
- setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
- setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
- setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);
- setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
-
- setOperationAction(ISD::FP_TO_UINT, MVT::i64, Legal);
- setOperationAction(ISD::FP_TO_SINT, MVT::i64, Legal);
- setOperationAction(ISD::UINT_TO_FP, MVT::i64, Legal);
- setOperationAction(ISD::SINT_TO_FP, MVT::i64, Legal);
-
- setOperationAction(ISD::FABS, MVT::f32, Legal);
- setOperationAction(ISD::FABS, MVT::f64, Expand);
-
- setOperationAction(ISD::FNEG, MVT::f32, Legal);
- setOperationAction(ISD::FNEG, MVT::f64, Expand);
- } else {
+ // Limits for inline expansion of memcpy/memmove
+ MaxStoresPerMemcpy = 6;
+ MaxStoresPerMemmove = 6;
- // Expand fp<->uint.
- setOperationAction(ISD::FP_TO_SINT, MVT::i32, Expand);
- setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
+ //
+ // Library calls for unsupported operations
+ //
- setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand);
- setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);
+ setLibcallName(RTLIB::SINTTOFP_I128_F64, "__hexagon_floattidf");
+ setLibcallName(RTLIB::SINTTOFP_I128_F32, "__hexagon_floattisf");
+
+ setLibcallName(RTLIB::FPTOUINT_F32_I128, "__hexagon_fixunssfti");
+ setLibcallName(RTLIB::FPTOUINT_F64_I128, "__hexagon_fixunsdfti");
+
+ setLibcallName(RTLIB::FPTOSINT_F32_I128, "__hexagon_fixsfti");
+ setLibcallName(RTLIB::FPTOSINT_F64_I128, "__hexagon_fixdfti");
+
+ setLibcallName(RTLIB::SDIV_I32, "__hexagon_divsi3");
+ setOperationAction(ISD::SDIV, MVT::i32, Expand);
+ setLibcallName(RTLIB::SREM_I32, "__hexagon_umodsi3");
+ setOperationAction(ISD::SREM, MVT::i32, Expand);
+
+ setLibcallName(RTLIB::SDIV_I64, "__hexagon_divdi3");
+ setOperationAction(ISD::SDIV, MVT::i64, Expand);
+ setLibcallName(RTLIB::SREM_I64, "__hexagon_moddi3");
+ setOperationAction(ISD::SREM, MVT::i64, Expand);
+
+ setLibcallName(RTLIB::UDIV_I32, "__hexagon_udivsi3");
+ setOperationAction(ISD::UDIV, MVT::i32, Expand);
+
+ setLibcallName(RTLIB::UDIV_I64, "__hexagon_udivdi3");
+ setOperationAction(ISD::UDIV, MVT::i64, Expand);
+
+ setLibcallName(RTLIB::UREM_I32, "__hexagon_umodsi3");
+ setOperationAction(ISD::UREM, MVT::i32, Expand);
+
+ setLibcallName(RTLIB::UREM_I64, "__hexagon_umoddi3");
+ setOperationAction(ISD::UREM, MVT::i64, Expand);
+
+ setLibcallName(RTLIB::DIV_F32, "__hexagon_divsf3");
+ setOperationAction(ISD::FDIV, MVT::f32, Expand);
+
+ setLibcallName(RTLIB::DIV_F64, "__hexagon_divdf3");
+ setOperationAction(ISD::FDIV, MVT::f64, Expand);
+
+ setOperationAction(ISD::FSQRT, MVT::f32, Expand);
+ setOperationAction(ISD::FSQRT, MVT::f64, Expand);
+ setOperationAction(ISD::FSIN, MVT::f32, Expand);
+ setOperationAction(ISD::FSIN, MVT::f64, Expand);
+
+ if (Subtarget.hasV5TOps()) {
+ // Hexagon V5 Support.
+ setOperationAction(ISD::FADD, MVT::f32, Legal);
+ setOperationAction(ISD::FADD, MVT::f64, Legal);
+ setOperationAction(ISD::FP_EXTEND, MVT::f32, Legal);
+ setCondCodeAction(ISD::SETOEQ, MVT::f32, Legal);
+ setCondCodeAction(ISD::SETOEQ, MVT::f64, Legal);
+ setCondCodeAction(ISD::SETUEQ, MVT::f32, Legal);
+ setCondCodeAction(ISD::SETUEQ, MVT::f64, Legal);
+
+ setCondCodeAction(ISD::SETOGE, MVT::f32, Legal);
+ setCondCodeAction(ISD::SETOGE, MVT::f64, Legal);
+ setCondCodeAction(ISD::SETUGE, MVT::f32, Legal);
+ setCondCodeAction(ISD::SETUGE, MVT::f64, Legal);
+
+ setCondCodeAction(ISD::SETOGT, MVT::f32, Legal);
+ setCondCodeAction(ISD::SETOGT, MVT::f64, Legal);
+ setCondCodeAction(ISD::SETUGT, MVT::f32, Legal);
+ setCondCodeAction(ISD::SETUGT, MVT::f64, Legal);
+
+ setCondCodeAction(ISD::SETOLE, MVT::f32, Legal);
+ setCondCodeAction(ISD::SETOLE, MVT::f64, Legal);
+ setCondCodeAction(ISD::SETOLT, MVT::f32, Legal);
+ setCondCodeAction(ISD::SETOLT, MVT::f64, Legal);
+
+ setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
+ setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
+
+ setOperationAction(ISD::FP_TO_UINT, MVT::i1, Promote);
+ setOperationAction(ISD::FP_TO_SINT, MVT::i1, Promote);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i1, Promote);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i1, Promote);
+
+ setOperationAction(ISD::FP_TO_UINT, MVT::i8, Promote);
+ setOperationAction(ISD::FP_TO_SINT, MVT::i8, Promote);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
+
+ setOperationAction(ISD::FP_TO_UINT, MVT::i16, Promote);
+ setOperationAction(ISD::FP_TO_SINT, MVT::i16, Promote);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
+
+ setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
+ setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
+
+ setOperationAction(ISD::FP_TO_UINT, MVT::i64, Legal);
+ setOperationAction(ISD::FP_TO_SINT, MVT::i64, Legal);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i64, Legal);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i64, Legal);
+
+ setOperationAction(ISD::FABS, MVT::f32, Legal);
+ setOperationAction(ISD::FABS, MVT::f64, Expand);
+
+ setOperationAction(ISD::FNEG, MVT::f32, Legal);
+ setOperationAction(ISD::FNEG, MVT::f64, Expand);
+ } else {
- setLibcallName(RTLIB::SINTTOFP_I64_F32, "__hexagon_floatdisf");
- setLibcallName(RTLIB::UINTTOFP_I64_F32, "__hexagon_floatundisf");
+ // Expand fp<->uint.
+ setOperationAction(ISD::FP_TO_SINT, MVT::i32, Expand);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
- setLibcallName(RTLIB::UINTTOFP_I32_F32, "__hexagon_floatunsisf");
- setLibcallName(RTLIB::SINTTOFP_I32_F32, "__hexagon_floatsisf");
+ setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);
- setLibcallName(RTLIB::SINTTOFP_I64_F64, "__hexagon_floatdidf");
- setLibcallName(RTLIB::UINTTOFP_I64_F64, "__hexagon_floatundidf");
+ setLibcallName(RTLIB::SINTTOFP_I64_F32, "__hexagon_floatdisf");
+ setLibcallName(RTLIB::UINTTOFP_I64_F32, "__hexagon_floatundisf");
- setLibcallName(RTLIB::UINTTOFP_I32_F64, "__hexagon_floatunsidf");
- setLibcallName(RTLIB::SINTTOFP_I32_F64, "__hexagon_floatsidf");
+ setLibcallName(RTLIB::UINTTOFP_I32_F32, "__hexagon_floatunsisf");
+ setLibcallName(RTLIB::SINTTOFP_I32_F32, "__hexagon_floatsisf");
- setLibcallName(RTLIB::FPTOUINT_F32_I32, "__hexagon_fixunssfsi");
- setLibcallName(RTLIB::FPTOUINT_F32_I64, "__hexagon_fixunssfdi");
+ setLibcallName(RTLIB::SINTTOFP_I64_F64, "__hexagon_floatdidf");
+ setLibcallName(RTLIB::UINTTOFP_I64_F64, "__hexagon_floatundidf");
- setLibcallName(RTLIB::FPTOSINT_F64_I64, "__hexagon_fixdfdi");
- setLibcallName(RTLIB::FPTOSINT_F32_I64, "__hexagon_fixsfdi");
+ setLibcallName(RTLIB::UINTTOFP_I32_F64, "__hexagon_floatunsidf");
+ setLibcallName(RTLIB::SINTTOFP_I32_F64, "__hexagon_floatsidf");
- setLibcallName(RTLIB::FPTOUINT_F64_I32, "__hexagon_fixunsdfsi");
- setLibcallName(RTLIB::FPTOUINT_F64_I64, "__hexagon_fixunsdfdi");
+ setLibcallName(RTLIB::FPTOUINT_F32_I32, "__hexagon_fixunssfsi");
+ setLibcallName(RTLIB::FPTOUINT_F32_I64, "__hexagon_fixunssfdi");
- setLibcallName(RTLIB::ADD_F64, "__hexagon_adddf3");
- setOperationAction(ISD::FADD, MVT::f64, Expand);
+ setLibcallName(RTLIB::FPTOSINT_F64_I64, "__hexagon_fixdfdi");
+ setLibcallName(RTLIB::FPTOSINT_F32_I64, "__hexagon_fixsfdi");
- setLibcallName(RTLIB::ADD_F32, "__hexagon_addsf3");
- setOperationAction(ISD::FADD, MVT::f32, Expand);
+ setLibcallName(RTLIB::FPTOUINT_F64_I32, "__hexagon_fixunsdfsi");
+ setLibcallName(RTLIB::FPTOUINT_F64_I64, "__hexagon_fixunsdfdi");
- setLibcallName(RTLIB::FPEXT_F32_F64, "__hexagon_extendsfdf2");
- setOperationAction(ISD::FP_EXTEND, MVT::f32, Expand);
+ setLibcallName(RTLIB::ADD_F64, "__hexagon_adddf3");
+ setOperationAction(ISD::FADD, MVT::f64, Expand);
- setLibcallName(RTLIB::OEQ_F32, "__hexagon_eqsf2");
- setCondCodeAction(ISD::SETOEQ, MVT::f32, Expand);
+ setLibcallName(RTLIB::ADD_F32, "__hexagon_addsf3");
+ setOperationAction(ISD::FADD, MVT::f32, Expand);
- setLibcallName(RTLIB::OEQ_F64, "__hexagon_eqdf2");
- setCondCodeAction(ISD::SETOEQ, MVT::f64, Expand);
+ setLibcallName(RTLIB::FPEXT_F32_F64, "__hexagon_extendsfdf2");
+ setOperationAction(ISD::FP_EXTEND, MVT::f32, Expand);
- setLibcallName(RTLIB::OGE_F32, "__hexagon_gesf2");
- setCondCodeAction(ISD::SETOGE, MVT::f32, Expand);
+ setLibcallName(RTLIB::OEQ_F32, "__hexagon_eqsf2");
+ setCondCodeAction(ISD::SETOEQ, MVT::f32, Expand);
- setLibcallName(RTLIB::OGE_F64, "__hexagon_gedf2");
- setCondCodeAction(ISD::SETOGE, MVT::f64, Expand);
+ setLibcallName(RTLIB::OEQ_F64, "__hexagon_eqdf2");
+ setCondCodeAction(ISD::SETOEQ, MVT::f64, Expand);
- setLibcallName(RTLIB::OGT_F32, "__hexagon_gtsf2");
- setCondCodeAction(ISD::SETOGT, MVT::f32, Expand);
+ setLibcallName(RTLIB::OGE_F32, "__hexagon_gesf2");
+ setCondCodeAction(ISD::SETOGE, MVT::f32, Expand);
- setLibcallName(RTLIB::OGT_F64, "__hexagon_gtdf2");
- setCondCodeAction(ISD::SETOGT, MVT::f64, Expand);
+ setLibcallName(RTLIB::OGE_F64, "__hexagon_gedf2");
+ setCondCodeAction(ISD::SETOGE, MVT::f64, Expand);
- setLibcallName(RTLIB::FPTOSINT_F64_I32, "__hexagon_fixdfsi");
- setOperationAction(ISD::FP_TO_SINT, MVT::f64, Expand);
+ setLibcallName(RTLIB::OGT_F32, "__hexagon_gtsf2");
+ setCondCodeAction(ISD::SETOGT, MVT::f32, Expand);
- setLibcallName(RTLIB::FPTOSINT_F32_I32, "__hexagon_fixsfsi");
- setOperationAction(ISD::FP_TO_SINT, MVT::f32, Expand);
+ setLibcallName(RTLIB::OGT_F64, "__hexagon_gtdf2");
+ setCondCodeAction(ISD::SETOGT, MVT::f64, Expand);
- setLibcallName(RTLIB::OLE_F64, "__hexagon_ledf2");
- setCondCodeAction(ISD::SETOLE, MVT::f64, Expand);
+ setLibcallName(RTLIB::FPTOSINT_F64_I32, "__hexagon_fixdfsi");
+ setOperationAction(ISD::FP_TO_SINT, MVT::f64, Expand);
- setLibcallName(RTLIB::OLE_F32, "__hexagon_lesf2");
- setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
+ setLibcallName(RTLIB::FPTOSINT_F32_I32, "__hexagon_fixsfsi");
+ setOperationAction(ISD::FP_TO_SINT, MVT::f32, Expand);
- setLibcallName(RTLIB::OLT_F64, "__hexagon_ltdf2");
- setCondCodeAction(ISD::SETOLT, MVT::f64, Expand);
+ setLibcallName(RTLIB::OLE_F64, "__hexagon_ledf2");
+ setCondCodeAction(ISD::SETOLE, MVT::f64, Expand);
- setLibcallName(RTLIB::OLT_F32, "__hexagon_ltsf2");
- setCondCodeAction(ISD::SETOLT, MVT::f32, Expand);
+ setLibcallName(RTLIB::OLE_F32, "__hexagon_lesf2");
+ setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
- setLibcallName(RTLIB::MUL_F64, "__hexagon_muldf3");
- setOperationAction(ISD::FMUL, MVT::f64, Expand);
+ setLibcallName(RTLIB::OLT_F64, "__hexagon_ltdf2");
+ setCondCodeAction(ISD::SETOLT, MVT::f64, Expand);
- setLibcallName(RTLIB::MUL_F32, "__hexagon_mulsf3");
- setOperationAction(ISD::MUL, MVT::f32, Expand);
+ setLibcallName(RTLIB::OLT_F32, "__hexagon_ltsf2");
+ setCondCodeAction(ISD::SETOLT, MVT::f32, Expand);
- setLibcallName(RTLIB::UNE_F64, "__hexagon_nedf2");
- setCondCodeAction(ISD::SETUNE, MVT::f64, Expand);
+ setLibcallName(RTLIB::MUL_F64, "__hexagon_muldf3");
+ setOperationAction(ISD::FMUL, MVT::f64, Expand);
- setLibcallName(RTLIB::UNE_F32, "__hexagon_nesf2");
+ setLibcallName(RTLIB::MUL_F32, "__hexagon_mulsf3");
+ setOperationAction(ISD::MUL, MVT::f32, Expand);
- setLibcallName(RTLIB::SUB_F64, "__hexagon_subdf3");
- setOperationAction(ISD::SUB, MVT::f64, Expand);
+ setLibcallName(RTLIB::UNE_F64, "__hexagon_nedf2");
+ setCondCodeAction(ISD::SETUNE, MVT::f64, Expand);
- setLibcallName(RTLIB::SUB_F32, "__hexagon_subsf3");
- setOperationAction(ISD::SUB, MVT::f32, Expand);
+ setLibcallName(RTLIB::UNE_F32, "__hexagon_nesf2");
- setLibcallName(RTLIB::FPROUND_F64_F32, "__hexagon_truncdfsf2");
- setOperationAction(ISD::FP_ROUND, MVT::f64, Expand);
+ setLibcallName(RTLIB::SUB_F64, "__hexagon_subdf3");
+ setOperationAction(ISD::SUB, MVT::f64, Expand);
- setLibcallName(RTLIB::UO_F64, "__hexagon_unorddf2");
- setCondCodeAction(ISD::SETUO, MVT::f64, Expand);
+ setLibcallName(RTLIB::SUB_F32, "__hexagon_subsf3");
+ setOperationAction(ISD::SUB, MVT::f32, Expand);
- setLibcallName(RTLIB::O_F64, "__hexagon_unorddf2");
- setCondCodeAction(ISD::SETO, MVT::f64, Expand);
+ setLibcallName(RTLIB::FPROUND_F64_F32, "__hexagon_truncdfsf2");
+ setOperationAction(ISD::FP_ROUND, MVT::f64, Expand);
- setLibcallName(RTLIB::O_F32, "__hexagon_unordsf2");
- setCondCodeAction(ISD::SETO, MVT::f32, Expand);
+ setLibcallName(RTLIB::UO_F64, "__hexagon_unorddf2");
+ setCondCodeAction(ISD::SETUO, MVT::f64, Expand);
- setLibcallName(RTLIB::UO_F32, "__hexagon_unordsf2");
- setCondCodeAction(ISD::SETUO, MVT::f32, Expand);
+ setLibcallName(RTLIB::O_F64, "__hexagon_unorddf2");
+ setCondCodeAction(ISD::SETO, MVT::f64, Expand);
- setOperationAction(ISD::FABS, MVT::f32, Expand);
- setOperationAction(ISD::FABS, MVT::f64, Expand);
- setOperationAction(ISD::FNEG, MVT::f32, Expand);
- setOperationAction(ISD::FNEG, MVT::f64, Expand);
- }
+ setLibcallName(RTLIB::O_F32, "__hexagon_unordsf2");
+ setCondCodeAction(ISD::SETO, MVT::f32, Expand);
- setLibcallName(RTLIB::SREM_I32, "__hexagon_modsi3");
- setOperationAction(ISD::SREM, MVT::i32, Expand);
-
- setIndexedLoadAction(ISD::POST_INC, MVT::i8, Legal);
- setIndexedLoadAction(ISD::POST_INC, MVT::i16, Legal);
- setIndexedLoadAction(ISD::POST_INC, MVT::i32, Legal);
- setIndexedLoadAction(ISD::POST_INC, MVT::i64, Legal);
-
- setIndexedStoreAction(ISD::POST_INC, MVT::i8, Legal);
- setIndexedStoreAction(ISD::POST_INC, MVT::i16, Legal);
- setIndexedStoreAction(ISD::POST_INC, MVT::i32, Legal);
- setIndexedStoreAction(ISD::POST_INC, MVT::i64, Legal);
-
- setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
-
- // Turn FP extload into load/fextend.
- setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
- // Hexagon has an i1 sign-extending load.
- setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Expand);
- // Turn FP truncstore into trunc + store.
- setTruncStoreAction(MVT::f64, MVT::f32, Expand);
-
- // Custom legalize GlobalAddress nodes into CONST32.
- setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
- setOperationAction(ISD::GlobalAddress, MVT::i8, Custom);
- setOperationAction(ISD::BlockAddress, MVT::i32, Custom);
- // Truncate action?
- setOperationAction(ISD::TRUNCATE, MVT::i64, Expand);
-
- // Hexagon doesn't have sext_inreg, replace them with shl/sra.
- setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1 , Expand);
-
- // Hexagon has no REM or DIVREM operations.
- setOperationAction(ISD::UREM, MVT::i32, Expand);
- setOperationAction(ISD::SREM, MVT::i32, Expand);
- setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
- setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
- setOperationAction(ISD::SREM, MVT::i64, Expand);
- setOperationAction(ISD::SDIVREM, MVT::i64, Expand);
- setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
-
- setOperationAction(ISD::BSWAP, MVT::i64, Expand);
-
- // Lower SELECT_CC to SETCC and SELECT.
- setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
- setOperationAction(ISD::SELECT_CC, MVT::i64, Custom);
-
- if (QRI->Subtarget.hasV5TOps()) {
-
- // We need to mark the operation action of the SELECT node as Custom,
- // so that we don't go into the infinite
- // select -> setcc -> select_cc -> select loop.
- setOperationAction(ISD::SELECT, MVT::f32, Custom);
- setOperationAction(ISD::SELECT, MVT::f64, Custom);
-
- setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
- setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
- setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
+ setLibcallName(RTLIB::UO_F32, "__hexagon_unordsf2");
+ setCondCodeAction(ISD::SETUO, MVT::f32, Expand);
- } else {
-
- // Hexagon has no select or setcc: expand to SELECT_CC.
- setOperationAction(ISD::SELECT, MVT::f32, Expand);
- setOperationAction(ISD::SELECT, MVT::f64, Expand);
+ setOperationAction(ISD::FABS, MVT::f32, Expand);
+ setOperationAction(ISD::FABS, MVT::f64, Expand);
+ setOperationAction(ISD::FNEG, MVT::f32, Expand);
+ setOperationAction(ISD::FNEG, MVT::f64, Expand);
+ }
- // This is a workaround documented in DAGCombiner.cpp:2892. We don't
- // support SELECT_CC on every type.
- setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
+ setLibcallName(RTLIB::SREM_I32, "__hexagon_modsi3");
+ setOperationAction(ISD::SREM, MVT::i32, Expand);
+
+ setIndexedLoadAction(ISD::POST_INC, MVT::i8, Legal);
+ setIndexedLoadAction(ISD::POST_INC, MVT::i16, Legal);
+ setIndexedLoadAction(ISD::POST_INC, MVT::i32, Legal);
+ setIndexedLoadAction(ISD::POST_INC, MVT::i64, Legal);
+
+ setIndexedStoreAction(ISD::POST_INC, MVT::i8, Legal);
+ setIndexedStoreAction(ISD::POST_INC, MVT::i16, Legal);
+ setIndexedStoreAction(ISD::POST_INC, MVT::i32, Legal);
+ setIndexedStoreAction(ISD::POST_INC, MVT::i64, Legal);
+
+ setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
+
+ // Turn FP extload into load/fextend.
+ setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
+ // Hexagon has an i1 sign-extending load.
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Expand);
+ // Turn FP truncstore into trunc + store.
+ setTruncStoreAction(MVT::f64, MVT::f32, Expand);
+
+ // Custom legalize GlobalAddress nodes into CONST32.
+ setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
+ setOperationAction(ISD::GlobalAddress, MVT::i8, Custom);
+ setOperationAction(ISD::BlockAddress, MVT::i32, Custom);
+ // Truncate action?
+ setOperationAction(ISD::TRUNCATE, MVT::i64, Expand);
+
+ // Hexagon doesn't have sext_inreg, replace them with shl/sra.
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
+
+ // Hexagon has no REM or DIVREM operations.
+ setOperationAction(ISD::UREM, MVT::i32, Expand);
+ setOperationAction(ISD::SREM, MVT::i32, Expand);
+ setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
+ setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
+ setOperationAction(ISD::SREM, MVT::i64, Expand);
+ setOperationAction(ISD::SDIVREM, MVT::i64, Expand);
+ setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
+
+ setOperationAction(ISD::BSWAP, MVT::i64, Expand);
+
+ // Lower SELECT_CC to SETCC and SELECT.
+ setOperationAction(ISD::SELECT_CC, MVT::i1, Expand);
+ setOperationAction(ISD::SELECT_CC, MVT::i32, Expand);
+ setOperationAction(ISD::SELECT_CC, MVT::i64, Expand);
+
+ if (Subtarget.hasV5TOps()) {
+
+ // We need to mark the operation action of the SELECT node as Custom,
+ // so that we don't go into the infinite
+ // select -> setcc -> select_cc -> select loop.
+ setOperationAction(ISD::SELECT, MVT::f32, Custom);
+ setOperationAction(ISD::SELECT, MVT::f64, Custom);
+
+ setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
+ setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
- }
+ } else {
- if (EmitJumpTables) {
- setOperationAction(ISD::BR_JT, MVT::Other, Custom);
- } else {
- setOperationAction(ISD::BR_JT, MVT::Other, Expand);
- }
- // Increase jump tables cutover to 5, was 4.
- setMinimumJumpTableEntries(5);
-
- setOperationAction(ISD::BR_CC, MVT::f32, Expand);
- setOperationAction(ISD::BR_CC, MVT::f64, Expand);
- setOperationAction(ISD::BR_CC, MVT::i1, Expand);
- setOperationAction(ISD::BR_CC, MVT::i32, Expand);
- setOperationAction(ISD::BR_CC, MVT::i64, Expand);
-
- setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
-
- setOperationAction(ISD::FSIN , MVT::f64, Expand);
- setOperationAction(ISD::FCOS , MVT::f64, Expand);
- setOperationAction(ISD::FREM , MVT::f64, Expand);
- setOperationAction(ISD::FSIN , MVT::f32, Expand);
- setOperationAction(ISD::FCOS , MVT::f32, Expand);
- setOperationAction(ISD::FREM , MVT::f32, Expand);
- setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
- setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
-
- // In V4, we have double word add/sub with carry. The problem with
- // modelling this instruction is that it produces 2 results - Rdd and Px.
- // To model update of Px, we will have to use Defs[p0..p3] which will
- // cause any predicate live range to spill. So, we pretend we don't
- // have these instructions.
- setOperationAction(ISD::ADDE, MVT::i8, Expand);
- setOperationAction(ISD::ADDE, MVT::i16, Expand);
- setOperationAction(ISD::ADDE, MVT::i32, Expand);
- setOperationAction(ISD::ADDE, MVT::i64, Expand);
- setOperationAction(ISD::SUBE, MVT::i8, Expand);
- setOperationAction(ISD::SUBE, MVT::i16, Expand);
- setOperationAction(ISD::SUBE, MVT::i32, Expand);
- setOperationAction(ISD::SUBE, MVT::i64, Expand);
- setOperationAction(ISD::ADDC, MVT::i8, Expand);
- setOperationAction(ISD::ADDC, MVT::i16, Expand);
- setOperationAction(ISD::ADDC, MVT::i32, Expand);
- setOperationAction(ISD::ADDC, MVT::i64, Expand);
- setOperationAction(ISD::SUBC, MVT::i8, Expand);
- setOperationAction(ISD::SUBC, MVT::i16, Expand);
- setOperationAction(ISD::SUBC, MVT::i32, Expand);
- setOperationAction(ISD::SUBC, MVT::i64, Expand);
-
- setOperationAction(ISD::CTPOP, MVT::i32, Expand);
- setOperationAction(ISD::CTPOP, MVT::i64, Expand);
- setOperationAction(ISD::CTTZ , MVT::i32, Expand);
- setOperationAction(ISD::CTTZ , MVT::i64, Expand);
- setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand);
- setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand);
- setOperationAction(ISD::CTLZ , MVT::i32, Expand);
- setOperationAction(ISD::CTLZ , MVT::i64, Expand);
- setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand);
- setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Expand);
- setOperationAction(ISD::ROTL , MVT::i32, Expand);
- setOperationAction(ISD::ROTR , MVT::i32, Expand);
- setOperationAction(ISD::BSWAP, MVT::i32, Expand);
- setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
- setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
- setOperationAction(ISD::FPOW , MVT::f64, Expand);
- setOperationAction(ISD::FPOW , MVT::f32, Expand);
-
- setOperationAction(ISD::SHL_PARTS, MVT::i32, Expand);
- setOperationAction(ISD::SRA_PARTS, MVT::i32, Expand);
- setOperationAction(ISD::SRL_PARTS, MVT::i32, Expand);
-
- setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
- setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
-
- setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
- setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
-
- setOperationAction(ISD::EH_RETURN, MVT::Other, Custom);
-
- if (TM.getSubtargetImpl()->isSubtargetV2()) {
- setExceptionPointerRegister(Hexagon::R20);
- setExceptionSelectorRegister(Hexagon::R21);
- } else {
- setExceptionPointerRegister(Hexagon::R0);
- setExceptionSelectorRegister(Hexagon::R1);
- }
+ // Hexagon has no select or setcc: expand to SELECT_CC.
+ setOperationAction(ISD::SELECT, MVT::f32, Expand);
+ setOperationAction(ISD::SELECT, MVT::f64, Expand);
+ }
- // VASTART needs to be custom lowered to use the VarArgsFrameIndex.
- setOperationAction(ISD::VASTART , MVT::Other, Custom);
+ if (EmitJumpTables) {
+ setOperationAction(ISD::BR_JT, MVT::Other, Custom);
+ } else {
+ setOperationAction(ISD::BR_JT, MVT::Other, Expand);
+ }
+ // Increase jump tables cutover to 5, was 4.
+ setMinimumJumpTableEntries(5);
+
+ setOperationAction(ISD::BR_CC, MVT::f32, Expand);
+ setOperationAction(ISD::BR_CC, MVT::f64, Expand);
+ setOperationAction(ISD::BR_CC, MVT::i1, Expand);
+ setOperationAction(ISD::BR_CC, MVT::i32, Expand);
+ setOperationAction(ISD::BR_CC, MVT::i64, Expand);
+
+ setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
+
+ setOperationAction(ISD::FSIN, MVT::f64, Expand);
+ setOperationAction(ISD::FCOS, MVT::f64, Expand);
+ setOperationAction(ISD::FREM, MVT::f64, Expand);
+ setOperationAction(ISD::FSIN, MVT::f32, Expand);
+ setOperationAction(ISD::FCOS, MVT::f32, Expand);
+ setOperationAction(ISD::FREM, MVT::f32, Expand);
+ setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
+ setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
+
+ // In V4, we have double word add/sub with carry. The problem with
+ // modelling this instruction is that it produces 2 results - Rdd and Px.
+ // To model update of Px, we will have to use Defs[p0..p3] which will
+ // cause any predicate live range to spill. So, we pretend we don't
+ // have these instructions.
+ setOperationAction(ISD::ADDE, MVT::i8, Expand);
+ setOperationAction(ISD::ADDE, MVT::i16, Expand);
+ setOperationAction(ISD::ADDE, MVT::i32, Expand);
+ setOperationAction(ISD::ADDE, MVT::i64, Expand);
+ setOperationAction(ISD::SUBE, MVT::i8, Expand);
+ setOperationAction(ISD::SUBE, MVT::i16, Expand);
+ setOperationAction(ISD::SUBE, MVT::i32, Expand);
+ setOperationAction(ISD::SUBE, MVT::i64, Expand);
+ setOperationAction(ISD::ADDC, MVT::i8, Expand);
+ setOperationAction(ISD::ADDC, MVT::i16, Expand);
+ setOperationAction(ISD::ADDC, MVT::i32, Expand);
+ setOperationAction(ISD::ADDC, MVT::i64, Expand);
+ setOperationAction(ISD::SUBC, MVT::i8, Expand);
+ setOperationAction(ISD::SUBC, MVT::i16, Expand);
+ setOperationAction(ISD::SUBC, MVT::i32, Expand);
+ setOperationAction(ISD::SUBC, MVT::i64, Expand);
+
+ setOperationAction(ISD::CTPOP, MVT::i32, Expand);
+ setOperationAction(ISD::CTPOP, MVT::i64, Expand);
+ setOperationAction(ISD::CTTZ, MVT::i32, Expand);
+ setOperationAction(ISD::CTTZ, MVT::i64, Expand);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand);
+ setOperationAction(ISD::CTLZ, MVT::i32, Expand);
+ setOperationAction(ISD::CTLZ, MVT::i64, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Expand);
+ setOperationAction(ISD::ROTL, MVT::i32, Expand);
+ setOperationAction(ISD::ROTR, MVT::i32, Expand);
+ setOperationAction(ISD::BSWAP, MVT::i32, Expand);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
+ setOperationAction(ISD::FPOW, MVT::f64, Expand);
+ setOperationAction(ISD::FPOW, MVT::f32, Expand);
+
+ setOperationAction(ISD::SHL_PARTS, MVT::i32, Expand);
+ setOperationAction(ISD::SRA_PARTS, MVT::i32, Expand);
+ setOperationAction(ISD::SRL_PARTS, MVT::i32, Expand);
+
+ setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
+ setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
+
+ setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
+ setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
+
+ setOperationAction(ISD::EH_RETURN, MVT::Other, Custom);
+
+ if (Subtarget.isSubtargetV2()) {
+ setExceptionPointerRegister(Hexagon::R20);
+ setExceptionSelectorRegister(Hexagon::R21);
+ } else {
+ setExceptionPointerRegister(Hexagon::R0);
+ setExceptionSelectorRegister(Hexagon::R1);
+ }
- // Use the default implementation.
- setOperationAction(ISD::VAARG , MVT::Other, Expand);
- setOperationAction(ISD::VACOPY , MVT::Other, Expand);
- setOperationAction(ISD::VAEND , MVT::Other, Expand);
- setOperationAction(ISD::STACKSAVE , MVT::Other, Expand);
- setOperationAction(ISD::STACKRESTORE , MVT::Other, Expand);
+ // VASTART needs to be custom lowered to use the VarArgsFrameIndex.
+ setOperationAction(ISD::VASTART, MVT::Other, Custom);
+ // Use the default implementation.
+ setOperationAction(ISD::VAARG, MVT::Other, Expand);
+ setOperationAction(ISD::VACOPY, MVT::Other, Expand);
+ setOperationAction(ISD::VAEND, MVT::Other, Expand);
+ setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
+ setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
- setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Custom);
- setOperationAction(ISD::INLINEASM , MVT::Other, Custom);
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
+ setOperationAction(ISD::INLINEASM, MVT::Other, Custom);
- setMinFunctionAlignment(2);
+ setMinFunctionAlignment(2);
- // Needed for DYNAMIC_STACKALLOC expansion.
- unsigned StackRegister = TM.getRegisterInfo()->getStackRegister();
- setStackPointerRegisterToSaveRestore(StackRegister);
- setSchedulingPreference(Sched::VLIW);
+ // Needed for DYNAMIC_STACKALLOC expansion.
+ const HexagonRegisterInfo *QRI =
+ static_cast<const HexagonRegisterInfo *>(TM.getRegisterInfo());
+ setStackPointerRegisterToSaveRestore(QRI->getStackRegister());
+ setSchedulingPreference(Sched::VLIW);
}
-
const char*
HexagonTargetLowering::getTargetNodeName(unsigned Opcode) const {
switch (Opcode) {
@@ -1577,7 +1560,6 @@ HexagonTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::BR_JT: return LowerBR_JT(Op, DAG);
case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
- case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
case ISD::SELECT: return Op;
case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
case ISD::INLINEASM: return LowerINLINEASM(Op, DAG);
@@ -1641,8 +1623,7 @@ HexagonTargetLowering::getRegForInlineAsmConstraint(const
/// specified FP immediate natively. If false, the legalizer will
/// materialize the FP immediate as a load from a constant pool.
bool HexagonTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
- const HexagonRegisterInfo* QRI = TM.getRegisterInfo();
- return QRI->Subtarget.hasV5TOps();
+ return TM.getSubtarget<HexagonSubtarget>().hasV5TOps();
}
/// isLegalAddressingMode - Return true if the addressing mode represented by
diff --git a/lib/Target/Hexagon/HexagonISelLowering.h b/lib/Target/Hexagon/HexagonISelLowering.h
index 4f27c27..ec16cc8 100644
--- a/lib/Target/Hexagon/HexagonISelLowering.h
+++ b/lib/Target/Hexagon/HexagonISelLowering.h
@@ -74,8 +74,8 @@ namespace llvm {
unsigned& RetSize) const;
public:
- HexagonTargetMachine &TM;
- explicit HexagonTargetLowering(HexagonTargetMachine &targetmachine);
+ const TargetMachine &TM;
+ explicit HexagonTargetLowering(const TargetMachine &targetmachine);
/// IsEligibleForTailCallOptimization - Check whether the call is eligible
/// for tail call optimization. Targets which want to do tail call
@@ -124,7 +124,6 @@ namespace llvm {
const SmallVectorImpl<SDValue> &OutVals,
SDValue Callee) const;
- SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG& DAG) const;
SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
diff --git a/lib/Target/Hexagon/HexagonInstrInfo.cpp b/lib/Target/Hexagon/HexagonInstrInfo.cpp
index ea6367a..1c95e06 100644
--- a/lib/Target/Hexagon/HexagonInstrInfo.cpp
+++ b/lib/Target/Hexagon/HexagonInstrInfo.cpp
@@ -1538,14 +1538,13 @@ int HexagonInstrInfo::GetDotOldOp(const int opc) const {
int NewOp = opc;
if (isPredicated(NewOp) && isPredicatedNew(NewOp)) { // Get predicate old form
NewOp = Hexagon::getPredOldOpcode(NewOp);
- if (NewOp < 0)
- assert(0 && "Couldn't change predicate new instruction to its old form.");
+ assert(NewOp >= 0 &&
+ "Couldn't change predicate new instruction to its old form.");
}
if (isNewValueStore(NewOp)) { // Convert into non-new-value format
NewOp = Hexagon::getNonNVStore(NewOp);
- if (NewOp < 0)
- assert(0 && "Couldn't change new-value store to its old form.");
+ assert(NewOp >= 0 && "Couldn't change new-value store to its old form.");
}
return NewOp;
}
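The rewritten checks fold the condition into the assert itself: assert(NewOp >= 0 && "...") states the invariant directly, whereas the old if (NewOp < 0) assert(0 && "...") left a branch with an empty body behind in NDEBUG builds. A minimal illustration of the two shapes:

    // Before: the branch survives release builds, checking nothing.
    if (NewOp < 0)
      assert(0 && "Couldn't change predicate new instruction to its old form.");
    // After: one statement, compiled away entirely when NDEBUG is set.
    assert(NewOp >= 0 &&
           "Couldn't change predicate new instruction to its old form.");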
diff --git a/lib/Target/Hexagon/HexagonMachineScheduler.cpp b/lib/Target/Hexagon/HexagonMachineScheduler.cpp
index 7dd6e95..6fcaa20 100644
--- a/lib/Target/Hexagon/HexagonMachineScheduler.cpp
+++ b/lib/Target/Hexagon/HexagonMachineScheduler.cpp
@@ -20,7 +20,7 @@ using namespace llvm;
#define DEBUG_TYPE "misched"
-/// Platform specific modifications to DAG.
+/// Platform-specific modifications to DAG.
void VLIWMachineScheduler::postprocessDAG() {
SUnit* LastSequentialCall = nullptr;
// Currently we only catch the situation when compare gets scheduled
@@ -150,7 +150,7 @@ void VLIWMachineScheduler::schedule() {
buildDAGWithRegPressure();
- // Postprocess the DAG to add platform specific artificial dependencies.
+ // Postprocess the DAG to add platform-specific artificial dependencies.
postprocessDAG();
SmallVector<SUnit*, 8> TopRoots, BotRoots;
diff --git a/lib/Target/Hexagon/HexagonMachineScheduler.h b/lib/Target/Hexagon/HexagonMachineScheduler.h
index 99100a1..8c41086 100644
--- a/lib/Target/Hexagon/HexagonMachineScheduler.h
+++ b/lib/Target/Hexagon/HexagonMachineScheduler.h
@@ -100,7 +100,7 @@ public:
/// Schedule - This is called back from ScheduleDAGInstrs::Run() when it's
/// time to do some work.
virtual void schedule() override;
- /// Perform platform specific DAG postprocessing.
+ /// Perform platform-specific DAG postprocessing.
void postprocessDAG();
};
diff --git a/lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp b/lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp
index 9e1e0fd..b5db997 100644
--- a/lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp
+++ b/lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp
@@ -18,10 +18,8 @@ using namespace llvm;
bool llvm::flag_aligned_memcpy;
-HexagonSelectionDAGInfo::HexagonSelectionDAGInfo(const HexagonTargetMachine
- &TM)
- : TargetSelectionDAGInfo(TM) {
-}
+HexagonSelectionDAGInfo::HexagonSelectionDAGInfo(const DataLayout &DL)
+ : TargetSelectionDAGInfo(&DL) {}
HexagonSelectionDAGInfo::~HexagonSelectionDAGInfo() {
}
diff --git a/lib/Target/Hexagon/HexagonSelectionDAGInfo.h b/lib/Target/Hexagon/HexagonSelectionDAGInfo.h
index 8ba6108..b40b303 100644
--- a/lib/Target/Hexagon/HexagonSelectionDAGInfo.h
+++ b/lib/Target/Hexagon/HexagonSelectionDAGInfo.h
@@ -18,11 +18,9 @@
namespace llvm {
-class HexagonTargetMachine;
-
class HexagonSelectionDAGInfo : public TargetSelectionDAGInfo {
public:
- explicit HexagonSelectionDAGInfo(const HexagonTargetMachine &TM);
+ explicit HexagonSelectionDAGInfo(const DataLayout &DL);
~HexagonSelectionDAGInfo();
SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl,
diff --git a/lib/Target/Hexagon/HexagonSubtarget.cpp b/lib/Target/Hexagon/HexagonSubtarget.cpp
index 70c87fa..657893f 100644
--- a/lib/Target/Hexagon/HexagonSubtarget.cpp
+++ b/lib/Target/Hexagon/HexagonSubtarget.cpp
@@ -48,10 +48,8 @@ EnableIEEERndNear(
cl::Hidden, cl::ZeroOrMore, cl::init(false),
cl::desc("Generate non-chopped conversion from fp to int."));
-HexagonSubtarget::HexagonSubtarget(StringRef TT, StringRef CPU, StringRef FS):
- HexagonGenSubtargetInfo(TT, CPU, FS),
- CPUString(CPU.str()) {
-
+HexagonSubtarget &
+HexagonSubtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS) {
// If the programmer has not specified a Hexagon version, default to -mv4.
if (CPUString.empty())
CPUString = "hexagonv4";
@@ -70,6 +68,15 @@ HexagonSubtarget::HexagonSubtarget(StringRef TT, StringRef CPU, StringRef FS):
}
ParseSubtargetFeatures(CPUString, FS);
+ return *this;
+}
+
+HexagonSubtarget::HexagonSubtarget(StringRef TT, StringRef CPU, StringRef FS,
+ const TargetMachine &TM)
+ : HexagonGenSubtargetInfo(TT, CPU, FS), CPUString(CPU.str()),
+ DL("e-m:e-p:32:32-i1:32-i64:64-a:0-n32"),
+ InstrInfo(initializeSubtargetDependencies(CPU, FS)), TLInfo(TM),
+ TSInfo(DL), FrameLowering() {
// Initialize scheduling itinerary for the specified CPU.
InstrItins = getInstrItineraryForCPU(CPUString);
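The constructor above follows the initializeSubtargetDependencies idiom this patch applies across targets: feature parsing must finish before members such as InstrInfo are built, and an initializer list runs in member declaration order, so the parsing moves into a helper that mutates *this and returns it. A stripped-down sketch of the shape (Foo* types illustrative):

    class FooSubtarget : public FooGenSubtargetInfo {
      std::string CPUString;   // declared (and thus initialized) first
      FooInstrInfo InstrInfo;  // its constructor needs the parsed features

      FooSubtarget &initializeSubtargetDependencies(StringRef CPU,
                                                    StringRef FS) {
        if (CPUString.empty())
          CPUString = "foov4";                 // default CPU when none given
        ParseSubtargetFeatures(CPUString, FS); // tblgen-generated
        return *this;                          // safe to hand to InstrInfo now
      }

    public:
      FooSubtarget(StringRef TT, StringRef CPU, StringRef FS)
          : FooGenSubtargetInfo(TT, CPU, FS), CPUString(CPU.str()),
            InstrInfo(initializeSubtargetDependencies(CPU, FS)) {}
    };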
diff --git a/lib/Target/Hexagon/HexagonSubtarget.h b/lib/Target/Hexagon/HexagonSubtarget.h
index 690bef0..b184e62 100644
--- a/lib/Target/Hexagon/HexagonSubtarget.h
+++ b/lib/Target/Hexagon/HexagonSubtarget.h
@@ -14,6 +14,11 @@
#ifndef Hexagon_SUBTARGET_H
#define Hexagon_SUBTARGET_H
+#include "HexagonFrameLowering.h"
+#include "HexagonInstrInfo.h"
+#include "HexagonISelLowering.h"
+#include "HexagonSelectionDAGInfo.h"
+#include "llvm/IR/DataLayout.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include <string>
@@ -28,6 +33,7 @@ namespace llvm {
class HexagonSubtarget : public HexagonGenSubtargetInfo {
virtual void anchor();
+
bool UseMemOps;
bool ModeIEEERndNear;
@@ -37,16 +43,35 @@ public:
};
HexagonArchEnum HexagonArchVersion;
+private:
std::string CPUString;
+ const DataLayout DL; // Calculates type size & alignment.
+ HexagonInstrInfo InstrInfo;
+ HexagonTargetLowering TLInfo;
+ HexagonSelectionDAGInfo TSInfo;
+ HexagonFrameLowering FrameLowering;
InstrItineraryData InstrItins;
public:
- HexagonSubtarget(StringRef TT, StringRef CPU, StringRef FS);
+ HexagonSubtarget(StringRef TT, StringRef CPU, StringRef FS,
+ const TargetMachine &TM);
/// getInstrItins - Return the instruction itineraries based on subtarget
/// selection.
const InstrItineraryData &getInstrItineraryData() const { return InstrItins; }
+ const HexagonInstrInfo *getInstrInfo() const { return &InstrInfo; }
+ const HexagonRegisterInfo *getRegisterInfo() const {
+ return &InstrInfo.getRegisterInfo();
+ }
+ const HexagonTargetLowering *getTargetLowering() const { return &TLInfo; }
+ const HexagonFrameLowering *getFrameLowering() const {
+ return &FrameLowering;
+ }
+ const HexagonSelectionDAGInfo *getSelectionDAGInfo() const { return &TSInfo; }
+ const DataLayout *getDataLayout() const { return &DL; }
+ HexagonSubtarget &initializeSubtargetDependencies(StringRef CPU,
+ StringRef FS);
/// ParseSubtargetFeatures - Parses features string setting specified
/// subtarget options. Definition of function is auto generated by tblgen.
diff --git a/lib/Target/Hexagon/HexagonTargetMachine.cpp b/lib/Target/Hexagon/HexagonTargetMachine.cpp
index b923764..7831410 100644
--- a/lib/Target/Hexagon/HexagonTargetMachine.cpp
+++ b/lib/Target/Hexagon/HexagonTargetMachine.cpp
@@ -67,15 +67,10 @@ SchedCustomRegistry("hexagon", "Run Hexagon's custom scheduler",
HexagonTargetMachine::HexagonTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
const TargetOptions &Options,
- Reloc::Model RM,
- CodeModel::Model CM,
+ Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL)
- : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
- DL("e-m:e-p:32:32-i1:32-i64:64-a:0-n32") ,
- Subtarget(TT, CPU, FS), InstrInfo(Subtarget), TLInfo(*this),
- TSInfo(*this),
- FrameLowering(Subtarget),
- InstrItins(&Subtarget.getInstrItineraryData()) {
+ : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
+ Subtarget(TT, CPU, FS, *this) {
initAsmInfo();
}
diff --git a/lib/Target/Hexagon/HexagonTargetMachine.h b/lib/Target/Hexagon/HexagonTargetMachine.h
index 70b835e..d88178e 100644
--- a/lib/Target/Hexagon/HexagonTargetMachine.h
+++ b/lib/Target/Hexagon/HexagonTargetMachine.h
@@ -14,12 +14,8 @@
#ifndef HexagonTARGETMACHINE_H
#define HexagonTARGETMACHINE_H
-#include "HexagonFrameLowering.h"
-#include "HexagonISelLowering.h"
#include "HexagonInstrInfo.h"
-#include "HexagonSelectionDAGInfo.h"
#include "HexagonSubtarget.h"
-#include "llvm/IR/DataLayout.h"
#include "llvm/Target/TargetMachine.h"
namespace llvm {
@@ -27,13 +23,7 @@ namespace llvm {
class Module;
class HexagonTargetMachine : public LLVMTargetMachine {
- const DataLayout DL; // Calculates type size & alignment.
HexagonSubtarget Subtarget;
- HexagonInstrInfo InstrInfo;
- HexagonTargetLowering TLInfo;
- HexagonSelectionDAGInfo TSInfo;
- HexagonFrameLowering FrameLowering;
- const InstrItineraryData* InstrItins;
public:
HexagonTargetMachine(const Target &T, StringRef TT,StringRef CPU,
@@ -42,33 +32,29 @@ public:
CodeGenOpt::Level OL);
const HexagonInstrInfo *getInstrInfo() const override {
- return &InstrInfo;
+ return getSubtargetImpl()->getInstrInfo();
}
const HexagonSubtarget *getSubtargetImpl() const override {
return &Subtarget;
}
const HexagonRegisterInfo *getRegisterInfo() const override {
- return &InstrInfo.getRegisterInfo();
+ return getSubtargetImpl()->getRegisterInfo();
}
-
const InstrItineraryData* getInstrItineraryData() const override {
- return InstrItins;
+ return &getSubtargetImpl()->getInstrItineraryData();
}
-
-
const HexagonTargetLowering* getTargetLowering() const override {
- return &TLInfo;
+ return getSubtargetImpl()->getTargetLowering();
}
-
const HexagonFrameLowering* getFrameLowering() const override {
- return &FrameLowering;
+ return getSubtargetImpl()->getFrameLowering();
}
-
const HexagonSelectionDAGInfo* getSelectionDAGInfo() const override {
- return &TSInfo;
+ return getSubtargetImpl()->getSelectionDAGInfo();
+ }
+ const DataLayout *getDataLayout() const override {
+ return getSubtargetImpl()->getDataLayout();
}
-
- const DataLayout *getDataLayout() const override { return &DL; }
static unsigned getModuleMatchQuality(const Module &M);
TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
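After this hunk the TargetMachine keeps only the Subtarget and forwards every accessor to it, which is the end state each target in this patch converges on. Condensed to its skeleton (illustrative Foo* names):

    class FooTargetMachine : public LLVMTargetMachine {
      FooSubtarget Subtarget;  // sole owner of the per-subtarget objects

    public:
      const FooSubtarget *getSubtargetImpl() const override {
        return &Subtarget;
      }
      const FooInstrInfo *getInstrInfo() const override {
        return getSubtargetImpl()->getInstrInfo();  // forward, never cache
      }
      const DataLayout *getDataLayout() const override {
        return getSubtargetImpl()->getDataLayout();
      }
    };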
diff --git a/lib/Target/MSP430/MSP430FrameLowering.h b/lib/Target/MSP430/MSP430FrameLowering.h
index d464dd9..fadfeed 100644
--- a/lib/Target/MSP430/MSP430FrameLowering.h
+++ b/lib/Target/MSP430/MSP430FrameLowering.h
@@ -15,20 +15,15 @@
#define MSP430_FRAMEINFO_H
#include "MSP430.h"
-#include "MSP430Subtarget.h"
#include "llvm/Target/TargetFrameLowering.h"
namespace llvm {
- class MSP430Subtarget;
-
class MSP430FrameLowering : public TargetFrameLowering {
protected:
- const MSP430Subtarget &STI;
public:
- explicit MSP430FrameLowering(const MSP430Subtarget &sti)
- : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 2, -2, 2),
- STI(sti) {}
+ explicit MSP430FrameLowering()
+ : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 2, -2, 2) {}
/// emitProlog/emitEpilog - These methods insert prolog and epilog code into
/// the function.
diff --git a/lib/Target/MSP430/MSP430ISelLowering.cpp b/lib/Target/MSP430/MSP430ISelLowering.cpp
index c5901bc..3d3ee92 100644
--- a/lib/Target/MSP430/MSP430ISelLowering.cpp
+++ b/lib/Target/MSP430/MSP430ISelLowering.cpp
@@ -57,11 +57,8 @@ HWMultMode("msp430-hwmult-mode", cl::Hidden,
"Assume hardware multiplier cannot be used inside interrupts"),
clEnumValEnd));
-MSP430TargetLowering::MSP430TargetLowering(MSP430TargetMachine &tm) :
- TargetLowering(tm, new TargetLoweringObjectFileELF()),
- Subtarget(*tm.getSubtargetImpl()) {
-
- TD = getDataLayout();
+MSP430TargetLowering::MSP430TargetLowering(const TargetMachine &TM)
+ : TargetLowering(TM, new TargetLoweringObjectFileELF()) {
// Set up the register classes.
addRegisterClass(MVT::i8, &MSP430::GR8RegClass);
@@ -1032,7 +1029,7 @@ MSP430TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) const {
if (ReturnAddrIndex == 0) {
// Set up a frame object for the return address.
- uint64_t SlotSize = TD->getPointerSize();
+ uint64_t SlotSize = getDataLayout()->getPointerSize();
ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(SlotSize, -SlotSize,
true);
FuncInfo->setRAIndex(ReturnAddrIndex);
@@ -1055,7 +1052,7 @@ SDValue MSP430TargetLowering::LowerRETURNADDR(SDValue Op,
if (Depth > 0) {
SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
SDValue Offset =
- DAG.getConstant(TD->getPointerSize(), MVT::i16);
+ DAG.getConstant(getDataLayout()->getPointerSize(), MVT::i16);
return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
DAG.getNode(ISD::ADD, dl, getPointerTy(),
FrameAddr, Offset),
diff --git a/lib/Target/MSP430/MSP430ISelLowering.h b/lib/Target/MSP430/MSP430ISelLowering.h
index 3ced61d..3e2f344 100644
--- a/lib/Target/MSP430/MSP430ISelLowering.h
+++ b/lib/Target/MSP430/MSP430ISelLowering.h
@@ -66,12 +66,9 @@ namespace llvm {
};
}
- class MSP430Subtarget;
- class MSP430TargetMachine;
-
class MSP430TargetLowering : public TargetLowering {
public:
- explicit MSP430TargetLowering(MSP430TargetMachine &TM);
+ explicit MSP430TargetLowering(const TargetMachine &TM);
MVT getScalarShiftAmountTy(EVT LHSTy) const override { return MVT::i8; }
@@ -170,9 +167,6 @@ namespace llvm {
SDValue &Offset,
ISD::MemIndexedMode &AM,
SelectionDAG &DAG) const override;
-
- const MSP430Subtarget &Subtarget;
- const DataLayout *TD;
};
} // namespace llvm
diff --git a/lib/Target/MSP430/MSP430InstrInfo.cpp b/lib/Target/MSP430/MSP430InstrInfo.cpp
index 0c04ddb..ccb6c09 100644
--- a/lib/Target/MSP430/MSP430InstrInfo.cpp
+++ b/lib/Target/MSP430/MSP430InstrInfo.cpp
@@ -30,9 +30,9 @@ using namespace llvm;
// Pin the vtable to this file.
void MSP430InstrInfo::anchor() {}
-MSP430InstrInfo::MSP430InstrInfo(MSP430TargetMachine &tm)
+MSP430InstrInfo::MSP430InstrInfo(MSP430Subtarget &STI)
: MSP430GenInstrInfo(MSP430::ADJCALLSTACKDOWN, MSP430::ADJCALLSTACKUP),
- RI(tm) {}
+ RI() {}
void MSP430InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
diff --git a/lib/Target/MSP430/MSP430InstrInfo.h b/lib/Target/MSP430/MSP430InstrInfo.h
index 1ffcebb..e6baaef 100644
--- a/lib/Target/MSP430/MSP430InstrInfo.h
+++ b/lib/Target/MSP430/MSP430InstrInfo.h
@@ -22,7 +22,7 @@
namespace llvm {
-class MSP430TargetMachine;
+class MSP430Subtarget;
/// MSP430II - This namespace holds all of the target specific flags that
/// instruction info tracks.
@@ -44,7 +44,7 @@ class MSP430InstrInfo : public MSP430GenInstrInfo {
const MSP430RegisterInfo RI;
virtual void anchor();
public:
- explicit MSP430InstrInfo(MSP430TargetMachine &TM);
+ explicit MSP430InstrInfo(MSP430Subtarget &STI);
/// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
/// such, whenever a client has an instance of instruction info, it should
diff --git a/lib/Target/MSP430/MSP430RegisterInfo.cpp b/lib/Target/MSP430/MSP430RegisterInfo.cpp
index 341fb64..691bcee 100644
--- a/lib/Target/MSP430/MSP430RegisterInfo.cpp
+++ b/lib/Target/MSP430/MSP430RegisterInfo.cpp
@@ -32,10 +32,8 @@ using namespace llvm;
#include "MSP430GenRegisterInfo.inc"
// FIXME: Provide proper call frame setup / destroy opcodes.
-MSP430RegisterInfo::MSP430RegisterInfo(MSP430TargetMachine &tm)
- : MSP430GenRegisterInfo(MSP430::PCW), TM(tm) {
- StackAlign = TM.getFrameLowering()->getStackAlignment();
-}
+MSP430RegisterInfo::MSP430RegisterInfo()
+ : MSP430GenRegisterInfo(MSP430::PCW) {}
const MCPhysReg*
MSP430RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
diff --git a/lib/Target/MSP430/MSP430RegisterInfo.h b/lib/Target/MSP430/MSP430RegisterInfo.h
index a607528..cb01961 100644
--- a/lib/Target/MSP430/MSP430RegisterInfo.h
+++ b/lib/Target/MSP430/MSP430RegisterInfo.h
@@ -21,18 +21,9 @@
namespace llvm {
-class TargetInstrInfo;
-class MSP430TargetMachine;
-
struct MSP430RegisterInfo : public MSP430GenRegisterInfo {
-private:
- MSP430TargetMachine &TM;
-
- /// StackAlign - Default stack alignment.
- ///
- unsigned StackAlign;
public:
- MSP430RegisterInfo(MSP430TargetMachine &tm);
+ MSP430RegisterInfo();
/// Code Generation virtual methods...
const MCPhysReg *
diff --git a/lib/Target/MSP430/MSP430SelectionDAGInfo.cpp b/lib/Target/MSP430/MSP430SelectionDAGInfo.cpp
index c700383..3897ef6 100644
--- a/lib/Target/MSP430/MSP430SelectionDAGInfo.cpp
+++ b/lib/Target/MSP430/MSP430SelectionDAGInfo.cpp
@@ -16,9 +16,8 @@ using namespace llvm;
#define DEBUG_TYPE "msp430-selectiondag-info"
-MSP430SelectionDAGInfo::MSP430SelectionDAGInfo(const MSP430TargetMachine &TM)
- : TargetSelectionDAGInfo(TM) {
-}
+MSP430SelectionDAGInfo::MSP430SelectionDAGInfo(const DataLayout &DL)
+ : TargetSelectionDAGInfo(&DL) {}
MSP430SelectionDAGInfo::~MSP430SelectionDAGInfo() {
}
diff --git a/lib/Target/MSP430/MSP430SelectionDAGInfo.h b/lib/Target/MSP430/MSP430SelectionDAGInfo.h
index fa81948..cb04adc 100644
--- a/lib/Target/MSP430/MSP430SelectionDAGInfo.h
+++ b/lib/Target/MSP430/MSP430SelectionDAGInfo.h
@@ -22,7 +22,7 @@ class MSP430TargetMachine;
class MSP430SelectionDAGInfo : public TargetSelectionDAGInfo {
public:
- explicit MSP430SelectionDAGInfo(const MSP430TargetMachine &TM);
+ explicit MSP430SelectionDAGInfo(const DataLayout &DL);
~MSP430SelectionDAGInfo();
};
diff --git a/lib/Target/MSP430/MSP430Subtarget.cpp b/lib/Target/MSP430/MSP430Subtarget.cpp
index 68ad091..dbddc52 100644
--- a/lib/Target/MSP430/MSP430Subtarget.cpp
+++ b/lib/Target/MSP430/MSP430Subtarget.cpp
@@ -25,12 +25,15 @@ using namespace llvm;
void MSP430Subtarget::anchor() { }
-MSP430Subtarget::MSP430Subtarget(const std::string &TT,
- const std::string &CPU,
- const std::string &FS) :
- MSP430GenSubtargetInfo(TT, CPU, FS) {
- std::string CPUName = "generic";
-
- // Parse features string.
- ParseSubtargetFeatures(CPUName, FS);
+MSP430Subtarget &
+MSP430Subtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS) {
+ ParseSubtargetFeatures("generic", FS);
+ return *this;
}
+
+MSP430Subtarget::MSP430Subtarget(const std::string &TT, const std::string &CPU,
+ const std::string &FS, const TargetMachine &TM)
+ : MSP430GenSubtargetInfo(TT, CPU, FS),
+ // FIXME: Check DataLayout string.
+ DL("e-m:e-p:16:16-i32:16:32-n8:16"), FrameLowering(),
+ InstrInfo(initializeSubtargetDependencies(CPU, FS)), TLInfo(TM),
+ TSInfo(DL) {}
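TSInfo(DL) above is only valid because DL is declared before TSInfo in the header hunk that follows; C++ initializes members in declaration order regardless of how the initializer list is written. A minimal example of the trap that -Wreorder exists to catch (hypothetical struct):

    struct S {
      int A;
      int B;
      S() : B(1), A(B) {}  // compilers warn (-Wreorder) that B is
                           // initialized after A, so A(B) reads an
                           // uninitialized B.
    };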
diff --git a/lib/Target/MSP430/MSP430Subtarget.h b/lib/Target/MSP430/MSP430Subtarget.h
index 4d8792e..0152ad1 100644
--- a/lib/Target/MSP430/MSP430Subtarget.h
+++ b/lib/Target/MSP430/MSP430Subtarget.h
@@ -14,6 +14,12 @@
#ifndef LLVM_TARGET_MSP430_SUBTARGET_H
#define LLVM_TARGET_MSP430_SUBTARGET_H
+#include "MSP430FrameLowering.h"
+#include "MSP430InstrInfo.h"
+#include "MSP430ISelLowering.h"
+#include "MSP430RegisterInfo.h"
+#include "MSP430SelectionDAGInfo.h"
+#include "llvm/IR/DataLayout.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include <string>
@@ -26,16 +32,33 @@ class StringRef;
class MSP430Subtarget : public MSP430GenSubtargetInfo {
virtual void anchor();
bool ExtendedInsts;
+ const DataLayout DL; // Calculates type size & alignment
+ MSP430FrameLowering FrameLowering;
+ MSP430InstrInfo InstrInfo;
+ MSP430TargetLowering TLInfo;
+ MSP430SelectionDAGInfo TSInfo;
+
public:
/// This constructor initializes the data members to match that
/// of the specified triple.
///
MSP430Subtarget(const std::string &TT, const std::string &CPU,
- const std::string &FS);
+ const std::string &FS, const TargetMachine &TM);
+
+ MSP430Subtarget &initializeSubtargetDependencies(StringRef CPU, StringRef FS);
/// ParseSubtargetFeatures - Parses features string setting specified
/// subtarget options. Definition of function is auto generated by tblgen.
void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
+
+ const TargetFrameLowering *getFrameLowering() const { return &FrameLowering; }
+ const MSP430InstrInfo *getInstrInfo() const { return &InstrInfo; }
+ const DataLayout *getDataLayout() const { return &DL; }
+ const TargetRegisterInfo *getRegisterInfo() const {
+ return &InstrInfo.getRegisterInfo();
+ }
+ const MSP430TargetLowering *getTargetLowering() const { return &TLInfo; }
+ const MSP430SelectionDAGInfo *getSelectionDAGInfo() const { return &TSInfo; }
};
} // End llvm namespace
diff --git a/lib/Target/MSP430/MSP430TargetMachine.cpp b/lib/Target/MSP430/MSP430TargetMachine.cpp
index 50be2be..5ca36f2 100644
--- a/lib/Target/MSP430/MSP430TargetMachine.cpp
+++ b/lib/Target/MSP430/MSP430TargetMachine.cpp
@@ -24,19 +24,13 @@ extern "C" void LLVMInitializeMSP430Target() {
RegisterTargetMachine<MSP430TargetMachine> X(TheMSP430Target);
}
-MSP430TargetMachine::MSP430TargetMachine(const Target &T,
- StringRef TT,
- StringRef CPU,
- StringRef FS,
+MSP430TargetMachine::MSP430TargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL)
- : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
- Subtarget(TT, CPU, FS),
- // FIXME: Check DataLayout string.
- DL("e-m:e-p:16:16-i32:16:32-n8:16"),
- InstrInfo(*this), TLInfo(*this), TSInfo(*this),
- FrameLowering(Subtarget) {
+ : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
+ Subtarget(TT, CPU, FS, *this) {
initAsmInfo();
}
diff --git a/lib/Target/MSP430/MSP430TargetMachine.h b/lib/Target/MSP430/MSP430TargetMachine.h
index ea5d407..efa8403 100644
--- a/lib/Target/MSP430/MSP430TargetMachine.h
+++ b/lib/Target/MSP430/MSP430TargetMachine.h
@@ -15,13 +15,7 @@
#ifndef LLVM_TARGET_MSP430_TARGETMACHINE_H
#define LLVM_TARGET_MSP430_TARGETMACHINE_H
-#include "MSP430FrameLowering.h"
-#include "MSP430ISelLowering.h"
-#include "MSP430InstrInfo.h"
-#include "MSP430RegisterInfo.h"
-#include "MSP430SelectionDAGInfo.h"
#include "MSP430Subtarget.h"
-#include "llvm/IR/DataLayout.h"
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetMachine.h"
@@ -31,11 +25,6 @@ namespace llvm {
///
class MSP430TargetMachine : public LLVMTargetMachine {
MSP430Subtarget Subtarget;
- const DataLayout DL; // Calculates type size & alignment
- MSP430InstrInfo InstrInfo;
- MSP430TargetLowering TLInfo;
- MSP430SelectionDAGInfo TSInfo;
- MSP430FrameLowering FrameLowering;
public:
MSP430TargetMachine(const Target &T, StringRef TT,
@@ -44,22 +33,25 @@ public:
CodeGenOpt::Level OL);
const TargetFrameLowering *getFrameLowering() const override {
- return &FrameLowering;
+ return getSubtargetImpl()->getFrameLowering();
+ }
+ const MSP430InstrInfo *getInstrInfo() const override {
+ return getSubtargetImpl()->getInstrInfo();
+ }
+ const DataLayout *getDataLayout() const override {
+ return getSubtargetImpl()->getDataLayout();
+ }
+ const MSP430Subtarget *getSubtargetImpl() const override {
+ return &Subtarget;
}
- const MSP430InstrInfo *getInstrInfo() const override { return &InstrInfo; }
- const DataLayout *getDataLayout() const override { return &DL;}
- const MSP430Subtarget *getSubtargetImpl() const override { return &Subtarget; }
-
const TargetRegisterInfo *getRegisterInfo() const override {
- return &InstrInfo.getRegisterInfo();
+ return getSubtargetImpl()->getRegisterInfo();
}
-
const MSP430TargetLowering *getTargetLowering() const override {
- return &TLInfo;
+ return getSubtargetImpl()->getTargetLowering();
}
-
- const MSP430SelectionDAGInfo* getSelectionDAGInfo() const override {
- return &TSInfo;
+ const MSP430SelectionDAGInfo *getSelectionDAGInfo() const override {
+ return getSubtargetImpl()->getSelectionDAGInfo();
}
TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
}; // MSP430TargetMachine.
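With the members moved into MSP430Subtarget, the target machine keeps a single Subtarget and its accessors become one-line forwards. A simplified model of that delegation (names trimmed down from the classes above; the int member is a placeholder):

struct Subtarget {
  int FrameLowering = 0; // placeholder for the real MSP430FrameLowering
  const int *getFrameLowering() const { return &FrameLowering; }
};

struct TargetMachine {
  Subtarget ST;
  const Subtarget *getSubtargetImpl() const { return &ST; }
  // Every accessor forwards, so the per-target state lives in one place.
  const int *getFrameLowering() const {
    return getSubtargetImpl()->getFrameLowering();
  }
};

int main() {
  TargetMachine TM;
  // Both paths reach the same object.
  return TM.getFrameLowering() == TM.getSubtargetImpl()->getFrameLowering() ? 0 : 1;
}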
diff --git a/lib/Target/Mips/Android.mk b/lib/Target/Mips/Android.mk
index 4e8831c..9f437f8 100644
--- a/lib/Target/Mips/Android.mk
+++ b/lib/Target/Mips/Android.mk
@@ -8,6 +8,7 @@ mips_codegen_TBLGEN_TABLES := \
MipsGenMCPseudoLowering.inc \
MipsGenAsmWriter.inc \
MipsGenDAGISel.inc \
+ MipsGenFastISel.inc \
MipsGenCallingConv.inc \
MipsGenSubtargetInfo.inc
diff --git a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
index 86fd386..0c06be8 100644
--- a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
+++ b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
@@ -72,72 +72,69 @@ class MipsAsmParser : public MCTargetAsmParser {
#define GET_ASSEMBLER_HEADER
#include "MipsGenAsmMatcher.inc"
+ unsigned checkTargetMatchPredicate(MCInst &Inst) override;
+
bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
- SmallVectorImpl<MCParsedAsmOperand *> &Operands,
- MCStreamer &Out, unsigned &ErrorInfo,
+ OperandVector &Operands, MCStreamer &Out,
+ unsigned &ErrorInfo,
bool MatchingInlineAsm) override;
/// Parse a register as used in CFI directives
bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
- bool ParseParenSuffix(StringRef Name,
- SmallVectorImpl<MCParsedAsmOperand *> &Operands);
+ bool ParseParenSuffix(StringRef Name, OperandVector &Operands);
- bool ParseBracketSuffix(StringRef Name,
- SmallVectorImpl<MCParsedAsmOperand *> &Operands);
+ bool ParseBracketSuffix(StringRef Name, OperandVector &Operands);
- bool
- ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc,
- SmallVectorImpl<MCParsedAsmOperand *> &Operands) override;
+ bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
+ SMLoc NameLoc, OperandVector &Operands) override;
bool ParseDirective(AsmToken DirectiveID) override;
- MipsAsmParser::OperandMatchResultTy
- parseMemOperand(SmallVectorImpl<MCParsedAsmOperand *> &Operands);
-
- MipsAsmParser::OperandMatchResultTy MatchAnyRegisterNameWithoutDollar(
- SmallVectorImpl<MCParsedAsmOperand *> &Operands, StringRef Identifier,
- SMLoc S);
+ MipsAsmParser::OperandMatchResultTy parseMemOperand(OperandVector &Operands);
MipsAsmParser::OperandMatchResultTy
- MatchAnyRegisterWithoutDollar(SmallVectorImpl<MCParsedAsmOperand *> &Operands,
- SMLoc S);
+ MatchAnyRegisterNameWithoutDollar(OperandVector &Operands,
+ StringRef Identifier, SMLoc S);
MipsAsmParser::OperandMatchResultTy
- ParseAnyRegister(SmallVectorImpl<MCParsedAsmOperand *> &Operands);
+ MatchAnyRegisterWithoutDollar(OperandVector &Operands, SMLoc S);
- MipsAsmParser::OperandMatchResultTy
- ParseImm(SmallVectorImpl<MCParsedAsmOperand *> &Operands);
+ MipsAsmParser::OperandMatchResultTy ParseAnyRegister(OperandVector &Operands);
- MipsAsmParser::OperandMatchResultTy
- ParseJumpTarget(SmallVectorImpl<MCParsedAsmOperand *> &Operands);
+ MipsAsmParser::OperandMatchResultTy ParseImm(OperandVector &Operands);
- MipsAsmParser::OperandMatchResultTy
- parseInvNum(SmallVectorImpl<MCParsedAsmOperand *> &Operands);
+ MipsAsmParser::OperandMatchResultTy ParseJumpTarget(OperandVector &Operands);
- MipsAsmParser::OperandMatchResultTy
- ParseLSAImm(SmallVectorImpl<MCParsedAsmOperand *> &Operands);
+ MipsAsmParser::OperandMatchResultTy parseInvNum(OperandVector &Operands);
- bool searchSymbolAlias(SmallVectorImpl<MCParsedAsmOperand *> &Operands);
+ MipsAsmParser::OperandMatchResultTy ParseLSAImm(OperandVector &Operands);
- bool ParseOperand(SmallVectorImpl<MCParsedAsmOperand *> &,
- StringRef Mnemonic);
+ bool searchSymbolAlias(OperandVector &Operands);
+
+ bool ParseOperand(OperandVector &, StringRef Mnemonic);
bool needsExpansion(MCInst &Inst);
- void expandInstruction(MCInst &Inst, SMLoc IDLoc,
+ // Expands assembly pseudo instructions.
+ // Returns false on success, true otherwise.
+ bool expandInstruction(MCInst &Inst, SMLoc IDLoc,
SmallVectorImpl<MCInst> &Instructions);
- void expandLoadImm(MCInst &Inst, SMLoc IDLoc,
+
+ bool expandLoadImm(MCInst &Inst, SMLoc IDLoc,
SmallVectorImpl<MCInst> &Instructions);
- void expandLoadAddressImm(MCInst &Inst, SMLoc IDLoc,
+
+ bool expandLoadAddressImm(MCInst &Inst, SMLoc IDLoc,
SmallVectorImpl<MCInst> &Instructions);
- void expandLoadAddressReg(MCInst &Inst, SMLoc IDLoc,
+
+ bool expandLoadAddressReg(MCInst &Inst, SMLoc IDLoc,
SmallVectorImpl<MCInst> &Instructions);
+
void expandMemInst(MCInst &Inst, SMLoc IDLoc,
SmallVectorImpl<MCInst> &Instructions, bool isLoad,
bool isImmOpnd);
- bool reportParseError(StringRef ErrorMsg);
- bool reportParseError(SMLoc Loc, StringRef ErrorMsg);
+ bool reportParseError(Twine ErrorMsg);
+ bool reportParseError(SMLoc Loc, Twine ErrorMsg);
bool parseMemOffset(const MCExpr *&Res, bool isParenExpr);
bool parseRelocOperand(const MCExpr *&Res);
@@ -159,32 +156,20 @@ class MipsAsmParser : public MCTargetAsmParser {
bool parseSetReorderDirective();
bool parseSetNoReorderDirective();
bool parseSetNoMips16Directive();
+ bool parseSetFpDirective();
bool parseSetAssignment();
bool parseDataDirective(unsigned Size, SMLoc L);
bool parseDirectiveGpWord();
bool parseDirectiveGpDWord();
+ bool parseDirectiveModule();
+ bool parseDirectiveModuleFP();
+ bool parseFpABIValue(MipsABIFlagsSection::FpABIKind &FpABI,
+ StringRef Directive);
MCSymbolRefExpr::VariantKind getVariantKind(StringRef Symbol);
- bool isGP64() const {
- return (STI.getFeatureBits() & Mips::FeatureGP64Bit) != 0;
- }
-
- bool isFP64() const {
- return (STI.getFeatureBits() & Mips::FeatureFP64Bit) != 0;
- }
-
- bool isN32() const { return STI.getFeatureBits() & Mips::FeatureN32; }
- bool isN64() const { return STI.getFeatureBits() & Mips::FeatureN64; }
-
- bool isMicroMips() const {
- return STI.getFeatureBits() & Mips::FeatureMicroMips;
- }
-
- bool parseRegister(unsigned &RegNum);
-
bool eatComma(StringRef ErrorStr);
int matchCPURegisterName(StringRef Symbol);
@@ -205,7 +190,7 @@ class MipsAsmParser : public MCTargetAsmParser {
unsigned getGPR(int RegNo);
- int getATReg();
+ int getATReg(SMLoc Loc);
bool processInstruction(MCInst &Inst, SMLoc IDLoc,
SmallVectorImpl<MCInst> &Instructions);
@@ -230,23 +215,85 @@ class MipsAsmParser : public MCTargetAsmParser {
}
public:
+ enum MipsMatchResultTy {
+ Match_RequiresDifferentSrcAndDst = FIRST_TARGET_MATCH_RESULT_TY
+#define GET_OPERAND_DIAGNOSTIC_TYPES
+#include "MipsGenAsmMatcher.inc"
+#undef GET_OPERAND_DIAGNOSTIC_TYPES
+
+ };
+
MipsAsmParser(MCSubtargetInfo &sti, MCAsmParser &parser,
- const MCInstrInfo &MII,
- const MCTargetOptions &Options)
+ const MCInstrInfo &MII, const MCTargetOptions &Options)
: MCTargetAsmParser(), STI(sti), Parser(parser) {
// Initialize the set of available features.
setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
+ getTargetStreamer().updateABIInfo(*this);
+
// Assert exactly one ABI was chosen.
assert((((STI.getFeatureBits() & Mips::FeatureO32) != 0) +
((STI.getFeatureBits() & Mips::FeatureEABI) != 0) +
((STI.getFeatureBits() & Mips::FeatureN32) != 0) +
((STI.getFeatureBits() & Mips::FeatureN64) != 0)) == 1);
+
+ if (!isABI_O32() && !allowOddSPReg())
+ report_fatal_error("-mno-odd-spreg requires the O32 ABI");
}
MCAsmParser &getParser() const { return Parser; }
MCAsmLexer &getLexer() const { return Parser.getLexer(); }
+ /// True if all of $fcc0 - $fcc7 exist for the current ISA.
+ bool hasEightFccRegisters() const { return hasMips4() || hasMips32(); }
+
+ bool isGP64bit() const { return STI.getFeatureBits() & Mips::FeatureGP64Bit; }
+ bool isFP64bit() const { return STI.getFeatureBits() & Mips::FeatureFP64Bit; }
+ bool isABI_N32() const { return STI.getFeatureBits() & Mips::FeatureN32; }
+ bool isABI_N64() const { return STI.getFeatureBits() & Mips::FeatureN64; }
+ bool isABI_O32() const { return STI.getFeatureBits() & Mips::FeatureO32; }
+ bool isABI_FPXX() const { return false; } // TODO: add check for FeatureXX
+
+ bool allowOddSPReg() const {
+ return !(STI.getFeatureBits() & Mips::FeatureNoOddSPReg);
+ }
+
+ bool inMicroMipsMode() const {
+ return STI.getFeatureBits() & Mips::FeatureMicroMips;
+ }
+ bool hasMips1() const { return STI.getFeatureBits() & Mips::FeatureMips1; }
+ bool hasMips2() const { return STI.getFeatureBits() & Mips::FeatureMips2; }
+ bool hasMips3() const { return STI.getFeatureBits() & Mips::FeatureMips3; }
+ bool hasMips4() const { return STI.getFeatureBits() & Mips::FeatureMips4; }
+ bool hasMips5() const { return STI.getFeatureBits() & Mips::FeatureMips5; }
+ bool hasMips32() const {
+ return (STI.getFeatureBits() & Mips::FeatureMips32);
+ }
+ bool hasMips64() const {
+ return (STI.getFeatureBits() & Mips::FeatureMips64);
+ }
+ bool hasMips32r2() const {
+ return (STI.getFeatureBits() & Mips::FeatureMips32r2);
+ }
+ bool hasMips64r2() const {
+ return (STI.getFeatureBits() & Mips::FeatureMips64r2);
+ }
+ bool hasMips32r6() const {
+ return (STI.getFeatureBits() & Mips::FeatureMips32r6);
+ }
+ bool hasMips64r6() const {
+ return (STI.getFeatureBits() & Mips::FeatureMips64r6);
+ }
+ bool hasDSP() const { return (STI.getFeatureBits() & Mips::FeatureDSP); }
+ bool hasDSPR2() const { return (STI.getFeatureBits() & Mips::FeatureDSPR2); }
+ bool hasMSA() const { return (STI.getFeatureBits() & Mips::FeatureMSA); }
+
+ bool inMips16Mode() const {
+ return STI.getFeatureBits() & Mips::FeatureMips16;
+ }
+ // TODO: See how we can get this info.
+ bool mipsSEUsesSoftFloat() const { return false; }
+
/// Warn if RegNo is the current assembler temporary.
void WarnIfAssemblerTemporary(int RegNo, SMLoc Loc);
};
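All of the has*/is* predicates added above are thin wrappers over the subtarget's feature bitmask. A self-contained sketch of the pattern (the mask constants are illustrative; the real Mips::Feature* values are tablegen-generated):

#include <cassert>
#include <cstdint>

constexpr uint64_t FeatureGP64Bit  = 1ull << 0; // illustrative bit positions
constexpr uint64_t FeatureMips32r6 = 1ull << 1;

struct SubtargetInfo {
  uint64_t Bits = 0;
  uint64_t getFeatureBits() const { return Bits; }
};

int main() {
  SubtargetInfo STI;
  STI.Bits = FeatureMips32r6;
  bool HasMips32r6 = (STI.getFeatureBits() & FeatureMips32r6) != 0;
  bool IsGP64 = (STI.getFeatureBits() & FeatureGP64Bit) != 0;
  assert(HasMips32r6 && !IsGP64); // predicates are just mask tests
  (void)HasMips32r6;
  (void)IsGP64;
}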
@@ -261,9 +308,9 @@ public:
/// Broad categories of register classes
/// The exact class is finalized by the render method.
enum RegKind {
- RegKind_GPR = 1, /// GPR32 and GPR64 (depending on isGP64())
+ RegKind_GPR = 1, /// GPR32 and GPR64 (depending on isGP64bit())
RegKind_FGR = 2, /// FGR32, FGR64, AFGR64 (depending on context and
- /// isFP64())
+ /// isFP64bit())
RegKind_FCC = 4, /// FCC
RegKind_MSA128 = 8, /// MSA128[BHWD] (makes no difference which)
RegKind_MSACtrl = 16, /// MSA control registers
@@ -289,9 +336,11 @@ private:
k_Token /// A simple token
} Kind;
+public:
MipsOperand(KindTy K, MipsAsmParser &Parser)
: MCParsedAsmOperand(), Kind(K), AsmParser(Parser) {}
+private:
/// For diagnostics, and checking the assembler temporary
MipsAsmParser &AsmParser;
@@ -330,10 +379,11 @@ private:
SMLoc StartLoc, EndLoc;
/// Internal constructor for register kinds
- static MipsOperand *CreateReg(unsigned Index, RegKind RegKind,
- const MCRegisterInfo *RegInfo, SMLoc S, SMLoc E,
- MipsAsmParser &Parser) {
- MipsOperand *Op = new MipsOperand(k_RegisterIndex, Parser);
+ static std::unique_ptr<MipsOperand> CreateReg(unsigned Index, RegKind RegKind,
+ const MCRegisterInfo *RegInfo,
+ SMLoc S, SMLoc E,
+ MipsAsmParser &Parser) {
+ auto Op = make_unique<MipsOperand>(k_RegisterIndex, Parser);
Op->RegIdx.Index = Index;
Op->RegIdx.RegInfo = RegInfo;
Op->RegIdx.Kind = RegKind;
@@ -521,6 +571,10 @@ public:
void addFGR32AsmRegOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
Inst.addOperand(MCOperand::CreateReg(getFGR32Reg()));
+ // FIXME: We ought to do this for -integrated-as without -via-file-asm too.
+ if (!AsmParser.allowOddSPReg() && RegIdx.Index & 1)
+ AsmParser.Error(StartLoc, "-mno-odd-spreg prohibits the use of odd FPU "
+ "registers");
}
void addFGRH32AsmRegOperands(MCInst &Inst, unsigned N) const {
@@ -612,6 +666,12 @@ public:
return Kind == k_Token;
}
bool isMem() const override { return Kind == k_Memory; }
+ bool isConstantMemOff() const {
+ return isMem() && dyn_cast<MCConstantExpr>(getMemOff());
+ }
+ template <unsigned Bits> bool isMemWithSimmOffset() const {
+ return isMem() && isConstantMemOff() && isInt<Bits>(getConstantMemOff());
+ }
bool isInvNum() const { return Kind == k_Immediate; }
bool isLSAImm() const {
if (!isConstantImm())
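The isMemWithSimmOffset<Bits> predicate above accepts a memory operand only when its constant offset fits a signed Bits-wide immediate; the isInt<Bits> helper it calls reduces to a range check. A standalone equivalent under that assumption (sample values made up):

#include <cstdint>
#include <iostream>

template <unsigned Bits> bool fitsSignedImm(int64_t X) {
  // Same range test as the isInt<Bits> helper used above.
  return X >= -(INT64_C(1) << (Bits - 1)) && X < (INT64_C(1) << (Bits - 1));
}

int main() {
  std::cout << fitsSignedImm<16>(32767) << "\n"; // 1: encodable offset
  std::cout << fitsSignedImm<16>(40000) << "\n"; // 0: needs expansion
}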
@@ -656,9 +716,13 @@ public:
return Mem.Off;
}
- static MipsOperand *CreateToken(StringRef Str, SMLoc S,
- MipsAsmParser &Parser) {
- MipsOperand *Op = new MipsOperand(k_Token, Parser);
+ int64_t getConstantMemOff() const {
+ return static_cast<const MCConstantExpr *>(getMemOff())->getValue();
+ }
+
+ static std::unique_ptr<MipsOperand> CreateToken(StringRef Str, SMLoc S,
+ MipsAsmParser &Parser) {
+ auto Op = make_unique<MipsOperand>(k_Token, Parser);
Op->Tok.Data = Str.data();
Op->Tok.Length = Str.size();
Op->StartLoc = S;
@@ -668,74 +732,75 @@ public:
/// Create a numeric register (e.g. $1). The exact register remains
/// unresolved until an instruction successfully matches
- static MipsOperand *CreateNumericReg(unsigned Index,
- const MCRegisterInfo *RegInfo, SMLoc S,
- SMLoc E, MipsAsmParser &Parser) {
+ static std::unique_ptr<MipsOperand>
+ CreateNumericReg(unsigned Index, const MCRegisterInfo *RegInfo, SMLoc S,
+ SMLoc E, MipsAsmParser &Parser) {
DEBUG(dbgs() << "CreateNumericReg(" << Index << ", ...)\n");
return CreateReg(Index, RegKind_Numeric, RegInfo, S, E, Parser);
}
/// Create a register that is definitely a GPR.
/// This is typically only used for named registers such as $gp.
- static MipsOperand *CreateGPRReg(unsigned Index,
- const MCRegisterInfo *RegInfo, SMLoc S,
- SMLoc E, MipsAsmParser &Parser) {
+ static std::unique_ptr<MipsOperand>
+ CreateGPRReg(unsigned Index, const MCRegisterInfo *RegInfo, SMLoc S, SMLoc E,
+ MipsAsmParser &Parser) {
return CreateReg(Index, RegKind_GPR, RegInfo, S, E, Parser);
}
/// Create a register that is definitely a FGR.
/// This is typically only used for named registers such as $f0.
- static MipsOperand *CreateFGRReg(unsigned Index,
- const MCRegisterInfo *RegInfo, SMLoc S,
- SMLoc E, MipsAsmParser &Parser) {
+ static std::unique_ptr<MipsOperand>
+ CreateFGRReg(unsigned Index, const MCRegisterInfo *RegInfo, SMLoc S, SMLoc E,
+ MipsAsmParser &Parser) {
return CreateReg(Index, RegKind_FGR, RegInfo, S, E, Parser);
}
/// Create a register that is definitely an FCC.
/// This is typically only used for named registers such as $fcc0.
- static MipsOperand *CreateFCCReg(unsigned Index,
- const MCRegisterInfo *RegInfo, SMLoc S,
- SMLoc E, MipsAsmParser &Parser) {
+ static std::unique_ptr<MipsOperand>
+ CreateFCCReg(unsigned Index, const MCRegisterInfo *RegInfo, SMLoc S, SMLoc E,
+ MipsAsmParser &Parser) {
return CreateReg(Index, RegKind_FCC, RegInfo, S, E, Parser);
}
/// Create a register that is definitely an ACC.
/// This is typically only used for named registers such as $ac0.
- static MipsOperand *CreateACCReg(unsigned Index,
- const MCRegisterInfo *RegInfo, SMLoc S,
- SMLoc E, MipsAsmParser &Parser) {
+ static std::unique_ptr<MipsOperand>
+ CreateACCReg(unsigned Index, const MCRegisterInfo *RegInfo, SMLoc S, SMLoc E,
+ MipsAsmParser &Parser) {
return CreateReg(Index, RegKind_ACC, RegInfo, S, E, Parser);
}
/// Create a register that is definitely an MSA128.
/// This is typically only used for named registers such as $w0.
- static MipsOperand *CreateMSA128Reg(unsigned Index,
- const MCRegisterInfo *RegInfo, SMLoc S,
- SMLoc E, MipsAsmParser &Parser) {
+ static std::unique_ptr<MipsOperand>
+ CreateMSA128Reg(unsigned Index, const MCRegisterInfo *RegInfo, SMLoc S,
+ SMLoc E, MipsAsmParser &Parser) {
return CreateReg(Index, RegKind_MSA128, RegInfo, S, E, Parser);
}
/// Create a register that is definitely an MSACtrl.
/// This is typically only used for named registers such as $msaaccess.
- static MipsOperand *CreateMSACtrlReg(unsigned Index,
- const MCRegisterInfo *RegInfo, SMLoc S,
- SMLoc E, MipsAsmParser &Parser) {
+ static std::unique_ptr<MipsOperand>
+ CreateMSACtrlReg(unsigned Index, const MCRegisterInfo *RegInfo, SMLoc S,
+ SMLoc E, MipsAsmParser &Parser) {
return CreateReg(Index, RegKind_MSACtrl, RegInfo, S, E, Parser);
}
- static MipsOperand *CreateImm(const MCExpr *Val, SMLoc S, SMLoc E,
- MipsAsmParser &Parser) {
- MipsOperand *Op = new MipsOperand(k_Immediate, Parser);
+ static std::unique_ptr<MipsOperand>
+ CreateImm(const MCExpr *Val, SMLoc S, SMLoc E, MipsAsmParser &Parser) {
+ auto Op = make_unique<MipsOperand>(k_Immediate, Parser);
Op->Imm.Val = Val;
Op->StartLoc = S;
Op->EndLoc = E;
return Op;
}
- static MipsOperand *CreateMem(MipsOperand *Base, const MCExpr *Off, SMLoc S,
- SMLoc E, MipsAsmParser &Parser) {
- MipsOperand *Op = new MipsOperand(k_Memory, Parser);
- Op->Mem.Base = Base;
+ static std::unique_ptr<MipsOperand>
+ CreateMem(std::unique_ptr<MipsOperand> Base, const MCExpr *Off, SMLoc S,
+ SMLoc E, MipsAsmParser &Parser) {
+ auto Op = make_unique<MipsOperand>(k_Memory, Parser);
+ Op->Mem.Base = Base.release();
Op->Mem.Off = Off;
Op->StartLoc = S;
Op->EndLoc = E;
@@ -756,7 +821,11 @@ public:
return isRegIdx() && RegIdx.Kind & RegKind_CCR && RegIdx.Index <= 31;
}
bool isFCCAsmReg() const {
- return isRegIdx() && RegIdx.Kind & RegKind_FCC && RegIdx.Index <= 7;
+ if (!(isRegIdx() && RegIdx.Kind & RegKind_FCC))
+ return false;
+ if (!AsmParser.hasEightFccRegisters())
+ return RegIdx.Index == 0;
+ return RegIdx.Index <= 7;
}
bool isACCAsmReg() const {
return isRegIdx() && RegIdx.Kind & RegKind_ACC && RegIdx.Index <= 3;
@@ -849,9 +918,10 @@ bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc,
Offset = Inst.getOperand(2);
if (!Offset.isImm())
break; // We'll deal with this situation later on when applying fixups.
- if (!isIntN(isMicroMips() ? 17 : 18, Offset.getImm()))
+ if (!isIntN(inMicroMipsMode() ? 17 : 18, Offset.getImm()))
return Error(IDLoc, "branch target out of range");
- if (OffsetToAlignment(Offset.getImm(), 1LL << (isMicroMips() ? 1 : 2)))
+ if (OffsetToAlignment(Offset.getImm(),
+ 1LL << (inMicroMipsMode() ? 1 : 2)))
return Error(IDLoc, "branch to misaligned address");
break;
case Mips::BGEZ:
@@ -874,14 +944,23 @@ bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc,
Offset = Inst.getOperand(1);
if (!Offset.isImm())
break; // We'll deal with this situation later on when applying fixups.
- if (!isIntN(isMicroMips() ? 17 : 18, Offset.getImm()))
+ if (!isIntN(inMicroMipsMode() ? 17 : 18, Offset.getImm()))
return Error(IDLoc, "branch target out of range");
- if (OffsetToAlignment(Offset.getImm(), 1LL << (isMicroMips() ? 1 : 2)))
+ if (OffsetToAlignment(Offset.getImm(),
+ 1LL << (inMicroMipsMode() ? 1 : 2)))
return Error(IDLoc, "branch to misaligned address");
break;
}
}
+ // SSNOP is deprecated on MIPS32r6/MIPS64r6.
+ // We still accept it, but it is just a normal NOP.
+ if (hasMips32r6() && Inst.getOpcode() == Mips::SSNOP) {
+ std::string ISA = hasMips64r6() ? "MIPS64r6" : "MIPS32r6";
+ Warning(IDLoc, "ssnop is deprecated for " + ISA + " and is equivalent to a "
+ "nop instruction");
+ }
+
if (MCID.hasDelaySlot() && Options.isReorder()) {
// If this instruction has a delay slot and .set reorder is active,
// emit a NOP after it.
@@ -930,7 +1009,7 @@ bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc,
} // if load/store
if (needsExpansion(Inst))
- expandInstruction(Inst, IDLoc, Instructions);
+ return expandInstruction(Inst, IDLoc, Instructions);
else
Instructions.push_back(Inst);
@@ -943,17 +1022,27 @@ bool MipsAsmParser::needsExpansion(MCInst &Inst) {
case Mips::LoadImm32Reg:
case Mips::LoadAddr32Imm:
case Mips::LoadAddr32Reg:
+ case Mips::LoadImm64Reg:
return true;
default:
return false;
}
}
-void MipsAsmParser::expandInstruction(MCInst &Inst, SMLoc IDLoc,
+bool MipsAsmParser::expandInstruction(MCInst &Inst, SMLoc IDLoc,
SmallVectorImpl<MCInst> &Instructions) {
switch (Inst.getOpcode()) {
+ default:
+ assert(0 && "unimplemented expansion");
+ return true;
case Mips::LoadImm32Reg:
return expandLoadImm(Inst, IDLoc, Instructions);
+ case Mips::LoadImm64Reg:
+ if (!isGP64bit()) {
+ Error(IDLoc, "instruction requires a CPU feature not currently enabled");
+ return true;
+ }
+ return expandLoadImm(Inst, IDLoc, Instructions);
case Mips::LoadAddr32Imm:
return expandLoadAddressImm(Inst, IDLoc, Instructions);
case Mips::LoadAddr32Reg:
@@ -961,7 +1050,31 @@ void MipsAsmParser::expandInstruction(MCInst &Inst, SMLoc IDLoc,
}
}
-void MipsAsmParser::expandLoadImm(MCInst &Inst, SMLoc IDLoc,
+namespace {
+template <int Shift, bool PerformShift>
+void createShiftOr(int64_t Value, unsigned RegNo, SMLoc IDLoc,
+ SmallVectorImpl<MCInst> &Instructions) {
+ MCInst tmpInst;
+ if (PerformShift) {
+ tmpInst.setOpcode(Mips::DSLL);
+ tmpInst.addOperand(MCOperand::CreateReg(RegNo));
+ tmpInst.addOperand(MCOperand::CreateReg(RegNo));
+ tmpInst.addOperand(MCOperand::CreateImm(16));
+ tmpInst.setLoc(IDLoc);
+ Instructions.push_back(tmpInst);
+ tmpInst.clear();
+ }
+ tmpInst.setOpcode(Mips::ORi);
+ tmpInst.addOperand(MCOperand::CreateReg(RegNo));
+ tmpInst.addOperand(MCOperand::CreateReg(RegNo));
+ tmpInst.addOperand(
+ MCOperand::CreateImm(((Value & (0xffffLL << Shift)) >> Shift)));
+ tmpInst.setLoc(IDLoc);
+ Instructions.push_back(tmpInst);
+}
+}
+
+bool MipsAsmParser::expandLoadImm(MCInst &Inst, SMLoc IDLoc,
SmallVectorImpl<MCInst> &Instructions) {
MCInst tmpInst;
const MCOperand &ImmOp = Inst.getOperand(1);
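The createShiftOr<Shift, PerformShift> helper above optionally emits a dsll by 16 and then ORs in the 16-bit chunk of Value at bit position Shift. The immediate it computes can be checked in isolation (the constant below is illustrative):

#include <cstdint>
#include <cstdio>

int main() {
  uint64_t Value = 0x0000123456789ABCull; // a 48-bit example constant
  for (int Shift : {32, 16, 0}) {
    // Equivalent to ((Value & (0xffffLL << Shift)) >> Shift) above.
    unsigned Chunk = (Value >> Shift) & 0xffff;
    std::printf("Shift %2d -> ori imm 0x%04x\n", Shift, Chunk);
  }
  // Prints 0x1234, 0x5678, 0x9abc: the three chunks the 48-bit path ORs in.
}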
@@ -969,8 +1082,10 @@ void MipsAsmParser::expandLoadImm(MCInst &Inst, SMLoc IDLoc,
const MCOperand &RegOp = Inst.getOperand(0);
assert(RegOp.isReg() && "expected register operand kind");
- int ImmValue = ImmOp.getImm();
+ int64_t ImmValue = ImmOp.getImm();
tmpInst.setLoc(IDLoc);
+ // FIXME: gas has a special case for values that are 000...1111, which
+ // becomes a li -1 and then a dsrl
if (0 <= ImmValue && ImmValue <= 65535) {
// For 0 <= j <= 65535.
// li d,j => ori d,$zero,j
@@ -987,25 +1102,76 @@ void MipsAsmParser::expandLoadImm(MCInst &Inst, SMLoc IDLoc,
tmpInst.addOperand(MCOperand::CreateReg(Mips::ZERO));
tmpInst.addOperand(MCOperand::CreateImm(ImmValue));
Instructions.push_back(tmpInst);
- } else {
- // For any other value of j that is representable as a 32-bit integer.
+ } else if ((ImmValue & 0xffffffff) == ImmValue) {
+ // For any value of j that is representable as a 32-bit integer, create
+ // a sequence of:
// li d,j => lui d,hi16(j)
// ori d,d,lo16(j)
tmpInst.setOpcode(Mips::LUi);
tmpInst.addOperand(MCOperand::CreateReg(RegOp.getReg()));
tmpInst.addOperand(MCOperand::CreateImm((ImmValue & 0xffff0000) >> 16));
Instructions.push_back(tmpInst);
- tmpInst.clear();
- tmpInst.setOpcode(Mips::ORi);
+ createShiftOr<0, false>(ImmValue, RegOp.getReg(), IDLoc, Instructions);
+ } else if ((ImmValue & (0xffffLL << 48)) == 0) {
+ if (!isGP64bit()) {
+ Error(IDLoc, "instruction requires a CPU feature not currently enabled");
+ return true;
+ }
+
+ //             <------- lo32 ------>
+ // <------- hi32 ------>
+ //  <- hi16 ->            <- lo16 ->
+ //  _________________________________
+ // |          |          |          |
+ // | 16 bits  | 16 bits  | 16 bits  |
+ // |__________|__________|__________|
+ //
+ // For any value of j that is representable as a 48-bit integer, create
+ // a sequence of:
+ // li d,j => lui d,hi16(j)
+ // ori d,d,hi16(lo32(j))
+ // dsll d,d,16
+ // ori d,d,lo16(lo32(j))
+ tmpInst.setOpcode(Mips::LUi);
tmpInst.addOperand(MCOperand::CreateReg(RegOp.getReg()));
+ tmpInst.addOperand(
+ MCOperand::CreateImm((ImmValue & (0xffffLL << 32)) >> 32));
+ Instructions.push_back(tmpInst);
+ createShiftOr<16, false>(ImmValue, RegOp.getReg(), IDLoc, Instructions);
+ createShiftOr<0, true>(ImmValue, RegOp.getReg(), IDLoc, Instructions);
+ } else {
+ if (!isGP64bit()) {
+ Error(IDLoc, "instruction requires a CPU feature not currently enabled");
+ return true;
+ }
+
+ // <------- hi32 ------> <------- lo32 ------>
+ //  <- hi16 -> <- lo16 ->
+ //  ___________________________________________
+ // |          |          |          |          |
+ // | 16 bits  | 16 bits  | 16 bits  | 16 bits  |
+ // |__________|__________|__________|__________|
+ //
+ // For any value of j that isn't representable as a 48-bit integer, create
+ // a sequence of:
+ // li d,j => lui d,hi16(j)
+ // ori d,d,lo16(hi32(j))
+ // dsll d,d,16
+ // ori d,d,hi16(lo32(j))
+ // dsll d,d,16
+ // ori d,d,lo16(lo32(j))
+ tmpInst.setOpcode(Mips::LUi);
tmpInst.addOperand(MCOperand::CreateReg(RegOp.getReg()));
- tmpInst.addOperand(MCOperand::CreateImm(ImmValue & 0xffff));
- tmpInst.setLoc(IDLoc);
+ tmpInst.addOperand(
+ MCOperand::CreateImm((ImmValue & (0xffffLL << 48)) >> 48));
Instructions.push_back(tmpInst);
+ createShiftOr<32, false>(ImmValue, RegOp.getReg(), IDLoc, Instructions);
+ createShiftOr<16, true>(ImmValue, RegOp.getReg(), IDLoc, Instructions);
+ createShiftOr<0, true>(ImmValue, RegOp.getReg(), IDLoc, Instructions);
}
+ return false;
}
-void
+bool
MipsAsmParser::expandLoadAddressReg(MCInst &Inst, SMLoc IDLoc,
SmallVectorImpl<MCInst> &Instructions) {
MCInst tmpInst;
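For the full 64-bit case in the hunk above, the lui/ori/dsll sequence rebuilds the constant 16 bits at a time. Replaying it on a plain integer (the value is illustrative, and MIPS64 lui sign-extension is ignored since this sample's top bit is clear):

#include <cassert>
#include <cstdint>

int main() {
  uint64_t J = 0x1122334455667788ull;
  uint16_t C3 = static_cast<uint16_t>(J >> 48); // lui  d, hi16(j)
  uint16_t C2 = static_cast<uint16_t>(J >> 32); // ori  d, d, lo16(hi32(j))
  uint16_t C1 = static_cast<uint16_t>(J >> 16); // ori  d, d, hi16(lo32(j))
  uint16_t C0 = static_cast<uint16_t>(J);       // ori  d, d, lo16(lo32(j))
  uint64_t D = static_cast<uint64_t>(C3) << 16; // lui places its imm high
  D |= C2;                                      // ori
  D <<= 16;                                     // dsll d, d, 16
  D |= C1;                                      // ori
  D <<= 16;                                     // dsll d, d, 16
  D |= C0;                                      // ori
  assert(D == J); // the sequence reproduces the original constant
  (void)D;
}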
@@ -1046,9 +1212,10 @@ MipsAsmParser::expandLoadAddressReg(MCInst &Inst, SMLoc IDLoc,
tmpInst.addOperand(MCOperand::CreateReg(SrcRegOp.getReg()));
Instructions.push_back(tmpInst);
}
+ return false;
}
-void
+bool
MipsAsmParser::expandLoadAddressImm(MCInst &Inst, SMLoc IDLoc,
SmallVectorImpl<MCInst> &Instructions) {
MCInst tmpInst;
@@ -1080,6 +1247,7 @@ MipsAsmParser::expandLoadAddressImm(MCInst &Inst, SMLoc IDLoc,
tmpInst.addOperand(MCOperand::CreateImm(ImmValue & 0xffff));
Instructions.push_back(tmpInst);
}
+ return false;
}
void MipsAsmParser::expandMemInst(MCInst &Inst, SMLoc IDLoc,
@@ -1090,8 +1258,6 @@ void MipsAsmParser::expandMemInst(MCInst &Inst, SMLoc IDLoc,
unsigned ImmOffset, HiOffset, LoOffset;
const MCExpr *ExprOffset;
unsigned TmpRegNum;
- unsigned AtRegNum = getReg(
- (isGP64()) ? Mips::GPR64RegClassID : Mips::GPR32RegClassID, getATReg());
// 1st operand is either the source or destination register.
assert(Inst.getOperand(0).isReg() && "expected register operand kind");
unsigned RegOpNum = Inst.getOperand(0).getReg();
@@ -1111,10 +1277,46 @@ void MipsAsmParser::expandMemInst(MCInst &Inst, SMLoc IDLoc,
ExprOffset = Inst.getOperand(2).getExpr();
// All instructions will have the same location.
TempInst.setLoc(IDLoc);
- // 1st instruction in expansion is LUi. For load instruction we can use
- // the dst register as a temporary if base and dst are different,
- // but for stores we must use $at.
- TmpRegNum = (isLoad && (BaseRegNum != RegOpNum)) ? RegOpNum : AtRegNum;
+ // These are some of the types of expansions we perform here:
+ // 1) lw $8, sym => lui $8, %hi(sym)
+ // lw $8, %lo(sym)($8)
+ // 2) lw $8, offset($9) => lui $8, %hi(offset)
+ // add $8, $8, $9
+ // lw $8, %lo(offset)($9)
+ // 3) lw $8, offset($8) => lui $at, %hi(offset)
+ // add $at, $at, $8
+ // lw $8, %lo(offset)($at)
+ // 4) sw $8, sym => lui $at, %hi(sym)
+ // sw $8, %lo(sym)($at)
+ // 5) sw $8, offset($8) => lui $at, %hi(offset)
+ // add $at, $at, $8
+ // sw $8, %lo(offset)($at)
+ // 6) ldc1 $f0, sym => lui $at, %hi(sym)
+ // ldc1 $f0, %lo(sym)($at)
+ //
+ // For load instructions we can use the destination register as a temporary
+ // if base and dst are different (examples 1 and 2) and if the destination
+ // register is a GPR; otherwise we must use $at (examples 3 and 6) and error
+ // out if it is not available. For stores we must always use $at (examples 4
+ // and 5) because we must not clobber the source register while setting up
+ // the offset.
+ const MCInstrDesc &Desc = getInstDesc(Inst.getOpcode());
+ int16_t RegClassOp0 = Desc.OpInfo[0].RegClass;
+ unsigned RegClassIDOp0 =
+ getContext().getRegisterInfo()->getRegClass(RegClassOp0).getID();
+ bool IsGPR = (RegClassIDOp0 == Mips::GPR32RegClassID) ||
+ (RegClassIDOp0 == Mips::GPR64RegClassID);
+ if (isLoad && IsGPR && (BaseRegNum != RegOpNum))
+ TmpRegNum = RegOpNum;
+ else {
+ int AT = getATReg(IDLoc);
+ // At this point we need AT to perform the expansions and we exit if it is
+ // not available.
+ if (!AT)
+ return;
+ TmpRegNum = getReg(
+ (isGP64bit()) ? Mips::GPR64RegClassID : Mips::GPR32RegClassID, AT);
+ }
+
TempInst.setOpcode(Mips::LUi);
TempInst.addOperand(MCOperand::CreateReg(TmpRegNum));
if (isImmOpnd)
@@ -1164,10 +1366,24 @@ void MipsAsmParser::expandMemInst(MCInst &Inst, SMLoc IDLoc,
TempInst.clear();
}
-bool MipsAsmParser::MatchAndEmitInstruction(
- SMLoc IDLoc, unsigned &Opcode,
- SmallVectorImpl<MCParsedAsmOperand *> &Operands, MCStreamer &Out,
- unsigned &ErrorInfo, bool MatchingInlineAsm) {
+unsigned MipsAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
+ // As described by the Mips32r2 spec, the registers Rd and Rs for
+ // jalr.hb must be different.
+ unsigned Opcode = Inst.getOpcode();
+
+ if (Opcode == Mips::JALR_HB &&
+ (Inst.getOperand(0).getReg() == Inst.getOperand(1).getReg()))
+ return Match_RequiresDifferentSrcAndDst;
+
+ return Match_Success;
+}
+
+bool MipsAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
+ OperandVector &Operands,
+ MCStreamer &Out,
+ unsigned &ErrorInfo,
+ bool MatchingInlineAsm) {
+
MCInst Inst;
SmallVector<MCInst, 8> Instructions;
unsigned MatchResult =
@@ -1192,7 +1408,7 @@ bool MipsAsmParser::MatchAndEmitInstruction(
if (ErrorInfo >= Operands.size())
return Error(IDLoc, "too few operands for instruction");
- ErrorLoc = ((MipsOperand *)Operands[ErrorInfo])->getStartLoc();
+ ErrorLoc = ((MipsOperand &)*Operands[ErrorInfo]).getStartLoc();
if (ErrorLoc == SMLoc())
ErrorLoc = IDLoc;
}
@@ -1201,6 +1417,8 @@ bool MipsAsmParser::MatchAndEmitInstruction(
}
case Match_MnemonicFail:
return Error(IDLoc, "invalid instruction");
+ case Match_RequiresDifferentSrcAndDst:
+ return Error(IDLoc, "source and destination must be different");
}
return true;
}
@@ -1254,7 +1472,7 @@ int MipsAsmParser::matchCPURegisterName(StringRef Name) {
.Case("t9", 25)
.Default(-1);
- if (isN32() || isN64()) {
+ if (isABI_N32() || isABI_N64()) {
// Although SGI documentation just cuts out t0-t3 for n32/n64,
// GNU pushes the values of t0-t3 to override the o32/o64 values for t4-t7
// We are supporting both cases, so for t0-t3 we'll just push them to t4-t7.
@@ -1354,10 +1572,11 @@ bool MipsAssemblerOptions::setATReg(unsigned Reg) {
return true;
}
-int MipsAsmParser::getATReg() {
+int MipsAsmParser::getATReg(SMLoc Loc) {
int AT = Options.getATRegNum();
if (AT == 0)
- TokError("Pseudo instruction requires $at, which is not available");
+ reportParseError(Loc,
+ "Pseudo instruction requires $at, which is not available");
return AT;
}
@@ -1366,7 +1585,7 @@ unsigned MipsAsmParser::getReg(int RC, int RegNo) {
}
unsigned MipsAsmParser::getGPR(int RegNo) {
- return getReg(isGP64() ? Mips::GPR64RegClassID : Mips::GPR32RegClassID,
+ return getReg(isGP64bit() ? Mips::GPR64RegClassID : Mips::GPR32RegClassID,
RegNo);
}
@@ -1378,9 +1597,7 @@ int MipsAsmParser::matchRegisterByNumber(unsigned RegNum, unsigned RegClass) {
return getReg(RegClass, RegNum);
}
-bool
-MipsAsmParser::ParseOperand(SmallVectorImpl<MCParsedAsmOperand *> &Operands,
- StringRef Mnemonic) {
+bool MipsAsmParser::ParseOperand(OperandVector &Operands, StringRef Mnemonic) {
DEBUG(dbgs() << "ParseOperand\n");
// Check if the current operand has a custom associated parser, if so, try to
@@ -1431,6 +1648,7 @@ MipsAsmParser::ParseOperand(SmallVectorImpl<MCParsedAsmOperand *> &Operands,
case AsmToken::Minus:
case AsmToken::Plus:
case AsmToken::Integer:
+ case AsmToken::Tilde:
case AsmToken::String: {
DEBUG(dbgs() << ".. generic integer\n");
OperandMatchResultTy ResTy = ParseImm(Operands);
@@ -1578,11 +1796,11 @@ bool MipsAsmParser::parseRelocOperand(const MCExpr *&Res) {
bool MipsAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
SMLoc &EndLoc) {
- SmallVector<MCParsedAsmOperand *, 1> Operands;
+ SmallVector<std::unique_ptr<MCParsedAsmOperand>, 1> Operands;
OperandMatchResultTy ResTy = ParseAnyRegister(Operands);
if (ResTy == MatchOperand_Success) {
assert(Operands.size() == 1);
- MipsOperand &Operand = *static_cast<MipsOperand *>(Operands.front());
+ MipsOperand &Operand = static_cast<MipsOperand &>(*Operands.front());
StartLoc = Operand.getStartLoc();
EndLoc = Operand.getEndLoc();
@@ -1592,11 +1810,9 @@ bool MipsAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
// register is a parse error.
if (Operand.isGPRAsmReg()) {
// Resolve to GPR32 or GPR64 appropriately.
- RegNo = isGP64() ? Operand.getGPR64Reg() : Operand.getGPR32Reg();
+ RegNo = isGP64bit() ? Operand.getGPR64Reg() : Operand.getGPR32Reg();
}
- delete &Operand;
-
return (RegNo == (unsigned)-1);
}
@@ -1632,8 +1848,8 @@ bool MipsAsmParser::parseMemOffset(const MCExpr *&Res, bool isParenExpr) {
return Result;
}
-MipsAsmParser::OperandMatchResultTy MipsAsmParser::parseMemOperand(
- SmallVectorImpl<MCParsedAsmOperand *> &Operands) {
+MipsAsmParser::OperandMatchResultTy
+MipsAsmParser::parseMemOperand(OperandVector &Operands) {
DEBUG(dbgs() << "parseMemOperand\n");
const MCExpr *IdVal = nullptr;
SMLoc S;
@@ -1653,8 +1869,8 @@ MipsAsmParser::OperandMatchResultTy MipsAsmParser::parseMemOperand(
const AsmToken &Tok = Parser.getTok(); // Get the next token.
if (Tok.isNot(AsmToken::LParen)) {
- MipsOperand *Mnemonic = static_cast<MipsOperand *>(Operands[0]);
- if (Mnemonic->getToken() == "la") {
+ MipsOperand &Mnemonic = static_cast<MipsOperand &>(*Operands[0]);
+ if (Mnemonic.getToken() == "la") {
SMLoc E =
SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
Operands.push_back(MipsOperand::CreateImm(IdVal, S, E, *this));
@@ -1666,9 +1882,10 @@ MipsAsmParser::OperandMatchResultTy MipsAsmParser::parseMemOperand(
// Zero register assumed, add a memory operand with ZERO as its base.
// "Base" will be managed by k_Memory.
- MipsOperand *Base = MipsOperand::CreateGPRReg(
- 0, getContext().getRegisterInfo(), S, E, *this);
- Operands.push_back(MipsOperand::CreateMem(Base, IdVal, S, E, *this));
+ auto Base = MipsOperand::CreateGPRReg(0, getContext().getRegisterInfo(),
+ S, E, *this);
+ Operands.push_back(
+ MipsOperand::CreateMem(std::move(Base), IdVal, S, E, *this));
return MatchOperand_Success;
}
Error(Parser.getTok().getLoc(), "'(' expected");
@@ -1695,7 +1912,8 @@ MipsAsmParser::OperandMatchResultTy MipsAsmParser::parseMemOperand(
IdVal = MCConstantExpr::Create(0, getContext());
// Replace the register operand with the memory operand.
- MipsOperand *op = static_cast<MipsOperand *>(Operands.back());
+ std::unique_ptr<MipsOperand> op(
+ static_cast<MipsOperand *>(Operands.back().release()));
// Remove the register from the operands.
// "op" will be managed by k_Memory.
Operands.pop_back();
@@ -1709,12 +1927,11 @@ MipsAsmParser::OperandMatchResultTy MipsAsmParser::parseMemOperand(
getContext());
}
- Operands.push_back(MipsOperand::CreateMem(op, IdVal, S, E, *this));
+ Operands.push_back(MipsOperand::CreateMem(std::move(op), IdVal, S, E, *this));
return MatchOperand_Success;
}
-bool MipsAsmParser::searchSymbolAlias(
- SmallVectorImpl<MCParsedAsmOperand *> &Operands) {
+bool MipsAsmParser::searchSymbolAlias(OperandVector &Operands) {
MCSymbol *Sym = getContext().LookupSymbol(Parser.getTok().getIdentifier());
if (Sym) {
@@ -1740,9 +1957,8 @@ bool MipsAsmParser::searchSymbolAlias(
} else if (Expr->getKind() == MCExpr::Constant) {
Parser.Lex();
const MCConstantExpr *Const = static_cast<const MCConstantExpr *>(Expr);
- MipsOperand *op =
- MipsOperand::CreateImm(Const, S, Parser.getTok().getLoc(), *this);
- Operands.push_back(op);
+ Operands.push_back(
+ MipsOperand::CreateImm(Const, S, Parser.getTok().getLoc(), *this));
return true;
}
}
@@ -1750,9 +1966,9 @@ bool MipsAsmParser::searchSymbolAlias(
}
MipsAsmParser::OperandMatchResultTy
-MipsAsmParser::MatchAnyRegisterNameWithoutDollar(
- SmallVectorImpl<MCParsedAsmOperand *> &Operands, StringRef Identifier,
- SMLoc S) {
+MipsAsmParser::MatchAnyRegisterNameWithoutDollar(OperandVector &Operands,
+ StringRef Identifier,
+ SMLoc S) {
int Index = matchCPURegisterName(Identifier);
if (Index != -1) {
Operands.push_back(MipsOperand::CreateGPRReg(
@@ -1799,8 +2015,7 @@ MipsAsmParser::MatchAnyRegisterNameWithoutDollar(
}
MipsAsmParser::OperandMatchResultTy
-MipsAsmParser::MatchAnyRegisterWithoutDollar(
- SmallVectorImpl<MCParsedAsmOperand *> &Operands, SMLoc S) {
+MipsAsmParser::MatchAnyRegisterWithoutDollar(OperandVector &Operands, SMLoc S) {
auto Token = Parser.getLexer().peekTok(false);
if (Token.is(AsmToken::Identifier)) {
@@ -1822,8 +2037,8 @@ MipsAsmParser::MatchAnyRegisterWithoutDollar(
return MatchOperand_NoMatch;
}
-MipsAsmParser::OperandMatchResultTy MipsAsmParser::ParseAnyRegister(
- SmallVectorImpl<MCParsedAsmOperand *> &Operands) {
+MipsAsmParser::OperandMatchResultTy
+MipsAsmParser::ParseAnyRegister(OperandVector &Operands) {
DEBUG(dbgs() << "ParseAnyRegister\n");
auto Token = Parser.getTok();
@@ -1850,7 +2065,7 @@ MipsAsmParser::OperandMatchResultTy MipsAsmParser::ParseAnyRegister(
}
MipsAsmParser::OperandMatchResultTy
-MipsAsmParser::ParseImm(SmallVectorImpl<MCParsedAsmOperand *> &Operands) {
+MipsAsmParser::ParseImm(OperandVector &Operands) {
switch (getLexer().getKind()) {
default:
return MatchOperand_NoMatch;
@@ -1858,6 +2073,7 @@ MipsAsmParser::ParseImm(SmallVectorImpl<MCParsedAsmOperand *> &Operands) {
case AsmToken::Minus:
case AsmToken::Plus:
case AsmToken::Integer:
+ case AsmToken::Tilde:
case AsmToken::String:
break;
}
@@ -1872,8 +2088,8 @@ MipsAsmParser::ParseImm(SmallVectorImpl<MCParsedAsmOperand *> &Operands) {
return MatchOperand_Success;
}
-MipsAsmParser::OperandMatchResultTy MipsAsmParser::ParseJumpTarget(
- SmallVectorImpl<MCParsedAsmOperand *> &Operands) {
+MipsAsmParser::OperandMatchResultTy
+MipsAsmParser::ParseJumpTarget(OperandVector &Operands) {
DEBUG(dbgs() << "ParseJumpTarget\n");
SMLoc S = getLexer().getLoc();
@@ -1899,7 +2115,7 @@ MipsAsmParser::OperandMatchResultTy MipsAsmParser::ParseJumpTarget(
}
MipsAsmParser::OperandMatchResultTy
-MipsAsmParser::parseInvNum(SmallVectorImpl<MCParsedAsmOperand *> &Operands) {
+MipsAsmParser::parseInvNum(OperandVector &Operands) {
const MCExpr *IdVal;
// If the first token is '$' we may have register operand.
if (Parser.getTok().is(AsmToken::Dollar))
@@ -1917,7 +2133,7 @@ MipsAsmParser::parseInvNum(SmallVectorImpl<MCParsedAsmOperand *> &Operands) {
}
MipsAsmParser::OperandMatchResultTy
-MipsAsmParser::ParseLSAImm(SmallVectorImpl<MCParsedAsmOperand *> &Operands) {
+MipsAsmParser::ParseLSAImm(OperandVector &Operands) {
switch (getLexer().getKind()) {
default:
return MatchOperand_NoMatch;
@@ -1996,8 +2212,7 @@ MCSymbolRefExpr::VariantKind MipsAsmParser::getVariantKind(StringRef Symbol) {
/// ::= '(', register, ')'
/// handle it before we iterate so we don't get tripped up by the lack of
/// a comma.
-bool MipsAsmParser::ParseParenSuffix(
- StringRef Name, SmallVectorImpl<MCParsedAsmOperand *> &Operands) {
+bool MipsAsmParser::ParseParenSuffix(StringRef Name, OperandVector &Operands) {
if (getLexer().is(AsmToken::LParen)) {
Operands.push_back(
MipsOperand::CreateToken("(", getLexer().getLoc(), *this));
@@ -2025,8 +2240,8 @@ bool MipsAsmParser::ParseParenSuffix(
/// ::= '[', integer, ']'
/// handle it before we iterate so we don't get tripped up by the lack of
/// a comma.
-bool MipsAsmParser::ParseBracketSuffix(
- StringRef Name, SmallVectorImpl<MCParsedAsmOperand *> &Operands) {
+bool MipsAsmParser::ParseBracketSuffix(StringRef Name,
+ OperandVector &Operands) {
if (getLexer().is(AsmToken::LBrac)) {
Operands.push_back(
MipsOperand::CreateToken("[", getLexer().getLoc(), *this));
@@ -2048,10 +2263,12 @@ bool MipsAsmParser::ParseBracketSuffix(
return false;
}
-bool MipsAsmParser::ParseInstruction(
- ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc,
- SmallVectorImpl<MCParsedAsmOperand *> &Operands) {
+bool MipsAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
+ SMLoc NameLoc, OperandVector &Operands) {
DEBUG(dbgs() << "ParseInstruction\n");
+ // We have reached the first instruction; the .module directive is
+ // forbidden after this point.
+ getTargetStreamer().setCanHaveModuleDir(false);
// Check if we have valid mnemonic
if (!mnemonicIsValid(Name, 0)) {
Parser.eatToEndOfStatement();
@@ -2098,13 +2315,13 @@ bool MipsAsmParser::ParseInstruction(
return false;
}
-bool MipsAsmParser::reportParseError(StringRef ErrorMsg) {
+bool MipsAsmParser::reportParseError(Twine ErrorMsg) {
SMLoc Loc = getLexer().getLoc();
Parser.eatToEndOfStatement();
return Error(Loc, ErrorMsg);
}
-bool MipsAsmParser::reportParseError(SMLoc Loc, StringRef ErrorMsg) {
+bool MipsAsmParser::reportParseError(SMLoc Loc, Twine ErrorMsg) {
return Error(Loc, ErrorMsg);
}
@@ -2238,6 +2455,32 @@ bool MipsAsmParser::parseSetNoMips16Directive() {
return false;
}
+bool MipsAsmParser::parseSetFpDirective() {
+ MipsABIFlagsSection::FpABIKind FpAbiVal;
+ // Line can be: .set fp=32
+ // .set fp=xx
+ // .set fp=64
+ Parser.Lex(); // Eat fp token
+ AsmToken Tok = Parser.getTok();
+ if (Tok.isNot(AsmToken::Equal)) {
+ reportParseError("unexpected token in statement");
+ return false;
+ }
+ Parser.Lex(); // Eat '=' token.
+ Tok = Parser.getTok();
+
+ if (!parseFpABIValue(FpAbiVal, ".set"))
+ return false;
+
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ reportParseError("unexpected token in statement");
+ return false;
+ }
+ getTargetStreamer().emitDirectiveSetFp(FpAbiVal);
+ Parser.Lex(); // Consume the EndOfStatement.
+ return false;
+}
+
bool MipsAsmParser::parseSetAssignment() {
StringRef Name;
const MCExpr *Value;
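parseFpABIValue (shared with .module fp= further below) accepts exactly 'xx', '32' and '64', with 'xx' and '32' additionally requiring the O32 ABI. A standalone model of that mapping (the enum mirrors FpABIKind from the code above but is redeclared here for illustration):

#include <iostream>
#include <string>

enum class FpABIKind { XX, S32, S64 };

bool mapFpValue(const std::string &V, bool IsO32, FpABIKind &Out) {
  if (V == "xx") { if (!IsO32) return false; Out = FpABIKind::XX;  return true; }
  if (V == "32") { if (!IsO32) return false; Out = FpABIKind::S32; return true; }
  if (V == "64") { Out = FpABIKind::S64; return true; }
  return false; // "unsupported value, expected 'xx', '32' or '64'"
}

int main() {
  FpABIKind K;
  std::cout << mapFpValue("xx", /*IsO32=*/true, K) << "\n";  // 1
  std::cout << mapFpValue("32", /*IsO32=*/false, K) << "\n"; // 0: needs O32
  std::cout << mapFpValue("64", /*IsO32=*/false, K) << "\n"; // 1
}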
@@ -2296,25 +2539,6 @@ bool MipsAsmParser::parseSetFeature(uint64_t Feature) {
return false;
}
-bool MipsAsmParser::parseRegister(unsigned &RegNum) {
- if (!getLexer().is(AsmToken::Dollar))
- return false;
-
- Parser.Lex();
-
- const AsmToken &Reg = Parser.getTok();
- if (Reg.is(AsmToken::Identifier)) {
- RegNum = matchCPURegisterName(Reg.getIdentifier());
- } else if (Reg.is(AsmToken::Integer)) {
- RegNum = Reg.getIntVal();
- } else {
- return false;
- }
-
- Parser.Lex();
- return true;
-}
-
bool MipsAsmParser::eatComma(StringRef ErrorStr) {
if (getLexer().isNot(AsmToken::Comma)) {
SMLoc Loc = getLexer().getLoc();
@@ -2332,21 +2556,20 @@ bool MipsAsmParser::parseDirectiveCPLoad(SMLoc Loc) {
// FIXME: Warn if cpload is used in Mips16 mode.
- SmallVector<MCParsedAsmOperand *, 1> Reg;
+ SmallVector<std::unique_ptr<MCParsedAsmOperand>, 1> Reg;
OperandMatchResultTy ResTy = ParseAnyRegister(Reg);
if (ResTy == MatchOperand_NoMatch || ResTy == MatchOperand_ParseFail) {
reportParseError("expected register containing function address");
return false;
}
- MipsOperand *RegOpnd = static_cast<MipsOperand *>(Reg[0]);
- if (!RegOpnd->isGPRAsmReg()) {
- reportParseError(RegOpnd->getStartLoc(), "invalid register");
+ MipsOperand &RegOpnd = static_cast<MipsOperand &>(*Reg[0]);
+ if (!RegOpnd.isGPRAsmReg()) {
+ reportParseError(RegOpnd.getStartLoc(), "invalid register");
return false;
}
- getTargetStreamer().emitDirectiveCpload(RegOpnd->getGPR32Reg());
- delete RegOpnd;
+ getTargetStreamer().emitDirectiveCpload(RegOpnd.getGPR32Reg());
return false;
}
@@ -2355,23 +2578,48 @@ bool MipsAsmParser::parseDirectiveCPSetup() {
unsigned Save;
bool SaveIsReg = true;
- if (!parseRegister(FuncReg))
- return reportParseError("expected register containing function address");
- FuncReg = getGPR(FuncReg);
+ SmallVector<std::unique_ptr<MCParsedAsmOperand>, 1> TmpReg;
+ OperandMatchResultTy ResTy = ParseAnyRegister(TmpReg);
+ if (ResTy == MatchOperand_NoMatch) {
+ reportParseError("expected register containing function address");
+ Parser.eatToEndOfStatement();
+ return false;
+ }
+
+ MipsOperand &FuncRegOpnd = static_cast<MipsOperand &>(*TmpReg[0]);
+ if (!FuncRegOpnd.isGPRAsmReg()) {
+ reportParseError(FuncRegOpnd.getStartLoc(), "invalid register");
+ Parser.eatToEndOfStatement();
+ return false;
+ }
+
+ FuncReg = FuncRegOpnd.getGPR32Reg();
+ TmpReg.clear();
if (!eatComma("expected comma parsing directive"))
return true;
- if (!parseRegister(Save)) {
+ ResTy = ParseAnyRegister(TmpReg);
+ if (ResTy == MatchOperand_NoMatch) {
const AsmToken &Tok = Parser.getTok();
if (Tok.is(AsmToken::Integer)) {
Save = Tok.getIntVal();
SaveIsReg = false;
Parser.Lex();
- } else
- return reportParseError("expected save register or stack offset");
- } else
- Save = getGPR(Save);
+ } else {
+ reportParseError("expected save register or stack offset");
+ Parser.eatToEndOfStatement();
+ return false;
+ }
+ } else {
+ MipsOperand &SaveOpnd = static_cast<MipsOperand &>(*TmpReg[0]);
+ if (!SaveOpnd.isGPRAsmReg()) {
+ reportParseError(SaveOpnd.getStartLoc(), "invalid register");
+ Parser.eatToEndOfStatement();
+ return false;
+ }
+ Save = SaveOpnd.getGPR32Reg();
+ }
if (!eatComma("expected comma parsing directive"))
return true;
@@ -2414,6 +2662,8 @@ bool MipsAsmParser::parseDirectiveSet() {
return parseSetNoAtDirective();
} else if (Tok.getString() == "at") {
return parseSetAtDirective();
+ } else if (Tok.getString() == "fp") {
+ return parseSetFpDirective();
} else if (Tok.getString() == "reorder") {
return parseSetReorderDirective();
} else if (Tok.getString() == "noreorder") {
@@ -2546,6 +2796,134 @@ bool MipsAsmParser::parseDirectiveOption() {
return false;
}
+/// parseDirectiveModule
+/// ::= .module oddspreg
+/// ::= .module nooddspreg
+/// ::= .module fp=value
+bool MipsAsmParser::parseDirectiveModule() {
+ MCAsmLexer &Lexer = getLexer();
+ SMLoc L = Lexer.getLoc();
+
+ if (!getTargetStreamer().getCanHaveModuleDir()) {
+ // TODO: Get a better message.
+ reportParseError(".module directive must appear before any code");
+ return false;
+ }
+
+ if (Lexer.is(AsmToken::Identifier)) {
+ StringRef Option = Parser.getTok().getString();
+ Parser.Lex();
+
+ if (Option == "oddspreg") {
+ getTargetStreamer().emitDirectiveModuleOddSPReg(true, isABI_O32());
+ clearFeatureBits(Mips::FeatureNoOddSPReg, "nooddspreg");
+
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ reportParseError("Expected end of statement");
+ return false;
+ }
+
+ return false;
+ } else if (Option == "nooddspreg") {
+ if (!isABI_O32()) {
+ Error(L, "'.module nooddspreg' requires the O32 ABI");
+ return false;
+ }
+
+ getTargetStreamer().emitDirectiveModuleOddSPReg(false, isABI_O32());
+ setFeatureBits(Mips::FeatureNoOddSPReg, "nooddspreg");
+
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ reportParseError("Expected end of statement");
+ return false;
+ }
+
+ return false;
+ } else if (Option == "fp") {
+ return parseDirectiveModuleFP();
+ }
+
+ return Error(L, "'" + Twine(Option) + "' is not a valid .module option.");
+ }
+
+ return false;
+}
+
+/// parseDirectiveModuleFP
+/// ::= =32
+/// ::= =xx
+/// ::= =64
+bool MipsAsmParser::parseDirectiveModuleFP() {
+ MCAsmLexer &Lexer = getLexer();
+
+ if (Lexer.isNot(AsmToken::Equal)) {
+ reportParseError("unexpected token in statement");
+ return false;
+ }
+ Parser.Lex(); // Eat '=' token.
+
+ MipsABIFlagsSection::FpABIKind FpABI;
+ if (!parseFpABIValue(FpABI, ".module"))
+ return false;
+
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ reportParseError("unexpected token in statement");
+ return false;
+ }
+
+ // Emit appropriate flags.
+ getTargetStreamer().emitDirectiveModuleFP(FpABI, isABI_O32());
+ Parser.Lex(); // Consume the EndOfStatement.
+ return false;
+}
+
+bool MipsAsmParser::parseFpABIValue(MipsABIFlagsSection::FpABIKind &FpABI,
+ StringRef Directive) {
+ MCAsmLexer &Lexer = getLexer();
+
+ if (Lexer.is(AsmToken::Identifier)) {
+ StringRef Value = Parser.getTok().getString();
+ Parser.Lex();
+
+ if (Value != "xx") {
+ reportParseError("unsupported value, expected 'xx', '32' or '64'");
+ return false;
+ }
+
+ if (!isABI_O32()) {
+ reportParseError("'" + Directive + " fp=xx' requires the O32 ABI");
+ return false;
+ }
+
+ FpABI = MipsABIFlagsSection::FpABIKind::XX;
+ return true;
+ }
+
+ if (Lexer.is(AsmToken::Integer)) {
+ unsigned Value = Parser.getTok().getIntVal();
+ Parser.Lex();
+
+ if (Value != 32 && Value != 64) {
+ reportParseError("unsupported value, expected 'xx', '32' or '64'");
+ return false;
+ }
+
+ if (Value == 32) {
+ if (!isABI_O32()) {
+ reportParseError("'" + Directive + " fp=32' requires the O32 ABI");
+ return false;
+ }
+
+ FpABI = MipsABIFlagsSection::FpABIKind::S32;
+ } else
+ FpABI = MipsABIFlagsSection::FpABIKind::S64;
+
+ return true;
+ }
+
+ return false;
+}
+
bool MipsAsmParser::ParseDirective(AsmToken DirectiveID) {
StringRef IDVal = DirectiveID.getString();
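The "must appear before any code" check relies on a one-shot flag on the target streamer: ParseInstruction clears it at the first mnemonic, and parseDirectiveModule refuses to run once it is clear. A minimal model of that latch (the flag name mirrors the calls in this patch):

#include <cassert>

struct TargetStreamer {
  bool CanHaveModuleDir = true;
  void setCanHaveModuleDir(bool B) { CanHaveModuleDir = B; }
  bool getCanHaveModuleDir() const { return CanHaveModuleDir; }
};

int main() {
  TargetStreamer TS;
  assert(TS.getCanHaveModuleDir());  // a .module directive is accepted here
  TS.setCanHaveModuleDir(false);     // first instruction has been parsed
  assert(!TS.getCanHaveModuleDir()); // any later .module is rejected
}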
@@ -2624,6 +3002,9 @@ bool MipsAsmParser::ParseDirective(AsmToken DirectiveID) {
if (IDVal == ".cpsetup")
return parseDirectiveCPSetup();
+ if (IDVal == ".module")
+ return parseDirectiveModule();
+
return true;
}
diff --git a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
index 95670aa..902b877 100644
--- a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
+++ b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
@@ -57,16 +57,24 @@ class MipsDisassembler : public MipsDisassemblerBase {
public:
/// Constructor - Initializes the disassembler.
///
- MipsDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx,
- bool bigEndian) :
- MipsDisassemblerBase(STI, Ctx, bigEndian) {
- IsMicroMips = STI.getFeatureBits() & Mips::FeatureMicroMips;
- }
+ MipsDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx, bool bigEndian)
+ : MipsDisassemblerBase(STI, Ctx, bigEndian) {
+ IsMicroMips = STI.getFeatureBits() & Mips::FeatureMicroMips;
+ }
- bool isMips32r6() const {
+ bool hasMips3() const { return STI.getFeatureBits() & Mips::FeatureMips3; }
+ bool hasMips32() const { return STI.getFeatureBits() & Mips::FeatureMips32; }
+ bool hasMips32r6() const {
return STI.getFeatureBits() & Mips::FeatureMips32r6;
}
+ bool isGP64() const { return STI.getFeatureBits() & Mips::FeatureGP64Bit; }
+
+ bool hasCOP3() const {
+ // Only present in MIPS-I and MIPS-II
+ return !hasMips32() && !hasMips3();
+ }
+
/// getInstruction - See MCDisassembler.
DecodeStatus getInstruction(MCInst &instr,
uint64_t &size,
@@ -149,6 +157,10 @@ static DecodeStatus DecodeFCCRegisterClass(MCInst &Inst,
uint64_t Address,
const void *Decoder);
+static DecodeStatus DecodeFGRCCRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder);
+
static DecodeStatus DecodeHWRegsRegisterClass(MCInst &Inst,
unsigned Insn,
uint64_t Address,
@@ -260,6 +272,11 @@ static DecodeStatus DecodeFMem(MCInst &Inst, unsigned Insn,
uint64_t Address,
const void *Decoder);
+static DecodeStatus DecodeSpecial3LlSc(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
+
static DecodeStatus DecodeSimm16(MCInst &Inst,
unsigned Insn,
uint64_t Address,
@@ -285,6 +302,9 @@ static DecodeStatus DecodeExtSize(MCInst &Inst,
static DecodeStatus DecodeSimm19Lsl2(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeSimm18Lsl3(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+
/// INSVE_[BHWD] have an implicit operand that the generated decoder doesn't
/// handle.
template <typename InsnType>
@@ -316,6 +336,11 @@ static DecodeStatus
DecodeBgtzGroupBranch(MCInst &MI, InsnType insn, uint64_t Address,
const void *Decoder);
+template <typename InsnType>
+static DecodeStatus
+DecodeBlezGroupBranch(MCInst &MI, InsnType insn, uint64_t Address,
+ const void *Decoder);
+
namespace llvm {
extern Target TheMipselTarget, TheMipsTarget, TheMips64Target,
TheMips64elTarget;
@@ -511,6 +536,7 @@ static DecodeStatus DecodeBlezlGroupBranch(MCInst &MI, InsnType insn,
InsnType Rs = fieldFromInstruction(insn, 21, 5);
InsnType Rt = fieldFromInstruction(insn, 16, 5);
InsnType Imm = SignExtend64(fieldFromInstruction(insn, 0, 16), 16) << 2;
+ bool HasRs = false;
if (Rt == 0)
return MCDisassembler::Fail;
@@ -518,8 +544,14 @@ static DecodeStatus DecodeBlezlGroupBranch(MCInst &MI, InsnType insn,
MI.setOpcode(Mips::BLEZC);
else if (Rs == Rt)
MI.setOpcode(Mips::BGEZC);
- else
- return MCDisassembler::Fail; // FIXME: BGEC is not implemented yet.
+ else {
+ HasRs = true;
+ MI.setOpcode(Mips::BGEC);
+ }
+
+ if (HasRs)
+ MI.addOperand(MCOperand::CreateReg(getReg(Decoder, Mips::GPR32RegClassID,
+ Rs)));
MI.addOperand(MCOperand::CreateReg(getReg(Decoder, Mips::GPR32RegClassID,
Rt)));
@@ -544,6 +576,8 @@ static DecodeStatus DecodeBgtzlGroupBranch(MCInst &MI, InsnType insn,
// BLTZC if rs == rt && rt != 0
// BLTC if rs != rt && rs != 0 && rt != 0
+ bool HasRs = false;
+
InsnType Rs = fieldFromInstruction(insn, 21, 5);
InsnType Rt = fieldFromInstruction(insn, 16, 5);
InsnType Imm = SignExtend64(fieldFromInstruction(insn, 0, 16), 16) << 2;
@@ -554,8 +588,14 @@ static DecodeStatus DecodeBgtzlGroupBranch(MCInst &MI, InsnType insn,
MI.setOpcode(Mips::BGTZC);
else if (Rs == Rt)
MI.setOpcode(Mips::BLTZC);
- else
- return MCDisassembler::Fail; // FIXME: BLTC is not implemented yet.
+ else {
+ MI.setOpcode(Mips::BLTC);
+ HasRs = true;
+ }
+
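+ // Only BLTC carries a distinct rs operand; BGTZC and BLTZC encode rt alone.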
+ if (HasRs)
+ MI.addOperand(MCOperand::CreateReg(getReg(Decoder, Mips::GPR32RegClassID,
+ Rs)));
MI.addOperand(MCOperand::CreateReg(getReg(Decoder, Mips::GPR32RegClassID,
Rt)));
@@ -595,8 +635,11 @@ static DecodeStatus DecodeBgtzGroupBranch(MCInst &MI, InsnType insn,
} else if (Rs == Rt) {
MI.setOpcode(Mips::BLTZALC);
HasRs = true;
- } else
- return MCDisassembler::Fail; // BLTUC not implemented yet
+ } else {
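+ // BLTUC needs both registers; the other cases in this group encode rt alone.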
+ MI.setOpcode(Mips::BLTUC);
+ HasRs = true;
+ HasRt = true;
+ }
if (HasRs)
MI.addOperand(MCOperand::CreateReg(getReg(Decoder, Mips::GPR32RegClassID,
@@ -611,6 +654,48 @@ static DecodeStatus DecodeBgtzGroupBranch(MCInst &MI, InsnType insn,
return MCDisassembler::Success;
}
+template <typename InsnType>
+static DecodeStatus DecodeBlezGroupBranch(MCInst &MI, InsnType insn,
+ uint64_t Address,
+ const void *Decoder) {
+ // If we are called then we can assume that MIPS32r6/MIPS64r6 is enabled
+ // (otherwise we would have matched the BLEZ instruction from the earlier
+ // ISAs instead).
+ //
+ // We have:
+ // 0b000110 sssss ttttt iiiiiiiiiiiiiiii
+ // Invalid if rt == 0
+ // BLEZALC if rs == 0 && rt != 0
+ // BGEZALC if rs == rt && rt != 0
+ // BGEUC if rs != rt && rs != 0 && rt != 0
+
+ InsnType Rs = fieldFromInstruction(insn, 21, 5);
+ InsnType Rt = fieldFromInstruction(insn, 16, 5);
+ InsnType Imm = SignExtend64(fieldFromInstruction(insn, 0, 16), 16) << 2;
+ bool HasRs = false;
+
+ if (Rt == 0)
+ return MCDisassembler::Fail;
+ else if (Rs == 0)
+ MI.setOpcode(Mips::BLEZALC);
+ else if (Rs == Rt)
+ MI.setOpcode(Mips::BGEZALC);
+ else {
+ HasRs = true;
+ MI.setOpcode(Mips::BGEUC);
+ }
+
+ if (HasRs)
+ MI.addOperand(MCOperand::CreateReg(getReg(Decoder, Mips::GPR32RegClassID,
+ Rs)));
+ MI.addOperand(MCOperand::CreateReg(getReg(Decoder, Mips::GPR32RegClassID,
+ Rt)));
+
+ MI.addOperand(MCOperand::CreateImm(Imm));
+
+ return MCDisassembler::Success;
+}
+
/// readInstruction - read four bytes from the MemoryObject
/// and return the 32-bit word sorted according to the given endianness
static DecodeStatus readInstruction32(const MemoryObject &region,
@@ -670,6 +755,7 @@ MipsDisassembler::getInstruction(MCInst &instr,
return MCDisassembler::Fail;
if (IsMicroMips) {
+ DEBUG(dbgs() << "Trying MicroMips32 table (32-bit opcodes):\n");
// Calling the auto-generated decoder function.
Result = decodeInstruction(DecoderTableMicroMips32, instr, Insn, Address,
this, STI);
@@ -680,7 +766,28 @@ MipsDisassembler::getInstruction(MCInst &instr,
return MCDisassembler::Fail;
}
- if (isMips32r6()) {
+ if (hasCOP3()) {
+ DEBUG(dbgs() << "Trying COP3_ table (32-bit opcodes):\n");
+ Result =
+ decodeInstruction(DecoderTableCOP3_32, instr, Insn, Address, this, STI);
+ if (Result != MCDisassembler::Fail) {
+ Size = 4;
+ return Result;
+ }
+ }
+
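+ // Try the GPR64 variant of the R6 table before the common MIPS32r6/MIPS64r6 one.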
+ if (hasMips32r6() && isGP64()) {
+ DEBUG(dbgs() << "Trying Mips32r6_64r6 (GPR64) table (32-bit opcodes):\n");
+ Result = decodeInstruction(DecoderTableMips32r6_64r6_GP6432, instr, Insn,
+ Address, this, STI);
+ if (Result != MCDisassembler::Fail) {
+ Size = 4;
+ return Result;
+ }
+ }
+
+ if (hasMips32r6()) {
+ DEBUG(dbgs() << "Trying Mips32r6_64r6 table (32-bit opcodes):\n");
Result = decodeInstruction(DecoderTableMips32r6_64r632, instr, Insn,
Address, this, STI);
if (Result != MCDisassembler::Fail) {
@@ -689,6 +796,7 @@ MipsDisassembler::getInstruction(MCInst &instr,
}
}
+ DEBUG(dbgs() << "Trying Mips table (32-bit opcodes):\n");
// Calling the auto-generated decoder function.
Result = decodeInstruction(DecoderTableMips32, instr, Insn, Address,
this, STI);
@@ -840,6 +948,17 @@ static DecodeStatus DecodeFCCRegisterClass(MCInst &Inst,
return MCDisassembler::Success;
}
+static DecodeStatus DecodeFGRCCRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ if (RegNo > 31)
+ return MCDisassembler::Fail;
+
+ unsigned Reg = getReg(Decoder, Mips::FGRCCRegClassID, RegNo);
+ Inst.addOperand(MCOperand::CreateReg(Reg));
+ return MCDisassembler::Success;
+}
+
static DecodeStatus DecodeMem(MCInst &Inst,
unsigned Insn,
uint64_t Address,
@@ -965,6 +1084,27 @@ static DecodeStatus DecodeFMem(MCInst &Inst,
return MCDisassembler::Success;
}
+static DecodeStatus DecodeSpecial3LlSc(MCInst &Inst,
+ unsigned Insn,
+ uint64_t Address,
+ const void *Decoder) {
+ int64_t Offset = SignExtend64<9>((Insn >> 7) & 0x1ff);
+ unsigned Rt = fieldFromInstruction(Insn, 16, 5);
+ unsigned Base = fieldFromInstruction(Insn, 21, 5);
+
+ Rt = getReg(Decoder, Mips::GPR32RegClassID, Rt);
+ Base = getReg(Decoder, Mips::GPR32RegClassID, Base);
+
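+ // SC_R6 and SCD_R6 write the success flag back to rt, so rt is both a def and a use.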
+ if (Inst.getOpcode() == Mips::SC_R6 || Inst.getOpcode() == Mips::SCD_R6) {
+ Inst.addOperand(MCOperand::CreateReg(Rt));
+ }
+
+ Inst.addOperand(MCOperand::CreateReg(Rt));
+ Inst.addOperand(MCOperand::CreateReg(Base));
+ Inst.addOperand(MCOperand::CreateImm(Offset));
+
+ return MCDisassembler::Success;
+}
static DecodeStatus DecodeHWRegsRegisterClass(MCInst &Inst,
unsigned RegNo,
@@ -1197,3 +1337,9 @@ static DecodeStatus DecodeSimm19Lsl2(MCInst &Inst, unsigned Insn,
Inst.addOperand(MCOperand::CreateImm(SignExtend32<19>(Insn) << 2));
return MCDisassembler::Success;
}
+
+static DecodeStatus DecodeSimm18Lsl3(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
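+ // The encoded immediate is the 18-bit offset scaled down by 8; undo the scaling.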
+ Inst.addOperand(MCOperand::CreateImm(SignExtend32<18>(Insn) << 3));
+ return MCDisassembler::Success;
+}
diff --git a/lib/Target/Mips/MCTargetDesc/Android.mk b/lib/Target/Mips/MCTargetDesc/Android.mk
index 7ee11a1..c8d18fc 100644
--- a/lib/Target/Mips/MCTargetDesc/Android.mk
+++ b/lib/Target/Mips/MCTargetDesc/Android.mk
@@ -7,6 +7,7 @@ mips_mc_desc_TBLGEN_TABLES := \
MipsGenSubtargetInfo.inc
mips_mc_desc_SRC_FILES := \
+ MipsABIFlagsSection.cpp \
MipsAsmBackend.cpp \
MipsELFObjectWriter.cpp \
MipsELFStreamer.cpp \
diff --git a/lib/Target/Mips/MCTargetDesc/CMakeLists.txt b/lib/Target/Mips/MCTargetDesc/CMakeLists.txt
index d3e2fd7..c14ee35 100644
--- a/lib/Target/Mips/MCTargetDesc/CMakeLists.txt
+++ b/lib/Target/Mips/MCTargetDesc/CMakeLists.txt
@@ -1,4 +1,5 @@
add_llvm_library(LLVMMipsDesc
+ MipsABIFlagsSection.cpp
MipsAsmBackend.cpp
MipsELFObjectWriter.cpp
MipsELFStreamer.cpp
diff --git a/lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.cpp b/lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.cpp
new file mode 100644
index 0000000..52d5dd3
--- /dev/null
+++ b/lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.cpp
@@ -0,0 +1,60 @@
+//===-- MipsABIFlagsSection.cpp - Mips ELF ABI Flags Section ---*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MipsABIFlagsSection.h"
+
+using namespace llvm;
+
+uint8_t MipsABIFlagsSection::getFpABIValue() {
+ switch (FpABI) {
+ case FpABIKind::ANY:
+ return Val_GNU_MIPS_ABI_FP_ANY;
+ case FpABIKind::XX:
+ return Val_GNU_MIPS_ABI_FP_XX;
+ case FpABIKind::S32:
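+ // fp=32 maps onto the traditional double-float value of the GNU fp_abi encoding.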
+ return Val_GNU_MIPS_ABI_FP_DOUBLE;
+ case FpABIKind::S64:
+ if (Is32BitABI)
+ return OddSPReg ? Val_GNU_MIPS_ABI_FP_64 : Val_GNU_MIPS_ABI_FP_64A;
+ return Val_GNU_MIPS_ABI_FP_DOUBLE;
+ }
+
+ llvm_unreachable("unexpected fp abi value");
+}
+
+StringRef MipsABIFlagsSection::getFpABIString(FpABIKind Value) {
+ switch (Value) {
+ case FpABIKind::XX:
+ return "xx";
+ case FpABIKind::S32:
+ return "32";
+ case FpABIKind::S64:
+ return "64";
+ default:
+ llvm_unreachable("unsupported fp abi value");
+ }
+}
+
+namespace llvm {
+MCStreamer &operator<<(MCStreamer &OS, MipsABIFlagsSection &ABIFlagsSection) {
+ // Write out an Elf_Internal_ABIFlags_v0 struct.
+ OS.EmitIntValue(ABIFlagsSection.getVersionValue(), 2); // version
+ OS.EmitIntValue(ABIFlagsSection.getISALevelValue(), 1); // isa_level
+ OS.EmitIntValue(ABIFlagsSection.getISARevisionValue(), 1); // isa_rev
+ OS.EmitIntValue(ABIFlagsSection.getGPRSizeValue(), 1); // gpr_size
+ OS.EmitIntValue(ABIFlagsSection.getCPR1SizeValue(), 1); // cpr1_size
+ OS.EmitIntValue(ABIFlagsSection.getCPR2SizeValue(), 1); // cpr2_size
+ OS.EmitIntValue(ABIFlagsSection.getFpABIValue(), 1); // fp_abi
+ OS.EmitIntValue(ABIFlagsSection.getISAExtensionSetValue(), 4); // isa_ext
+ OS.EmitIntValue(ABIFlagsSection.getASESetValue(), 4); // ases
+ OS.EmitIntValue(ABIFlagsSection.getFlags1Value(), 4); // flags1
+ OS.EmitIntValue(ABIFlagsSection.getFlags2Value(), 4); // flags2
+ return OS;
+}
+}
diff --git a/lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.h b/lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.h
new file mode 100644
index 0000000..ab18c44
--- /dev/null
+++ b/lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.h
@@ -0,0 +1,237 @@
+//===-- MipsABIFlagsSection.h - Mips ELF ABI Flags Section -----*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPSABIFLAGSSECTION_H
+#define MIPSABIFLAGSSECTION_H
+
+#include "llvm/MC/MCStreamer.h"
+
+namespace llvm {
+
+class MCStreamer;
+
+struct MipsABIFlagsSection {
+ // Values for the xxx_size bytes of an ABI flags structure.
+ enum AFL_REG {
+ AFL_REG_NONE = 0x00, // No registers.
+ AFL_REG_32 = 0x01, // 32-bit registers.
+ AFL_REG_64 = 0x02, // 64-bit registers.
+ AFL_REG_128 = 0x03 // 128-bit registers.
+ };
+
+ // Masks for the ases word of an ABI flags structure.
+ enum AFL_ASE {
+ AFL_ASE_DSP = 0x00000001, // DSP ASE.
+ AFL_ASE_DSPR2 = 0x00000002, // DSP R2 ASE.
+ AFL_ASE_EVA = 0x00000004, // Enhanced VA Scheme.
+ AFL_ASE_MCU = 0x00000008, // MCU (MicroController) ASE.
+ AFL_ASE_MDMX = 0x00000010, // MDMX ASE.
+ AFL_ASE_MIPS3D = 0x00000020, // MIPS-3D ASE.
+ AFL_ASE_MT = 0x00000040, // MT ASE.
+ AFL_ASE_SMARTMIPS = 0x00000080, // SmartMIPS ASE.
+ AFL_ASE_VIRT = 0x00000100, // VZ ASE.
+ AFL_ASE_MSA = 0x00000200, // MSA ASE.
+ AFL_ASE_MIPS16 = 0x00000400, // MIPS16 ASE.
+ AFL_ASE_MICROMIPS = 0x00000800, // MICROMIPS ASE.
+ AFL_ASE_XPA = 0x00001000 // XPA ASE.
+ };
+
+ // Values for the isa_ext word of an ABI flags structure.
+ enum AFL_EXT {
+ AFL_EXT_XLR = 1, // RMI Xlr instruction.
+ AFL_EXT_OCTEON2 = 2, // Cavium Networks Octeon2.
+ AFL_EXT_OCTEONP = 3, // Cavium Networks OcteonP.
+ AFL_EXT_LOONGSON_3A = 4, // Loongson 3A.
+ AFL_EXT_OCTEON = 5, // Cavium Networks Octeon.
+ AFL_EXT_5900 = 6, // MIPS R5900 instruction.
+ AFL_EXT_4650 = 7, // MIPS R4650 instruction.
+ AFL_EXT_4010 = 8, // LSI R4010 instruction.
+ AFL_EXT_4100 = 9, // NEC VR4100 instruction.
+ AFL_EXT_3900 = 10, // Toshiba R3900 instruction.
+ AFL_EXT_10000 = 11, // MIPS R10000 instruction.
+ AFL_EXT_SB1 = 12, // Broadcom SB-1 instruction.
+ AFL_EXT_4111 = 13, // NEC VR4111/VR4181 instruction.
+ AFL_EXT_4120 = 14, // NEC VR4120 instruction.
+ AFL_EXT_5400 = 15, // NEC VR5400 instruction.
+ AFL_EXT_5500 = 16, // NEC VR5500 instruction.
+ AFL_EXT_LOONGSON_2E = 17, // ST Microelectronics Loongson 2E.
+ AFL_EXT_LOONGSON_2F = 18 // ST Microelectronics Loongson 2F.
+ };
+
+ // Values for the fp_abi word of an ABI flags structure.
+ enum Val_GNU_MIPS_ABI {
+ Val_GNU_MIPS_ABI_FP_ANY = 0,
+ Val_GNU_MIPS_ABI_FP_DOUBLE = 1,
+ Val_GNU_MIPS_ABI_FP_XX = 5,
+ Val_GNU_MIPS_ABI_FP_64 = 6,
+ Val_GNU_MIPS_ABI_FP_64A = 7
+ };
+
+ enum AFL_FLAGS1 {
+ AFL_FLAGS1_ODDSPREG = 1
+ };
+
+ // Internal representation of the values used in .module fp=value
+ enum class FpABIKind { ANY, XX, S32, S64 };
+
+ // Version of flags structure.
+ uint16_t Version;
+ // The level of the ISA: 1-5, 32, 64.
+ uint8_t ISALevel;
+ // The revision of ISA: 0 for MIPS V and below, 1-n otherwise.
+ uint8_t ISARevision;
+ // The size of general purpose registers.
+ AFL_REG GPRSize;
+ // The size of co-processor 1 registers.
+ AFL_REG CPR1Size;
+ // The size of co-processor 2 registers.
+ AFL_REG CPR2Size;
+ // Processor-specific extension.
+ uint32_t ISAExtensionSet;
+ // Mask of ASEs used.
+ uint32_t ASESet;
+
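+ // Whether odd-numbered single-precision registers are usable (AFL_FLAGS1_ODDSPREG).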
+ bool OddSPReg;
+
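+ // True for O32; selects between FP_64/FP_64A and FP_DOUBLE when FpABI is S64.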
+ bool Is32BitABI;
+
+protected:
+ // The floating-point ABI.
+ FpABIKind FpABI;
+
+public:
+ MipsABIFlagsSection()
+ : Version(0), ISALevel(0), ISARevision(0), GPRSize(AFL_REG_NONE),
+ CPR1Size(AFL_REG_NONE), CPR2Size(AFL_REG_NONE), ISAExtensionSet(0),
+ ASESet(0), OddSPReg(false), Is32BitABI(false), FpABI(FpABIKind::ANY) {}
+
+ uint16_t getVersionValue() { return (uint16_t)Version; }
+ uint8_t getISALevelValue() { return (uint8_t)ISALevel; }
+ uint8_t getISARevisionValue() { return (uint8_t)ISARevision; }
+ uint8_t getGPRSizeValue() { return (uint8_t)GPRSize; }
+ uint8_t getCPR1SizeValue() { return (uint8_t)CPR1Size; }
+ uint8_t getCPR2SizeValue() { return (uint8_t)CPR2Size; }
+ uint8_t getFpABIValue();
+ uint32_t getISAExtensionSetValue() { return (uint32_t)ISAExtensionSet; }
+ uint32_t getASESetValue() { return (uint32_t)ASESet; }
+
+ uint32_t getFlags1Value() {
+ uint32_t Value = 0;
+
+ if (OddSPReg)
+ Value |= (uint32_t)AFL_FLAGS1_ODDSPREG;
+
+ return Value;
+ }
+
+ uint32_t getFlags2Value() { return 0; }
+
+ FpABIKind getFpABI() { return FpABI; }
+ void setFpABI(FpABIKind Value, bool IsABI32Bit) {
+ FpABI = Value;
+ Is32BitABI = IsABI32Bit;
+ }
+ StringRef getFpABIString(FpABIKind Value);
+
+ template <class PredicateLibrary>
+ void setISALevelAndRevisionFromPredicates(const PredicateLibrary &P) {
+ if (P.hasMips64()) {
+ ISALevel = 64;
+ if (P.hasMips64r6())
+ ISARevision = 6;
+ else if (P.hasMips64r2())
+ ISARevision = 2;
+ else
+ ISARevision = 1;
+ } else if (P.hasMips32()) {
+ ISALevel = 32;
+ if (P.hasMips32r6())
+ ISARevision = 6;
+ else if (P.hasMips32r2())
+ ISARevision = 2;
+ else
+ ISARevision = 1;
+ } else {
+ ISARevision = 0;
+ if (P.hasMips5())
+ ISALevel = 5;
+ else if (P.hasMips4())
+ ISALevel = 4;
+ else if (P.hasMips3())
+ ISALevel = 3;
+ else if (P.hasMips2())
+ ISALevel = 2;
+ else if (P.hasMips1())
+ ISALevel = 1;
+ else
+ llvm_unreachable("Unknown ISA level!");
+ }
+ }
+
+ template <class PredicateLibrary>
+ void setGPRSizeFromPredicates(const PredicateLibrary &P) {
+ GPRSize = P.isGP64bit() ? AFL_REG_64 : AFL_REG_32;
+ }
+
+ template <class PredicateLibrary>
+ void setCPR1SizeFromPredicates(const PredicateLibrary &P) {
+ if (P.mipsSEUsesSoftFloat())
+ CPR1Size = AFL_REG_NONE;
+ else if (P.hasMSA())
+ CPR1Size = AFL_REG_128;
+ else
+ CPR1Size = P.isFP64bit() ? AFL_REG_64 : AFL_REG_32;
+ }
+
+ template <class PredicateLibrary>
+ void setASESetFromPredicates(const PredicateLibrary &P) {
+ ASESet = 0;
+ if (P.hasDSP())
+ ASESet |= AFL_ASE_DSP;
+ if (P.hasDSPR2())
+ ASESet |= AFL_ASE_DSPR2;
+ if (P.hasMSA())
+ ASESet |= AFL_ASE_MSA;
+ if (P.inMicroMipsMode())
+ ASESet |= AFL_ASE_MICROMIPS;
+ if (P.inMips16Mode())
+ ASESet |= AFL_ASE_MIPS16;
+ }
+
+ template <class PredicateLibrary>
+ void setFpAbiFromPredicates(const PredicateLibrary &P) {
+ Is32BitABI = P.isABI_O32();
+
+ FpABI = FpABIKind::ANY;
+ if (P.isABI_N32() || P.isABI_N64())
+ FpABI = FpABIKind::S64;
+ else if (P.isABI_O32()) {
+ if (P.isFP64bit())
+ FpABI = FpABIKind::S64;
+ else if (P.isABI_FPXX())
+ FpABI = FpABIKind::XX;
+ else
+ FpABI = FpABIKind::S32;
+ }
+ }
+
+ template <class PredicateLibrary>
+ void setAllFromPredicates(const PredicateLibrary &P) {
+ setISALevelAndRevisionFromPredicates(P);
+ setGPRSizeFromPredicates(P);
+ setCPR1SizeFromPredicates(P);
+ setASESetFromPredicates(P);
+ setFpAbiFromPredicates(P);
+ }
+};
+
+MCStreamer &operator<<(MCStreamer &OS, MipsABIFlagsSection &ABIFlagsSection);
+}
+
+#endif
diff --git a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
index 5375a00..d8e6128 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
@@ -70,6 +70,13 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
if (!isIntN(16, Value) && Ctx)
Ctx->FatalError(Fixup.getLoc(), "out of range PC16 fixup");
break;
+ case Mips::fixup_MIPS_PC19_S2:
+ // Forcing a signed division because Value can be negative.
+ Value = (int64_t)Value / 4;
+ // We now check if Value can be encoded as a 19-bit signed immediate.
+ if (!isIntN(19, Value) && Ctx)
+ Ctx->FatalError(Fixup.getLoc(), "out of range PC19 fixup");
+ break;
case Mips::fixup_Mips_26:
// So far we are only using this type for jumps.
// The displacement is then divided by 4 to give us a 28 bit
@@ -104,6 +111,13 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
if (!isIntN(16, Value) && Ctx)
Ctx->FatalError(Fixup.getLoc(), "out of range PC16 fixup");
break;
+ case Mips::fixup_MIPS_PC18_S3:
+ // Forcing a signed division because Value can be negative.
+ Value = (int64_t)Value / 8;
+ // We now check if Value can be encoded as an 18-bit signed immediate.
+ if (!isIntN(18, Value) && Ctx)
+ Ctx->FatalError(Fixup.getLoc(), "out of range PC18 fixup");
+ break;
case Mips::fixup_MIPS_PC21_S2:
Value -= 4;
// Forcing a signed division because Value can be negative.
@@ -247,6 +261,8 @@ getFixupKindInfo(MCFixupKind Kind) const {
{ "fixup_Mips_GOT_LO16", 0, 16, 0 },
{ "fixup_Mips_CALL_HI16", 0, 16, 0 },
{ "fixup_Mips_CALL_LO16", 0, 16, 0 },
+ { "fixup_Mips_PC18_S3", 0, 18, MCFixupKindInfo::FKF_IsPCRel },
+ { "fixup_MIPS_PC19_S2", 0, 19, MCFixupKindInfo::FKF_IsPCRel },
{ "fixup_MIPS_PC21_S2", 0, 21, MCFixupKindInfo::FKF_IsPCRel },
{ "fixup_MIPS_PC26_S2", 0, 26, MCFixupKindInfo::FKF_IsPCRel },
{ "fixup_MIPS_PCHI16", 0, 16, MCFixupKindInfo::FKF_IsPCRel },
@@ -308,6 +324,8 @@ getFixupKindInfo(MCFixupKind Kind) const {
{ "fixup_Mips_GOT_LO16", 16, 16, 0 },
{ "fixup_Mips_CALL_HI16", 16, 16, 0 },
{ "fixup_Mips_CALL_LO16", 16, 16, 0 },
+ { "fixup_Mips_PC18_S3", 14, 18, MCFixupKindInfo::FKF_IsPCRel },
+ { "fixup_MIPS_PC19_S2", 13, 19, MCFixupKindInfo::FKF_IsPCRel },
{ "fixup_MIPS_PC21_S2", 11, 21, MCFixupKindInfo::FKF_IsPCRel },
{ "fixup_MIPS_PC26_S2", 6, 26, MCFixupKindInfo::FKF_IsPCRel },
{ "fixup_MIPS_PCHI16", 16, 16, MCFixupKindInfo::FKF_IsPCRel },
diff --git a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h
index bc695e6..d5c3dbc 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h
+++ b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h
@@ -65,7 +65,7 @@ public:
const MCRelaxableFragment *DF,
const MCAsmLayout &Layout) const override {
// FIXME.
- assert(0 && "RelaxInstruction() unimplemented");
+ llvm_unreachable("RelaxInstruction() unimplemented");
return false;
}
diff --git a/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp b/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp
index 74c12ff..49ac256 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp
@@ -193,6 +193,12 @@ unsigned MipsELFObjectWriter::GetRelocType(const MCValue &Target,
case Mips::fixup_MICROMIPS_TLS_TPREL_LO16:
Type = ELF::R_MICROMIPS_TLS_TPREL_LO16;
break;
+ case Mips::fixup_MIPS_PC19_S2:
+ Type = ELF::R_MIPS_PC19_S2;
+ break;
+ case Mips::fixup_MIPS_PC18_S3:
+ Type = ELF::R_MIPS_PC18_S3;
+ break;
case Mips::fixup_MIPS_PC21_S2:
Type = ELF::R_MIPS_PC21_S2;
break;
diff --git a/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h b/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h
index 3079004..05080f0 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h
+++ b/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h
@@ -128,6 +128,12 @@ namespace Mips {
// resulting in - R_MIPS_CALL_LO16
fixup_Mips_CALL_LO16,
+ // resulting in - R_MIPS_PC18_S3
+ fixup_MIPS_PC18_S3,
+
+ // resulting in - R_MIPS_PC19_S2
+ fixup_MIPS_PC19_S2,
+
// resulting in - R_MIPS_PC21_S2
fixup_MIPS_PC21_S2,
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp
index 6aa3c76..e415412 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp
@@ -38,7 +38,7 @@ MipsMCAsmInfo::MipsMCAsmInfo(StringRef TT) {
ZeroDirective = "\t.space\t";
GPRel32Directive = "\t.gpword\t";
GPRel64Directive = "\t.gpdword\t";
- DebugLabelSuffix = "=.";
+ UseAssignmentForEHBegin = true;
SupportsDebugInformation = true;
ExceptionsType = ExceptionHandling::DwarfCFI;
HasLEB128 = true;
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
index 85e0bf1..43fc521 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
@@ -621,11 +621,42 @@ unsigned
MipsMCCodeEmitter::getSimm19Lsl2Encoding(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
- assert(MI.getOperand(OpNo).isImm());
- // The immediate is encoded as 'immediate << 2'.
- unsigned Res = getMachineOpValue(MI, MI.getOperand(OpNo), Fixups, STI);
- assert((Res & 3) == 0);
- return Res >> 2;
+ const MCOperand &MO = MI.getOperand(OpNo);
+ if (MO.isImm()) {
+ // The immediate is encoded as 'immediate << 2'.
+ unsigned Res = getMachineOpValue(MI, MO, Fixups, STI);
+ assert((Res & 3) == 0);
+ return Res >> 2;
+ }
+
+ assert(MO.isExpr() &&
+ "getSimm19Lsl2Encoding expects only expressions or an immediate");
+
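+ // Otherwise emit a fixup_MIPS_PC19_S2 and let the assembler/linker resolve it.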
+ const MCExpr *Expr = MO.getExpr();
+ Fixups.push_back(MCFixup::Create(0, Expr,
+ MCFixupKind(Mips::fixup_MIPS_PC19_S2)));
+ return 0;
+}
+
+unsigned
+MipsMCCodeEmitter::getSimm18Lsl3Encoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ const MCOperand &MO = MI.getOperand(OpNo);
+ if (MO.isImm()) {
+ // The immediate is encoded as 'immediate << 3'.
+ unsigned Res = getMachineOpValue(MI, MO, Fixups, STI);
+ assert((Res & 7) == 0);
+ return Res >> 3;
+ }
+
+ assert(MO.isExpr() &&
+ "getSimm18Lsl2Encoding expects only expressions or an immediate");
+
+ const MCExpr *Expr = MO.getExpr();
+ Fixups.push_back(MCFixup::Create(0, Expr,
+ MCFixupKind(Mips::fixup_MIPS_PC18_S3)));
+ return 0;
}
#include "MipsGenMCCodeEmitter.inc"
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.h b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.h
index 3f7daab..304167f 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.h
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.h
@@ -141,6 +141,10 @@ public:
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
+ unsigned getSimm18Lsl3Encoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
unsigned getExprOpValue(const MCExpr *Expr, SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCExpr.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCExpr.cpp
index 21ccc3c..5bba3e5 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCExpr.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCExpr.cpp
@@ -11,6 +11,7 @@
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCObjectStreamer.h"
using namespace llvm;
@@ -83,33 +84,6 @@ MipsMCExpr::EvaluateAsRelocatableImpl(MCValue &Res,
return getSubExpr()->EvaluateAsRelocatable(Res, Layout);
}
-// FIXME: This basically copies MCObjectStreamer::AddValueSymbols. Perhaps
-// that method should be made public?
-static void AddValueSymbolsImpl(const MCExpr *Value, MCAssembler *Asm) {
- switch (Value->getKind()) {
- case MCExpr::Target:
- llvm_unreachable("Can't handle nested target expr!");
-
- case MCExpr::Constant:
- break;
-
- case MCExpr::Binary: {
- const MCBinaryExpr *BE = cast<MCBinaryExpr>(Value);
- AddValueSymbolsImpl(BE->getLHS(), Asm);
- AddValueSymbolsImpl(BE->getRHS(), Asm);
- break;
- }
-
- case MCExpr::SymbolRef:
- Asm->getOrCreateSymbolData(cast<MCSymbolRefExpr>(Value)->getSymbol());
- break;
-
- case MCExpr::Unary:
- AddValueSymbolsImpl(cast<MCUnaryExpr>(Value)->getSubExpr(), Asm);
- break;
- }
-}
-
-void MipsMCExpr::AddValueSymbols(MCAssembler *Asm) const {
- AddValueSymbolsImpl(getSubExpr(), Asm);
+void MipsMCExpr::visitUsedExpr(MCStreamer &Streamer) const {
+ Streamer.visitUsedExpr(*getSubExpr());
}
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCExpr.h b/lib/Target/Mips/MCTargetDesc/MipsMCExpr.h
index 8d7aacd..f193dc9 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCExpr.h
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCExpr.h
@@ -49,7 +49,7 @@ public:
void PrintImpl(raw_ostream &OS) const override;
bool EvaluateAsRelocatableImpl(MCValue &Res,
const MCAsmLayout *Layout) const override;
- void AddValueSymbols(MCAssembler *) const override;
+ void visitUsedExpr(MCStreamer &Streamer) const override;
const MCSection *FindAssociatedSection() const override {
return getSubExpr()->FindAssociatedSection();
}
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp
index 660e5a7..d2b929b 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp
@@ -133,6 +133,12 @@ createMCAsmStreamer(MCContext &Ctx, formatted_raw_ostream &OS,
return S;
}
+static MCStreamer *createMipsNullStreamer(MCContext &Ctx) {
+ MCStreamer *S = llvm::createNullStreamer(Ctx);
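+ // The MCTargetStreamer constructor registers the new object with S, so no
+ // separate handle to it is needed.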
+ new MipsTargetStreamer(*S);
+ return S;
+}
+
extern "C" void LLVMInitializeMipsTargetMC() {
// Register the MC asm info.
RegisterMCAsmInfoFn X(TheMipsTarget, createMipsMCAsmInfo);
@@ -187,6 +193,12 @@ extern "C" void LLVMInitializeMipsTargetMC() {
TargetRegistry::RegisterAsmStreamer(TheMips64Target, createMCAsmStreamer);
TargetRegistry::RegisterAsmStreamer(TheMips64elTarget, createMCAsmStreamer);
+ TargetRegistry::RegisterNullStreamer(TheMipsTarget, createMipsNullStreamer);
+ TargetRegistry::RegisterNullStreamer(TheMipselTarget, createMipsNullStreamer);
+ TargetRegistry::RegisterNullStreamer(TheMips64Target, createMipsNullStreamer);
+ TargetRegistry::RegisterNullStreamer(TheMips64elTarget,
+ createMipsNullStreamer);
+
// Register the asm backend.
TargetRegistry::RegisterMCAsmBackend(TheMipsTarget,
createMipsAsmBackendEB32);
diff --git a/lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp b/lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp
index cd6be73..6cde8f9 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp
@@ -48,7 +48,13 @@ private:
bool PendingCall;
bool isIndirectJump(const MCInst &MI) {
- return MI.getOpcode() == Mips::JR || MI.getOpcode() == Mips::RET;
+ if (MI.getOpcode() == Mips::JALR) {
+ // MIPS32r6/MIPS64r6 doesn't have a JR instruction and uses JALR instead.
+ // JALR is an indirect branch if the link register is $0.
+ assert(MI.getOperand(0).isReg());
+ return MI.getOperand(0).getReg() == Mips::ZERO;
+ }
+ return MI.getOpcode() == Mips::JR;
}
bool isStackPointerFirstOperand(const MCInst &MI) {
@@ -56,7 +62,9 @@ private:
&& MI.getOperand(0).getReg() == Mips::SP);
}
- bool isCall(unsigned Opcode, bool *IsIndirectCall) {
+ bool isCall(const MCInst &MI, bool *IsIndirectCall) {
+ unsigned Opcode = MI.getOpcode();
+
*IsIndirectCall = false;
switch (Opcode) {
@@ -64,12 +72,19 @@ private:
return false;
case Mips::JAL:
+ case Mips::BAL:
case Mips::BAL_BR:
case Mips::BLTZAL:
case Mips::BGEZAL:
return true;
case Mips::JALR:
+ // JALR is only a call if the link register is not $0. Otherwise it's an
+ // indirect branch.
+ assert(MI.getOperand(0).isReg());
+ if (MI.getOperand(0).getReg() == Mips::ZERO)
+ return false;
+
*IsIndirectCall = true;
return true;
}
@@ -137,24 +152,23 @@ public:
&IsStore);
bool IsSPFirstOperand = isStackPointerFirstOperand(Inst);
if (IsMemAccess || IsSPFirstOperand) {
- if (PendingCall)
- report_fatal_error("Dangerous instruction in branch delay slot!");
-
bool MaskBefore = (IsMemAccess
&& baseRegNeedsLoadStoreMask(Inst.getOperand(AddrIdx)
.getReg()));
bool MaskAfter = IsSPFirstOperand && !IsStore;
- if (MaskBefore || MaskAfter)
+ if (MaskBefore || MaskAfter) {
+ if (PendingCall)
+ report_fatal_error("Dangerous instruction in branch delay slot!");
sandboxLoadStoreStackChange(Inst, AddrIdx, STI, MaskBefore, MaskAfter);
- else
- MipsELFStreamer::EmitInstruction(Inst, STI);
- return;
+ return;
+ }
+ // fallthrough
}
// Sandbox calls by aligning call and branch delay to the bundle end.
// For indirect calls, emit the mask before the call.
bool IsIndirectCall;
- if (isCall(Inst.getOpcode(), &IsIndirectCall)) {
+ if (isCall(Inst, &IsIndirectCall)) {
if (PendingCall)
report_fatal_error("Dangerous instruction in branch delay slot!");
@@ -203,6 +217,7 @@ bool isBasePlusOffsetMemoryAccess(unsigned Opcode, unsigned *AddrIdx,
case Mips::LWC1:
case Mips::LDC1:
case Mips::LL:
+ case Mips::LL_R6:
case Mips::LWL:
case Mips::LWR:
*AddrIdx = 1;
@@ -223,6 +238,7 @@ bool isBasePlusOffsetMemoryAccess(unsigned Opcode, unsigned *AddrIdx,
// Store instructions with base address register in position 2.
case Mips::SC:
+ case Mips::SC_R6:
*AddrIdx = 2;
if (IsStore)
*IsStore = true;
diff --git a/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp b/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp
index a8fa272..fbe375b 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp
@@ -27,10 +27,43 @@
using namespace llvm;
-// Pin vtable to this file.
-void MipsTargetStreamer::anchor() {}
-
-MipsTargetStreamer::MipsTargetStreamer(MCStreamer &S) : MCTargetStreamer(S) {}
+MipsTargetStreamer::MipsTargetStreamer(MCStreamer &S)
+ : MCTargetStreamer(S), canHaveModuleDirective(true) {}
+void MipsTargetStreamer::emitDirectiveSetMicroMips() {}
+void MipsTargetStreamer::emitDirectiveSetNoMicroMips() {}
+void MipsTargetStreamer::emitDirectiveSetMips16() {}
+void MipsTargetStreamer::emitDirectiveSetNoMips16() {}
+void MipsTargetStreamer::emitDirectiveSetReorder() {}
+void MipsTargetStreamer::emitDirectiveSetNoReorder() {}
+void MipsTargetStreamer::emitDirectiveSetMacro() {}
+void MipsTargetStreamer::emitDirectiveSetNoMacro() {}
+void MipsTargetStreamer::emitDirectiveSetAt() {}
+void MipsTargetStreamer::emitDirectiveSetNoAt() {}
+void MipsTargetStreamer::emitDirectiveEnd(StringRef Name) {}
+void MipsTargetStreamer::emitDirectiveEnt(const MCSymbol &Symbol) {}
+void MipsTargetStreamer::emitDirectiveAbiCalls() {}
+void MipsTargetStreamer::emitDirectiveNaN2008() {}
+void MipsTargetStreamer::emitDirectiveNaNLegacy() {}
+void MipsTargetStreamer::emitDirectiveOptionPic0() {}
+void MipsTargetStreamer::emitDirectiveOptionPic2() {}
+void MipsTargetStreamer::emitFrame(unsigned StackReg, unsigned StackSize,
+ unsigned ReturnReg) {}
+void MipsTargetStreamer::emitMask(unsigned CPUBitmask, int CPUTopSavedRegOff) {}
+void MipsTargetStreamer::emitFMask(unsigned FPUBitmask, int FPUTopSavedRegOff) {
+}
+void MipsTargetStreamer::emitDirectiveSetMips32R2() {}
+void MipsTargetStreamer::emitDirectiveSetMips64() {}
+void MipsTargetStreamer::emitDirectiveSetMips64R2() {}
+void MipsTargetStreamer::emitDirectiveSetDsp() {}
+void MipsTargetStreamer::emitDirectiveCpload(unsigned RegNo) {}
+void MipsTargetStreamer::emitDirectiveCpsetup(unsigned RegNo, int RegOrOffset,
+ const MCSymbol &Sym, bool IsReg) {
+}
+void MipsTargetStreamer::emitDirectiveModuleOddSPReg(bool Enabled,
+ bool IsO32ABI) {
+ if (!Enabled && !IsO32ABI)
+ report_fatal_error("+nooddspreg is only valid for O32");
+}
MipsTargetAsmStreamer::MipsTargetAsmStreamer(MCStreamer &S,
formatted_raw_ostream &OS)
@@ -38,42 +71,52 @@ MipsTargetAsmStreamer::MipsTargetAsmStreamer(MCStreamer &S,
void MipsTargetAsmStreamer::emitDirectiveSetMicroMips() {
OS << "\t.set\tmicromips\n";
+ setCanHaveModuleDir(false);
}
void MipsTargetAsmStreamer::emitDirectiveSetNoMicroMips() {
OS << "\t.set\tnomicromips\n";
+ setCanHaveModuleDir(false);
}
void MipsTargetAsmStreamer::emitDirectiveSetMips16() {
OS << "\t.set\tmips16\n";
+ setCanHaveModuleDir(false);
}
void MipsTargetAsmStreamer::emitDirectiveSetNoMips16() {
OS << "\t.set\tnomips16\n";
+ setCanHaveModuleDir(false);
}
void MipsTargetAsmStreamer::emitDirectiveSetReorder() {
OS << "\t.set\treorder\n";
+ setCanHaveModuleDir(false);
}
void MipsTargetAsmStreamer::emitDirectiveSetNoReorder() {
OS << "\t.set\tnoreorder\n";
+ setCanHaveModuleDir(false);
}
void MipsTargetAsmStreamer::emitDirectiveSetMacro() {
OS << "\t.set\tmacro\n";
+ setCanHaveModuleDir(false);
}
void MipsTargetAsmStreamer::emitDirectiveSetNoMacro() {
OS << "\t.set\tnomacro\n";
+ setCanHaveModuleDir(false);
}
void MipsTargetAsmStreamer::emitDirectiveSetAt() {
OS << "\t.set\tat\n";
+ setCanHaveModuleDir(false);
}
void MipsTargetAsmStreamer::emitDirectiveSetNoAt() {
OS << "\t.set\tnoat\n";
+ setCanHaveModuleDir(false);
}
void MipsTargetAsmStreamer::emitDirectiveEnd(StringRef Name) {
@@ -110,24 +153,28 @@ void MipsTargetAsmStreamer::emitFrame(unsigned StackReg, unsigned StackSize,
void MipsTargetAsmStreamer::emitDirectiveSetMips32R2() {
OS << "\t.set\tmips32r2\n";
+ setCanHaveModuleDir(false);
}
void MipsTargetAsmStreamer::emitDirectiveSetMips64() {
OS << "\t.set\tmips64\n";
+ setCanHaveModuleDir(false);
}
void MipsTargetAsmStreamer::emitDirectiveSetMips64R2() {
OS << "\t.set\tmips64r2\n";
+ setCanHaveModuleDir(false);
}
void MipsTargetAsmStreamer::emitDirectiveSetDsp() {
OS << "\t.set\tdsp\n";
+ setCanHaveModuleDir(false);
}
// Print a 32-bit hex number with all eight digits, including leading zeros.
static void printHex32(unsigned Value, raw_ostream &OS) {
OS << "0x";
for (int i = 7; i >= 0; i--)
- OS.write_hex((Value & (0xF << (i*4))) >> (i*4));
+ OS.write_hex((Value & (0xF << (i * 4))) >> (i * 4));
}
void MipsTargetAsmStreamer::emitMask(unsigned CPUBitmask,
@@ -147,6 +194,7 @@ void MipsTargetAsmStreamer::emitFMask(unsigned FPUBitmask,
void MipsTargetAsmStreamer::emitDirectiveCpload(unsigned RegNo) {
OS << "\t.cpload\t$"
<< StringRef(MipsInstPrinter::getRegisterName(RegNo)).lower() << "\n";
+ setCanHaveModuleDir(false);
}
void MipsTargetAsmStreamer::emitDirectiveCpsetup(unsigned RegNo,
@@ -165,6 +213,34 @@ void MipsTargetAsmStreamer::emitDirectiveCpsetup(unsigned RegNo,
OS << ", ";
OS << Sym.getName() << "\n";
+ setCanHaveModuleDir(false);
+}
+
+void MipsTargetAsmStreamer::emitDirectiveModuleFP(
+ MipsABIFlagsSection::FpABIKind Value, bool Is32BitABI) {
+ MipsTargetStreamer::emitDirectiveModuleFP(Value, Is32BitABI);
+
+ OS << "\t.module\tfp=";
+ OS << ABIFlagsSection.getFpABIString(Value) << "\n";
+}
+
+void MipsTargetAsmStreamer::emitDirectiveSetFp(
+ MipsABIFlagsSection::FpABIKind Value) {
+ OS << "\t.set\tfp=";
+ OS << ABIFlagsSection.getFpABIString(Value) << "\n";
+}
+
+void MipsTargetAsmStreamer::emitMipsAbiFlags() {
+ // No action required for text output.
+}
+
+void MipsTargetAsmStreamer::emitDirectiveModuleOddSPReg(bool Enabled,
+ bool IsO32ABI) {
+ MipsTargetStreamer::emitDirectiveModuleOddSPReg(Enabled, IsO32ABI);
+
+ OS << "\t.module\t" << (Enabled ? "" : "no") << "oddspreg\n";
}
// This part is for ELF object output.
@@ -174,7 +250,7 @@ MipsTargetELFStreamer::MipsTargetELFStreamer(MCStreamer &S,
MCAssembler &MCA = getStreamer().getAssembler();
uint64_t Features = STI.getFeatureBits();
Triple T(STI.getTargetTriple());
- Pic = (MCA.getContext().getObjectFileInfo()->getRelocM() == Reloc::PIC_)
+ Pic = (MCA.getContext().getObjectFileInfo()->getRelocM() == Reloc::PIC_)
? true
: false;
@@ -182,16 +258,28 @@ MipsTargetELFStreamer::MipsTargetELFStreamer(MCStreamer &S,
unsigned EFlags = 0;
// Architecture
- if (Features & Mips::FeatureMips64r2)
+ if (Features & Mips::FeatureMips64r6)
+ EFlags |= ELF::EF_MIPS_ARCH_64R6;
+ else if (Features & Mips::FeatureMips64r2)
EFlags |= ELF::EF_MIPS_ARCH_64R2;
else if (Features & Mips::FeatureMips64)
EFlags |= ELF::EF_MIPS_ARCH_64;
+ else if (Features & Mips::FeatureMips5)
+ EFlags |= ELF::EF_MIPS_ARCH_5;
else if (Features & Mips::FeatureMips4)
EFlags |= ELF::EF_MIPS_ARCH_4;
+ else if (Features & Mips::FeatureMips3)
+ EFlags |= ELF::EF_MIPS_ARCH_3;
+ else if (Features & Mips::FeatureMips32r6)
+ EFlags |= ELF::EF_MIPS_ARCH_32R6;
else if (Features & Mips::FeatureMips32r2)
EFlags |= ELF::EF_MIPS_ARCH_32R2;
else if (Features & Mips::FeatureMips32)
EFlags |= ELF::EF_MIPS_ARCH_32;
+ else if (Features & Mips::FeatureMips2)
+ EFlags |= ELF::EF_MIPS_ARCH_2;
+ else
+ EFlags |= ELF::EF_MIPS_ARCH_1;
if (T.isArch64Bit()) {
if (Features & Mips::FeatureN32)
@@ -244,17 +332,17 @@ void MipsTargetELFStreamer::finish() {
ELF::SHF_ALLOC | ELF::SHF_MIPS_NOSTRIP, SectionKind::getMetadata());
OS.SwitchSection(Sec);
- OS.EmitIntValue(1, 1); // kind
+ OS.EmitIntValue(1, 1); // kind
OS.EmitIntValue(40, 1); // size
- OS.EmitIntValue(0, 2); // section
- OS.EmitIntValue(0, 4); // info
- OS.EmitIntValue(0, 4); // ri_gprmask
- OS.EmitIntValue(0, 4); // pad
- OS.EmitIntValue(0, 4); // ri_cpr[0]mask
- OS.EmitIntValue(0, 4); // ri_cpr[1]mask
- OS.EmitIntValue(0, 4); // ri_cpr[2]mask
- OS.EmitIntValue(0, 4); // ri_cpr[3]mask
- OS.EmitIntValue(0, 8); // ri_gp_value
+ OS.EmitIntValue(0, 2); // section
+ OS.EmitIntValue(0, 4); // info
+ OS.EmitIntValue(0, 4); // ri_gprmask
+ OS.EmitIntValue(0, 4); // pad
+ OS.EmitIntValue(0, 4); // ri_cpr[0]mask
+ OS.EmitIntValue(0, 4); // ri_cpr[1]mask
+ OS.EmitIntValue(0, 4); // ri_cpr[2]mask
+ OS.EmitIntValue(0, 4); // ri_cpr[3]mask
+ OS.EmitIntValue(0, 8); // ri_gp_value
} else {
const MCSectionELF *Sec =
Context.getELFSection(".reginfo", ELF::SHT_MIPS_REGINFO, ELF::SHF_ALLOC,
@@ -268,6 +356,7 @@ void MipsTargetELFStreamer::finish() {
OS.EmitIntValue(0, 4); // ri_cpr[3]mask
OS.EmitIntValue(0, 4); // ri_gp_value
}
+ emitMipsAbiFlags();
}
void MipsTargetELFStreamer::emitAssignment(MCSymbol *Symbol,
@@ -276,11 +365,11 @@ void MipsTargetELFStreamer::emitAssignment(MCSymbol *Symbol,
if (Value->getKind() != MCExpr::SymbolRef)
return;
const MCSymbol &RhsSym =
- static_cast<const MCSymbolRefExpr *>(Value)->getSymbol();
+ static_cast<const MCSymbolRefExpr *>(Value)->getSymbol();
MCSymbolData &Data = getStreamer().getOrCreateSymbolData(&RhsSym);
uint8_t Type = MCELF::GetType(Data);
- if ((Type != ELF::STT_FUNC)
- || !(MCELF::getOther(Data) & (ELF::STO_MIPS_MICROMIPS >> 2)))
+ if ((Type != ELF::STT_FUNC) ||
+ !(MCELF::getOther(Data) & (ELF::STO_MIPS_MICROMIPS >> 2)))
return;
MCSymbolData &SymbolData = getStreamer().getOrCreateSymbolData(Symbol);
@@ -305,6 +394,7 @@ void MipsTargetELFStreamer::emitDirectiveSetMicroMips() {
void MipsTargetELFStreamer::emitDirectiveSetNoMicroMips() {
MicroMipsEnabled = false;
+ setCanHaveModuleDir(false);
}
void MipsTargetELFStreamer::emitDirectiveSetMips16() {
@@ -312,14 +402,17 @@ void MipsTargetELFStreamer::emitDirectiveSetMips16() {
unsigned Flags = MCA.getELFHeaderEFlags();
Flags |= ELF::EF_MIPS_ARCH_ASE_M16;
MCA.setELFHeaderEFlags(Flags);
+ setCanHaveModuleDir(false);
}
void MipsTargetELFStreamer::emitDirectiveSetNoMips16() {
// FIXME: implement.
+ setCanHaveModuleDir(false);
}
void MipsTargetELFStreamer::emitDirectiveSetReorder() {
// FIXME: implement.
+ setCanHaveModuleDir(false);
}
void MipsTargetELFStreamer::emitDirectiveSetNoReorder() {
@@ -327,22 +420,27 @@ void MipsTargetELFStreamer::emitDirectiveSetNoReorder() {
unsigned Flags = MCA.getELFHeaderEFlags();
Flags |= ELF::EF_MIPS_NOREORDER;
MCA.setELFHeaderEFlags(Flags);
+ setCanHaveModuleDir(false);
}
void MipsTargetELFStreamer::emitDirectiveSetMacro() {
// FIXME: implement.
+ setCanHaveModuleDir(false);
}
void MipsTargetELFStreamer::emitDirectiveSetNoMacro() {
// FIXME: implement.
+ setCanHaveModuleDir(false);
}
void MipsTargetELFStreamer::emitDirectiveSetAt() {
// FIXME: implement.
+ setCanHaveModuleDir(false);
}
void MipsTargetELFStreamer::emitDirectiveSetNoAt() {
// FIXME: implement.
+ setCanHaveModuleDir(false);
}
void MipsTargetELFStreamer::emitDirectiveEnd(StringRef Name) {
@@ -411,19 +509,19 @@ void MipsTargetELFStreamer::emitFMask(unsigned FPUBitmask,
}
void MipsTargetELFStreamer::emitDirectiveSetMips32R2() {
- // No action required for ELF output.
+ setCanHaveModuleDir(false);
}
void MipsTargetELFStreamer::emitDirectiveSetMips64() {
- // No action required for ELF output.
+ setCanHaveModuleDir(false);
}
void MipsTargetELFStreamer::emitDirectiveSetMips64R2() {
- // No action required for ELF output.
+ setCanHaveModuleDir(false);
}
void MipsTargetELFStreamer::emitDirectiveSetDsp() {
- // No action required for ELF output.
+ setCanHaveModuleDir(false);
}
void MipsTargetELFStreamer::emitDirectiveCpload(unsigned RegNo) {
@@ -473,6 +571,8 @@ void MipsTargetELFStreamer::emitDirectiveCpload(unsigned RegNo) {
TmpInst.addOperand(MCOperand::CreateReg(Mips::GP));
TmpInst.addOperand(MCOperand::CreateReg(RegNo));
getStreamer().EmitInstruction(TmpInst, STI);
+
+ setCanHaveModuleDir(false);
}
void MipsTargetELFStreamer::emitDirectiveCpsetup(unsigned RegNo,
@@ -528,4 +628,27 @@ void MipsTargetELFStreamer::emitDirectiveCpsetup(unsigned RegNo,
Inst.addOperand(MCOperand::CreateReg(Mips::GP));
Inst.addOperand(MCOperand::CreateReg(RegNo));
getStreamer().EmitInstruction(Inst, STI);
+
+ setCanHaveModuleDir(false);
+}
+
+void MipsTargetELFStreamer::emitMipsAbiFlags() {
+ MCAssembler &MCA = getStreamer().getAssembler();
+ MCContext &Context = MCA.getContext();
+ MCStreamer &OS = getStreamer();
+ const MCSectionELF *Sec =
+ Context.getELFSection(".MIPS.abiflags", ELF::SHT_MIPS_ABIFLAGS,
+ ELF::SHF_ALLOC, SectionKind::getMetadata());
+ MCSectionData &ABIShndxSD = MCA.getOrCreateSectionData(*Sec);
+ ABIShndxSD.setAlignment(8);
+ OS.SwitchSection(Sec);
+
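+ // operator<< writes the Elf_Internal_ABIFlags_v0 fields in order.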
+ OS << ABIFlagsSection;
+}
+
+void MipsTargetELFStreamer::emitDirectiveModuleOddSPReg(bool Enabled,
+ bool IsO32ABI) {
+ MipsTargetStreamer::emitDirectiveModuleOddSPReg(Enabled, IsO32ABI);
+
+ ABIFlagsSection.OddSPReg = Enabled;
}
diff --git a/lib/Target/Mips/MicroMipsInstrFPU.td b/lib/Target/Mips/MicroMipsInstrFPU.td
index d95f9b0..b93017a 100644
--- a/lib/Target/Mips/MicroMipsInstrFPU.td
+++ b/lib/Target/Mips/MicroMipsInstrFPU.td
@@ -24,13 +24,13 @@ def LDC1_MM : MMRel, LW_FT<"ldc1", AFGR64Opnd, II_LDC1, load>, LW_FM_MM<0x2f>;
def SDC1_MM : MMRel, SW_FT<"sdc1", AFGR64Opnd, II_SDC1, store>,
LW_FM_MM<0x2e>;
def LWXC1_MM : MMRel, LWXC1_FT<"lwxc1", FGR32Opnd, II_LWXC1, load>,
- LWXC1_FM_MM<0x48>;
+ LWXC1_FM_MM<0x48>, INSN_MIPS4_32R2_NOT_32R6_64R6;
def SWXC1_MM : MMRel, SWXC1_FT<"swxc1", FGR32Opnd, II_SWXC1, store>,
- SWXC1_FM_MM<0x88>;
+ SWXC1_FM_MM<0x88>, INSN_MIPS4_32R2_NOT_32R6_64R6;
def LUXC1_MM : MMRel, LWXC1_FT<"luxc1", AFGR64Opnd, II_LUXC1>,
- LWXC1_FM_MM<0x148>, INSN_MIPS5_32R2;
+ LWXC1_FM_MM<0x148>, INSN_MIPS5_32R2_NOT_32R6_64R6;
def SUXC1_MM : MMRel, SWXC1_FT<"suxc1", AFGR64Opnd, II_SUXC1>,
- SWXC1_FM_MM<0x188>, INSN_MIPS5_32R2;
+ SWXC1_FM_MM<0x188>, INSN_MIPS5_32R2_NOT_32R6_64R6;
def FCMP_S32_MM : MMRel, CEQS_FT<"s", FGR32, II_C_CC_S, MipsFPCmp>,
CEQS_FM_MM<0>;
@@ -38,9 +38,9 @@ def FCMP_D32_MM : MMRel, CEQS_FT<"d", AFGR64, II_C_CC_D, MipsFPCmp>,
CEQS_FM_MM<1>;
def BC1F_MM : MMRel, BC1F_FT<"bc1f", brtarget_mm, IIBranch, MIPS_BRANCH_F>,
- BC1F_FM_MM<0x1c>;
+ BC1F_FM_MM<0x1c>, ISA_MIPS1_NOT_32R6_64R6;
def BC1T_MM : MMRel, BC1F_FT<"bc1t", brtarget_mm, IIBranch, MIPS_BRANCH_T>,
- BC1F_FM_MM<0x1d>;
+ BC1F_FM_MM<0x1d>, ISA_MIPS1_NOT_32R6_64R6;
def CEIL_W_S_MM : MMRel, ABSS_FT<"ceil.w.s", FGR32Opnd, FGR32Opnd, II_CEIL>,
ROUND_W_FM_MM<0, 0x6c>;
diff --git a/lib/Target/Mips/MicroMipsInstrInfo.td b/lib/Target/Mips/MicroMipsInstrInfo.td
index 9904bc6..87a3a3e 100644
--- a/lib/Target/Mips/MicroMipsInstrInfo.td
+++ b/lib/Target/Mips/MicroMipsInstrInfo.td
@@ -246,7 +246,6 @@ let DecoderNamespace = "MicroMips", Predicates = [InMicroMips] in {
}
def JR_MM : MMRel, IndirectBranch<"jr", GPR32Opnd>, JR_FM_MM<0x3c>;
def JALR_MM : JumpLinkReg<"jalr", GPR32Opnd>, JALR_FM_MM<0x03c>;
- def RET_MM : MMRel, RetBase<"ret", GPR32Opnd>, JR_FM_MM<0x3c>;
/// Branch Instructions
def BEQ_MM : MMRel, CBranch<"beq", brtarget_mm, seteq, GPR32Opnd>,
diff --git a/lib/Target/Mips/Mips.td b/lib/Target/Mips/Mips.td
index ea16331..dd3bc9b 100644
--- a/lib/Target/Mips/Mips.td
+++ b/lib/Target/Mips/Mips.td
@@ -61,6 +61,8 @@ def FeatureGP64Bit : SubtargetFeature<"gp64", "IsGP64bit", "true",
"General Purpose Registers are 64-bit wide.">;
def FeatureFP64Bit : SubtargetFeature<"fp64", "IsFP64bit", "true",
"Support 64-bit FP registers.">;
+def FeatureFPXX : SubtargetFeature<"fpxx", "IsFPXX", "true",
+ "Support for FPXX.">;
def FeatureNaN2008 : SubtargetFeature<"nan2008", "IsNaN2008bit", "true",
"IEEE 754-2008 NaN encoding.">;
def FeatureSingleFloat : SubtargetFeature<"single-float", "IsSingleFloat",
@@ -73,6 +75,9 @@ def FeatureN64 : SubtargetFeature<"n64", "MipsABI", "N64",
"Enable n64 ABI">;
def FeatureEABI : SubtargetFeature<"eabi", "MipsABI", "EABI",
"Enable eabi ABI">;
+def FeatureNoOddSPReg : SubtargetFeature<"nooddspreg", "UseOddSPReg", "false",
+ "Disable odd numbered single-precision "
+ "registers">;
def FeatureVFPU : SubtargetFeature<"vfpu", "HasVFPU",
"true", "Enable vector FPU instructions.">;
def FeatureMips1 : SubtargetFeature<"mips1", "MipsArchVersion", "Mips1",
diff --git a/lib/Target/Mips/Mips16FrameLowering.cpp b/lib/Target/Mips/Mips16FrameLowering.cpp
index c01d03a..93706c2 100644
--- a/lib/Target/Mips/Mips16FrameLowering.cpp
+++ b/lib/Target/Mips/Mips16FrameLowering.cpp
@@ -16,6 +16,7 @@
#include "Mips16InstrInfo.h"
#include "MipsInstrInfo.h"
#include "MipsRegisterInfo.h"
+#include "MipsSubtarget.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -28,6 +29,9 @@
using namespace llvm;
+Mips16FrameLowering::Mips16FrameLowering(const MipsSubtarget &STI)
+ : MipsFrameLowering(STI, STI.stackAlignment()) {}
+
void Mips16FrameLowering::emitPrologue(MachineFunction &MF) const {
MachineBasicBlock &MBB = MF.front();
MachineFrameInfo *MFI = MF.getFrameInfo();
diff --git a/lib/Target/Mips/Mips16FrameLowering.h b/lib/Target/Mips/Mips16FrameLowering.h
index 3f7829d..1fb7eda 100644
--- a/lib/Target/Mips/Mips16FrameLowering.h
+++ b/lib/Target/Mips/Mips16FrameLowering.h
@@ -19,8 +19,7 @@
namespace llvm {
class Mips16FrameLowering : public MipsFrameLowering {
public:
- explicit Mips16FrameLowering(const MipsSubtarget &STI)
- : MipsFrameLowering(STI, STI.stackAlignment()) {}
+ explicit Mips16FrameLowering(const MipsSubtarget &STI);
/// emitProlog/emitEpilog - These methods insert prolog and epilog code into
/// the function.
diff --git a/lib/Target/Mips/Mips16ISelDAGToDAG.cpp b/lib/Target/Mips/Mips16ISelDAGToDAG.cpp
index 4e86a27..6672aef 100644
--- a/lib/Target/Mips/Mips16ISelDAGToDAG.cpp
+++ b/lib/Target/Mips/Mips16ISelDAGToDAG.cpp
@@ -37,7 +37,7 @@ using namespace llvm;
#define DEBUG_TYPE "mips-isel"
bool Mips16DAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
- if (!Subtarget.inMips16Mode())
+ if (!Subtarget->inMips16Mode())
return false;
return MipsDAGToDAGISel::runOnMachineFunction(MF);
}
@@ -226,9 +226,9 @@ bool Mips16DAGToDAGISel::selectAddr16(
const LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(Parent);
if (LS) {
- if (LS->getMemoryVT() == MVT::f32 && Subtarget.hasMips4_32r2())
+ if (LS->getMemoryVT() == MVT::f32 && Subtarget->hasMips4_32r2())
return false;
- if (LS->getMemoryVT() == MVT::f64 && Subtarget.hasMips4_32r2())
+ if (LS->getMemoryVT() == MVT::f64 && Subtarget->hasMips4_32r2())
return false;
}
}
diff --git a/lib/Target/Mips/Mips16ISelLowering.cpp b/lib/Target/Mips/Mips16ISelLowering.cpp
index 9102450..81a05df 100644
--- a/lib/Target/Mips/Mips16ISelLowering.cpp
+++ b/lib/Target/Mips/Mips16ISelLowering.cpp
@@ -120,13 +120,6 @@ static const Mips16IntrinsicHelperType Mips16IntrinsicHelper[] = {
Mips16TargetLowering::Mips16TargetLowering(MipsTargetMachine &TM)
: MipsTargetLowering(TM) {
- //
- // set up as if mips32 and then revert so we can test the mechanism
- // for switching
- addRegisterClass(MVT::i32, &Mips::GPR32RegClass);
- addRegisterClass(MVT::f32, &Mips::FGR32RegClass);
- computeRegisterProperties();
- clearRegisterClasses();
// Set up the register classes
addRegisterClass(MVT::i32, &Mips::CPU16RegsRegClass);
diff --git a/lib/Target/Mips/Mips16ISelLowering.h b/lib/Target/Mips/Mips16ISelLowering.h
index df88333..2a5eec5 100644
--- a/lib/Target/Mips/Mips16ISelLowering.h
+++ b/lib/Target/Mips/Mips16ISelLowering.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef Mips16ISELLOWERING_H
-#define Mips16ISELLOWERING_H
+#ifndef MIPS16ISELLOWERING_H
+#define MIPS16ISELLOWERING_H
#include "MipsISelLowering.h"
diff --git a/lib/Target/Mips/Mips16InstrInfo.td b/lib/Target/Mips/Mips16InstrInfo.td
index 11166c4..5e4eebb 100644
--- a/lib/Target/Mips/Mips16InstrInfo.td
+++ b/lib/Target/Mips/Mips16InstrInfo.td
@@ -1370,9 +1370,11 @@ def : Mips16Pat<(MipsJmpLink (i32 texternalsym:$dst)),
(Jal16 texternalsym:$dst)>;
// Indirect branch
-def: Mips16Pat<
- (brind CPU16Regs:$rs),
- (JrcRx16 CPU16Regs:$rs)>;
+def: Mips16Pat<(brind CPU16Regs:$rs), (JrcRx16 CPU16Regs:$rs)> {
+ // Ensure that the addition of MIPS32r6/MIPS64r6 support does not change
+ // MIPS16's behaviour.
+ let AddedComplexity = 1;
+}
// Jump and Link (Call)
let isCall=1, hasDelaySlot=0 in
diff --git a/lib/Target/Mips/Mips32r6InstrFormats.td b/lib/Target/Mips/Mips32r6InstrFormats.td
index a3f9df5..e4ec96a 100644
--- a/lib/Target/Mips/Mips32r6InstrFormats.td
+++ b/lib/Target/Mips/Mips32r6InstrFormats.td
@@ -39,7 +39,10 @@ def OPGROUP_DAUI : OPGROUP<0b011101>;
def OPGROUP_PCREL : OPGROUP<0b111011>;
def OPGROUP_REGIMM : OPGROUP<0b000001>;
def OPGROUP_SPECIAL : OPGROUP<0b000000>;
+// The spec occasionally names this value LL, LLD, SC, or SCD.
def OPGROUP_SPECIAL3 : OPGROUP<0b011111>;
+// The spec names this constant LWC2, LDC2, SWC2, and SDC2 in different places.
+def OPGROUP_COP2LDST : OPGROUP<0b010010>;
class OPCODE2<bits<2> Val> {
bits<2> Value = Val;
@@ -48,6 +51,11 @@ def OPCODE2_ADDIUPC : OPCODE2<0b00>;
def OPCODE2_LWPC : OPCODE2<0b01>;
def OPCODE2_LWUPC : OPCODE2<0b10>;
+class OPCODE3<bits<3> Val> {
+ bits<3> Value = Val;
+}
+def OPCODE3_LDPC : OPCODE3<0b110>;
+
class OPCODE5<bits<5> Val> {
bits<5> Value = Val;
}
@@ -59,6 +67,13 @@ def OPCODE5_BC1EQZ : OPCODE5<0b01001>;
def OPCODE5_BC1NEZ : OPCODE5<0b01101>;
def OPCODE5_BC2EQZ : OPCODE5<0b01001>;
def OPCODE5_BC2NEZ : OPCODE5<0b01101>;
+def OPCODE5_BGEZAL : OPCODE5<0b10001>;
+// The next four constants are unnamed in the spec. These names are taken from
+// the OPGROUP names they are used with.
+def OPCODE5_LDC2 : OPCODE5<0b01110>;
+def OPCODE5_LWC2 : OPCODE5<0b01010>;
+def OPCODE5_SDC2 : OPCODE5<0b01111>;
+def OPCODE5_SWC2 : OPCODE5<0b01011>;
class OPCODE6<bits<6> Val> {
bits<6> Value = Val;
@@ -67,6 +82,22 @@ def OPCODE6_ALIGN : OPCODE6<0b100000>;
def OPCODE6_DALIGN : OPCODE6<0b100100>;
def OPCODE6_BITSWAP : OPCODE6<0b100000>;
def OPCODE6_DBITSWAP : OPCODE6<0b100100>;
+def OPCODE6_JALR : OPCODE6<0b001001>;
+def OPCODE6_CACHE : OPCODE6<0b100101>;
+def OPCODE6_PREF : OPCODE6<0b110101>;
+// The next four constants are unnamed in the spec. These names are taken from
+// the OPGROUP names they are used with.
+def OPCODE6_LL : OPCODE6<0b110110>;
+def OPCODE6_LLD : OPCODE6<0b110111>;
+def OPCODE6_SC : OPCODE6<0b100110>;
+def OPCODE6_SCD : OPCODE6<0b100111>;
+def OPCODE6_CLO : OPCODE6<0b010001>;
+def OPCODE6_CLZ : OPCODE6<0b010000>;
+def OPCODE6_DCLO : OPCODE6<0b010011>;
+def OPCODE6_DCLZ : OPCODE6<0b010010>;
+def OPCODE6_LSA : OPCODE6<0b000101>;
+def OPCODE6_DLSA : OPCODE6<0b010101>;
+def OPCODE6_SDBBP : OPCODE6<0b001110>;
class FIELD_FMT<bits<5> Val> {
bits<5> Value = Val;
@@ -77,22 +108,23 @@ def FIELD_FMT_D : FIELD_FMT<0b10001>;
class FIELD_CMP_COND<bits<5> Val> {
bits<5> Value = Val;
}
-def FIELD_CMP_COND_F : FIELD_CMP_COND<0b00000>;
+// Note: The CMP_COND_FMT names differ from the C_COND_FMT names.
+def FIELD_CMP_COND_AF : FIELD_CMP_COND<0b00000>;
def FIELD_CMP_COND_UN : FIELD_CMP_COND<0b00001>;
def FIELD_CMP_COND_EQ : FIELD_CMP_COND<0b00010>;
def FIELD_CMP_COND_UEQ : FIELD_CMP_COND<0b00011>;
-def FIELD_CMP_COND_OLT : FIELD_CMP_COND<0b00100>;
+def FIELD_CMP_COND_LT : FIELD_CMP_COND<0b00100>;
def FIELD_CMP_COND_ULT : FIELD_CMP_COND<0b00101>;
-def FIELD_CMP_COND_OLE : FIELD_CMP_COND<0b00110>;
+def FIELD_CMP_COND_LE : FIELD_CMP_COND<0b00110>;
def FIELD_CMP_COND_ULE : FIELD_CMP_COND<0b00111>;
-def FIELD_CMP_COND_SF : FIELD_CMP_COND<0b01000>;
-def FIELD_CMP_COND_NGLE : FIELD_CMP_COND<0b01001>;
+def FIELD_CMP_COND_SAF : FIELD_CMP_COND<0b01000>;
+def FIELD_CMP_COND_SUN : FIELD_CMP_COND<0b01001>;
def FIELD_CMP_COND_SEQ : FIELD_CMP_COND<0b01010>;
-def FIELD_CMP_COND_NGL : FIELD_CMP_COND<0b01011>;
-def FIELD_CMP_COND_LT : FIELD_CMP_COND<0b01100>;
-def FIELD_CMP_COND_NGE : FIELD_CMP_COND<0b01101>;
-def FIELD_CMP_COND_LE : FIELD_CMP_COND<0b01110>;
-def FIELD_CMP_COND_NGT : FIELD_CMP_COND<0b01111>;
+def FIELD_CMP_COND_SUEQ : FIELD_CMP_COND<0b01011>;
+def FIELD_CMP_COND_SLT : FIELD_CMP_COND<0b01100>;
+def FIELD_CMP_COND_SULT : FIELD_CMP_COND<0b01101>;
+def FIELD_CMP_COND_SLE : FIELD_CMP_COND<0b01110>;
+def FIELD_CMP_COND_SULE : FIELD_CMP_COND<0b01111>;
class FIELD_CMP_FORMAT<bits<5> Val> {
bits<5> Value = Val;
@@ -139,6 +171,17 @@ class DAUI_FM : AUI_FM {
let Inst{31-26} = OPGROUP_DAUI.Value;
}
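+// BAL is encoded as BGEZAL (REGIMM group) with the rs field hardwired to zero.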
+class BAL_FM : MipsR6Inst {
+ bits<16> offset;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = OPGROUP_REGIMM.Value;
+ let Inst{25-21} = 0b00000;
+ let Inst{20-16} = OPCODE5_BGEZAL.Value;
+ let Inst{15-0} = offset;
+}
+
class COP1_2R_FM<bits<6> funct, FIELD_FMT Format> : MipsR6Inst {
bits<5> fs;
bits<5> fd;
@@ -216,6 +259,18 @@ class PCREL19_FM<OPCODE2 Operation> : MipsR6Inst {
let Inst{18-0} = imm;
}
+class PCREL18_FM<OPCODE3 Operation> : MipsR6Inst {
+ bits<5> rs;
+ bits<18> imm;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = OPGROUP_PCREL.Value;
+ let Inst{25-21} = rs;
+ let Inst{20-18} = Operation.Value;
+ let Inst{17-0} = imm;
+}
+
class SPECIAL3_2R_FM<OPCODE6 Operation> : MipsR6Inst {
bits<5> rd;
bits<5> rt;
@@ -230,6 +285,36 @@ class SPECIAL3_2R_FM<OPCODE6 Operation> : MipsR6Inst {
let Inst{5-0} = Operation.Value;
}
+class SPECIAL3_MEM_FM<OPCODE6 Operation> : MipsR6Inst {
+ bits<21> addr;
+ bits<5> hint;
+ bits<5> base = addr{20-16};
+ bits<9> offset = addr{8-0};
+
+ bits<32> Inst;
+
+ let Inst{31-26} = OPGROUP_SPECIAL3.Value;
+ let Inst{25-21} = base;
+ let Inst{20-16} = hint;
+ let Inst{15-7} = offset;
+ let Inst{6} = 0;
+ let Inst{5-0} = Operation.Value;
+}
+
+class SPECIAL_2R_FM<OPCODE6 Operation> : MipsR6Inst {
+ bits<5> rd;
+ bits<5> rs;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = OPGROUP_SPECIAL.Value;
+ let Inst{25-21} = rs;
+ let Inst{20-16} = 0b00000;
+ let Inst{15-11} = rd;
+ let Inst{10-6} = 0b00001;
+ let Inst{5-0} = Operation.Value;
+}
+
class SPECIAL_3R_FM<bits<5> mulop, bits<6> funct> : MipsR6Inst {
bits<5> rd;
bits<5> rs;
@@ -245,6 +330,16 @@ class SPECIAL_3R_FM<bits<5> mulop, bits<6> funct> : MipsR6Inst {
let Inst{5-0} = funct;
}
+class SPECIAL_SDBBP_FM : MipsR6Inst {
+ bits<20> code_;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = OPGROUP_SPECIAL.Value;
+ let Inst{25-6} = code_;
+ let Inst{5-0} = OPCODE6_SDBBP.Value;
+}
+
// This class is ambiguous with other branches:
// BEQC/BNEC require that rs > rt
class CMP_BRANCH_2R_OFF16_FM<OPGROUP funct> : MipsR6Inst {
@@ -355,6 +450,40 @@ class SPECIAL3_DALIGN_FM<OPCODE6 Operation> : MipsR6Inst {
let Inst{5-0} = Operation.Value;
}
+class SPECIAL3_LL_SC_FM<OPCODE6 Operation> : MipsR6Inst {
+ bits<5> rt;
+ bits<21> addr;
+ bits<5> base = addr{20-16};
+ bits<9> offset = addr{8-0};
+
+ bits<32> Inst;
+
+ let Inst{31-26} = OPGROUP_SPECIAL3.Value;
+ let Inst{25-21} = base;
+ let Inst{20-16} = rt;
+ let Inst{15-7} = offset;
+ let Inst{5-0} = Operation.Value;
+
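+ // The 9-bit offset sits in bits 15-7, which the generated decoder cannot
+ // extract on its own; DecodeSpecial3LlSc does it manually.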
+ string DecoderMethod = "DecodeSpecial3LlSc";
+}
+
+class SPECIAL_LSA_FM<OPCODE6 Operation> : MipsR6Inst {
+ bits<5> rd;
+ bits<5> rs;
+ bits<5> rt;
+ bits<2> imm2;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = OPGROUP_SPECIAL.Value;
+ let Inst{25-21} = rs;
+ let Inst{20-16} = rt;
+ let Inst{15-11} = rd;
+ let Inst{10-8} = 0b000;
+ let Inst{7-6} = imm2;
+ let Inst{5-0} = Operation.Value;
+}
+
class REGIMM_FM<OPCODE5 Operation> : MipsR6Inst {
bits<5> rs;
bits<16> imm;
@@ -384,3 +513,31 @@ class COP1_CMP_CONDN_FM<FIELD_CMP_FORMAT Format,
let Inst{4-0} = Cond.Value;
}
+class JR_HB_R6_FM<OPCODE6 Operation> : MipsR6Inst {
+ bits<5> rs;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = OPGROUP_SPECIAL.Value;
+ let Inst{25-21} = rs;
+ let Inst{20-16} = 0;
+ let Inst{15-11} = 0;
+ let Inst{10} = 1;
+ let Inst{9-6} = 0;
+ let Inst{5-0} = Operation.Value;
+}
+
+class COP2LDST_FM<OPCODE5 Operation> : MipsR6Inst {
+ bits<5> rt;
+ bits<21> addr;
+ bits<5> base = addr{20-16};
+ bits<11> offset = addr{10-0};
+
+ bits<32> Inst;
+
+ let Inst{31-26} = OPGROUP_COP2LDST.Value;
+ let Inst{25-21} = Operation.Value;
+ let Inst{20-16} = rt;
+ let Inst{15-11} = base;
+ let Inst{10-0} = offset;
+}
diff --git a/lib/Target/Mips/Mips32r6InstrInfo.td b/lib/Target/Mips/Mips32r6InstrInfo.td
index ffaf965..d06e5ca 100644
--- a/lib/Target/Mips/Mips32r6InstrInfo.td
+++ b/lib/Target/Mips/Mips32r6InstrInfo.td
@@ -14,39 +14,8 @@
include "Mips32r6InstrFormats.td"
// Notes about removals/changes from MIPS32r6:
-// Unclear: ssnop
-// Reencoded: cache, pref
-// Reencoded: clo, clz
// Reencoded: jr -> jalr
// Reencoded: jr.hb -> jalr.hb
-// Reencoded: ldc2
-// Reencoded: ll, sc
-// Reencoded: lwc2
-// Reencoded: sdbbp
-// Reencoded: sdc2
-// Reencoded: swc2
-// Removed: bc1any2, bc1any4
-// Removed: bc2[ft]
-// Removed: bc2f, bc2t
-// Removed: bgezal
-// Removed: bltzal
-// Removed: c.cond.fmt, bc1[ft]
-// Removed: div, divu
-// Removed: jalx
-// Removed: ldxc1
-// Removed: luxc1
-// Removed: lwxc1
-// Removed: madd.[ds], nmadd.[ds], nmsub.[ds], sub.[ds]
-// Removed: mfhi, mflo, mthi, mtlo, madd, maddu, msub, msubu, mul
-// Removed: movf, movt
-// Removed: movf.fmt, movt.fmt, movn.fmt, movz.fmt
-// Removed: movn, movz
-// Removed: mult, multu
-// Removed: prefx
-// Removed: sdxc1
-// Removed: suxc1
-// Removed: swxc1
-// Rencoded: [ls][wd]c2
def brtarget21 : Operand<OtherVT> {
let EncoderMethod = "getBranchTarget21OpValue";
@@ -84,6 +53,7 @@ class ALUIPC_ENC : PCREL16_FM<OPCODE5_ALUIPC>;
class AUI_ENC : AUI_FM;
class AUIPC_ENC : PCREL16_FM<OPCODE5_AUIPC>;
+class BAL_ENC : BAL_FM;
class BALC_ENC : BRANCH_OFF26_FM<0b111010>;
class BC_ENC : BRANCH_OFF26_FM<0b110010>;
class BEQC_ENC : CMP_BRANCH_2R_OFF16_FM<OPGROUP_ADDI>,
@@ -97,11 +67,20 @@ class BNEZALC_ENC : CMP_BRANCH_1R_RT_OFF16_FM<OPGROUP_DADDI>,
class BLTZC_ENC : CMP_BRANCH_1R_BOTH_OFF16_FM<OPGROUP_BGTZL>,
DecodeDisambiguates<"BgtzlGroupBranch">;
+class BGEC_ENC : CMP_BRANCH_2R_OFF16_FM<OPGROUP_BLEZL>,
+ DecodeDisambiguatedBy<"BlezlGroupBranch">;
+class BGEUC_ENC : CMP_BRANCH_2R_OFF16_FM<OPGROUP_BLEZ>,
+ DecodeDisambiguatedBy<"BlezGroupBranch">;
class BGEZC_ENC : CMP_BRANCH_1R_BOTH_OFF16_FM<OPGROUP_BLEZL>,
DecodeDisambiguates<"BlezlGroupBranch">;
class BGTZALC_ENC : CMP_BRANCH_1R_RT_OFF16_FM<OPGROUP_BGTZ>,
DecodeDisambiguatedBy<"BgtzGroupBranch">;
+class BLTC_ENC : CMP_BRANCH_2R_OFF16_FM<OPGROUP_BGTZL>,
+ DecodeDisambiguatedBy<"BgtzlGroupBranch">;
+class BLTUC_ENC : CMP_BRANCH_2R_OFF16_FM<OPGROUP_BGTZ>,
+ DecodeDisambiguatedBy<"BgtzGroupBranch">;
+
class BLEZC_ENC : CMP_BRANCH_1R_RT_OFF16_FM<OPGROUP_BLEZL>,
DecodeDisambiguatedBy<"BlezlGroupBranch">;
class BLTZALC_ENC : CMP_BRANCH_1R_BOTH_OFF16_FM<OPGROUP_BGTZ>,
@@ -110,7 +89,8 @@ class BGTZC_ENC : CMP_BRANCH_1R_RT_OFF16_FM<OPGROUP_BGTZL>,
DecodeDisambiguatedBy<"BgtzlGroupBranch">;
class BEQZC_ENC : CMP_BRANCH_OFF21_FM<0b110110>;
-class BGEZALC_ENC : CMP_BRANCH_1R_BOTH_OFF16_FM<OPGROUP_BLEZ>;
+class BGEZALC_ENC : CMP_BRANCH_1R_BOTH_OFF16_FM<OPGROUP_BLEZ>,
+ DecodeDisambiguates<"BlezGroupBranch">;
class BNEZC_ENC : CMP_BRANCH_OFF21_FM<0b111110>;
class BC1EQZ_ENC : COP1_BCCZ_FM<OPCODE5_BC1EQZ>;
@@ -120,9 +100,10 @@ class BC2NEZ_ENC : COP2_BCCZ_FM<OPCODE5_BC2NEZ>;
class JIALC_ENC : JMP_IDX_COMPACT_FM<0b111110>;
class JIC_ENC : JMP_IDX_COMPACT_FM<0b110110>;
-
+class JR_HB_R6_ENC : JR_HB_R6_FM<OPCODE6_JALR>;
class BITSWAP_ENC : SPECIAL3_2R_FM<OPCODE6_BITSWAP>;
-class BLEZALC_ENC : CMP_BRANCH_1R_RT_OFF16_FM<OPGROUP_BLEZ>;
+class BLEZALC_ENC : CMP_BRANCH_1R_RT_OFF16_FM<OPGROUP_BLEZ>,
+ DecodeDisambiguatedBy<"BlezGroupBranch">;
class BNVC_ENC : CMP_BRANCH_2R_OFF16_FM<OPGROUP_DADDI>,
DecodeDisambiguatedBy<"DaddiGroupBranch">;
class BOVC_ENC : CMP_BRANCH_2R_OFF16_FM<OPGROUP_ADDI>,
@@ -170,12 +151,23 @@ class RINT_D_ENC : COP1_2R_FM<0b011010, FIELD_FMT_D>;
class CLASS_S_ENC : COP1_2R_FM<0b011011, FIELD_FMT_S>;
class CLASS_D_ENC : COP1_2R_FM<0b011011, FIELD_FMT_D>;
-class CMP_CONDN_DESC_BASE<string CondStr, string Typestr, RegisterOperand FGROpnd> {
- dag OutOperandList = (outs FGROpnd:$fd);
- dag InOperandList = (ins FGROpnd:$fs, FGROpnd:$ft);
- string AsmString = !strconcat("cmp.", CondStr, ".", Typestr, "\t$fd, $fs, $ft");
- list<dag> Pattern = [];
-}
+class CACHE_ENC : SPECIAL3_MEM_FM<OPCODE6_CACHE>;
+class PREF_ENC : SPECIAL3_MEM_FM<OPCODE6_PREF>;
+
+class LDC2_R6_ENC : COP2LDST_FM<OPCODE5_LDC2>;
+class LWC2_R6_ENC : COP2LDST_FM<OPCODE5_LWC2>;
+class SDC2_R6_ENC : COP2LDST_FM<OPCODE5_SDC2>;
+class SWC2_R6_ENC : COP2LDST_FM<OPCODE5_SWC2>;
+
+class LSA_R6_ENC : SPECIAL_LSA_FM<OPCODE6_LSA>;
+
+class LL_R6_ENC : SPECIAL3_LL_SC_FM<OPCODE6_LL>;
+class SC_R6_ENC : SPECIAL3_LL_SC_FM<OPCODE6_SC>;
+
+class CLO_R6_ENC : SPECIAL_2R_FM<OPCODE6_CLO>;
+class CLZ_R6_ENC : SPECIAL_2R_FM<OPCODE6_CLZ>;
+
+class SDBBP_R6_ENC : SPECIAL_SDBBP_FM;
//===----------------------------------------------------------------------===//
//
@@ -183,56 +175,65 @@ class CMP_CONDN_DESC_BASE<string CondStr, string Typestr, RegisterOperand FGROpn
//
//===----------------------------------------------------------------------===//
+class CMP_CONDN_DESC_BASE<string CondStr, string Typestr,
+ RegisterOperand FGROpnd,
+ SDPatternOperator Op = null_frag> {
+ dag OutOperandList = (outs FGRCCOpnd:$fd);
+ dag InOperandList = (ins FGROpnd:$fs, FGROpnd:$ft);
+ string AsmString = !strconcat("cmp.", CondStr, ".", Typestr, "\t$fd, $fs, $ft");
+ list<dag> Pattern = [(set FGRCCOpnd:$fd, (Op FGROpnd:$fs, FGROpnd:$ft))];
+}
+
multiclass CMP_CC_M <FIELD_CMP_FORMAT Format, string Typestr,
RegisterOperand FGROpnd>{
- def CMP_F_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_F>,
- CMP_CONDN_DESC_BASE<"f", Typestr, FGROpnd>,
+ def CMP_F_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_AF>,
+ CMP_CONDN_DESC_BASE<"af", Typestr, FGROpnd>,
ISA_MIPS32R6;
def CMP_UN_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_UN>,
- CMP_CONDN_DESC_BASE<"un", Typestr, FGROpnd>,
+ CMP_CONDN_DESC_BASE<"un", Typestr, FGROpnd, setuo>,
ISA_MIPS32R6;
def CMP_EQ_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_EQ>,
- CMP_CONDN_DESC_BASE<"eq", Typestr, FGROpnd>,
+ CMP_CONDN_DESC_BASE<"eq", Typestr, FGROpnd, setoeq>,
ISA_MIPS32R6;
def CMP_UEQ_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_UEQ>,
- CMP_CONDN_DESC_BASE<"ueq", Typestr, FGROpnd>,
- ISA_MIPS32R6;
- def CMP_OLT_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_OLT>,
- CMP_CONDN_DESC_BASE<"olt", Typestr, FGROpnd>,
+ CMP_CONDN_DESC_BASE<"ueq", Typestr, FGROpnd, setueq>,
ISA_MIPS32R6;
+ def CMP_LT_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_LT>,
+ CMP_CONDN_DESC_BASE<"lt", Typestr, FGROpnd, setolt>,
+ ISA_MIPS32R6;
def CMP_ULT_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_ULT>,
- CMP_CONDN_DESC_BASE<"ult", Typestr, FGROpnd>,
- ISA_MIPS32R6;
- def CMP_OLE_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_OLE>,
- CMP_CONDN_DESC_BASE<"ole", Typestr, FGROpnd>,
+ CMP_CONDN_DESC_BASE<"ult", Typestr, FGROpnd, setult>,
ISA_MIPS32R6;
+ def CMP_LE_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_LE>,
+ CMP_CONDN_DESC_BASE<"le", Typestr, FGROpnd, setole>,
+ ISA_MIPS32R6;
def CMP_ULE_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_ULE>,
- CMP_CONDN_DESC_BASE<"ule", Typestr, FGROpnd>,
+ CMP_CONDN_DESC_BASE<"ule", Typestr, FGROpnd, setule>,
+ ISA_MIPS32R6;
+ def CMP_SAF_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_SAF>,
+ CMP_CONDN_DESC_BASE<"saf", Typestr, FGROpnd>,
+ ISA_MIPS32R6;
+ def CMP_SUN_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_SUN>,
+ CMP_CONDN_DESC_BASE<"sun", Typestr, FGROpnd>,
ISA_MIPS32R6;
- def CMP_SF_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_SF>,
- CMP_CONDN_DESC_BASE<"sf", Typestr, FGROpnd>,
- ISA_MIPS32R6;
- def CMP_NGLE_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_NGLE>,
- CMP_CONDN_DESC_BASE<"ngle", Typestr, FGROpnd>,
- ISA_MIPS32R6;
def CMP_SEQ_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_SEQ>,
CMP_CONDN_DESC_BASE<"seq", Typestr, FGROpnd>,
ISA_MIPS32R6;
- def CMP_NGL_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_NGL>,
- CMP_CONDN_DESC_BASE<"ngl", Typestr, FGROpnd>,
- ISA_MIPS32R6;
- def CMP_LT_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_LT>,
- CMP_CONDN_DESC_BASE<"lt", Typestr, FGROpnd>,
- ISA_MIPS32R6;
- def CMP_NGE_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_NGE>,
- CMP_CONDN_DESC_BASE<"nge", Typestr, FGROpnd>,
+ def CMP_SUEQ_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_SUEQ>,
+ CMP_CONDN_DESC_BASE<"sueq", Typestr, FGROpnd>,
+ ISA_MIPS32R6;
+ def CMP_SLT_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_SLT>,
+ CMP_CONDN_DESC_BASE<"slt", Typestr, FGROpnd>,
ISA_MIPS32R6;
- def CMP_LE_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_LE>,
- CMP_CONDN_DESC_BASE<"le", Typestr, FGROpnd>,
- ISA_MIPS32R6;
- def CMP_NGT_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_NGT>,
- CMP_CONDN_DESC_BASE<"ngt", Typestr, FGROpnd>,
+ def CMP_SULT_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_SULT>,
+ CMP_CONDN_DESC_BASE<"sult", Typestr, FGROpnd>,
+ ISA_MIPS32R6;
+ def CMP_SLE_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_SLE>,
+ CMP_CONDN_DESC_BASE<"sle", Typestr, FGROpnd>,
ISA_MIPS32R6;
+ def CMP_SULE_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_SULE>,
+ CMP_CONDN_DESC_BASE<"sule", Typestr, FGROpnd>,
+ ISA_MIPS32R6;
}
//===----------------------------------------------------------------------===//
@@ -241,16 +242,17 @@ multiclass CMP_CC_M <FIELD_CMP_FORMAT Format, string Typestr,
//
//===----------------------------------------------------------------------===//
-class PCREL19_DESC_BASE<string instr_asm, RegisterOperand GPROpnd> {
+class PCREL_DESC_BASE<string instr_asm, RegisterOperand GPROpnd,
+ Operand ImmOpnd> {
dag OutOperandList = (outs GPROpnd:$rs);
- dag InOperandList = (ins simm19_lsl2:$imm);
+ dag InOperandList = (ins ImmOpnd:$imm);
string AsmString = !strconcat(instr_asm, "\t$rs, $imm");
list<dag> Pattern = [];
}
-class ADDIUPC_DESC : PCREL19_DESC_BASE<"addiupc", GPR32Opnd>;
-class LWPC_DESC: PCREL19_DESC_BASE<"lwpc", GPR32Opnd>;
-class LWUPC_DESC: PCREL19_DESC_BASE<"lwupc", GPR32Opnd>;
+class ADDIUPC_DESC : PCREL_DESC_BASE<"addiupc", GPR32Opnd, simm19_lsl2>;
+class LWPC_DESC: PCREL_DESC_BASE<"lwpc", GPR32Opnd, simm19_lsl2>;
+class LWUPC_DESC: PCREL_DESC_BASE<"lwupc", GPR32Opnd, simm19_lsl2>;
class ALIGN_DESC_BASE<string instr_asm, RegisterOperand GPROpnd,
Operand ImmOpnd> {
@@ -318,15 +320,26 @@ class CMP_CBR_RT_Z_DESC_BASE<string instr_asm, DAGOperand opnd,
list<Register> Defs = [AT];
}
+class BAL_DESC : BC_DESC_BASE<"bal", brtarget> {
+ bit isCall = 1;
+ bit hasDelaySlot = 1;
+ list<Register> Defs = [RA];
+}
+
class BALC_DESC : BC_DESC_BASE<"balc", brtarget26> {
bit isCall = 1;
list<Register> Defs = [RA];
}
class BC_DESC : BC_DESC_BASE<"bc", brtarget26>;
+class BGEC_DESC : CMP_BC_DESC_BASE<"bgec", brtarget, GPR32Opnd>;
+class BGEUC_DESC : CMP_BC_DESC_BASE<"bgeuc", brtarget, GPR32Opnd>;
class BEQC_DESC : CMP_BC_DESC_BASE<"beqc", brtarget, GPR32Opnd>;
class BNEC_DESC : CMP_BC_DESC_BASE<"bnec", brtarget, GPR32Opnd>;
+class BLTC_DESC : CMP_BC_DESC_BASE<"bltc", brtarget, GPR32Opnd>;
+class BLTUC_DESC : CMP_BC_DESC_BASE<"bltuc", brtarget, GPR32Opnd>;
+
class BLTZC_DESC : CMP_CBR_RT_Z_DESC_BASE<"bltzc", brtarget, GPR32Opnd>;
class BGEZC_DESC : CMP_CBR_RT_Z_DESC_BASE<"bgezc", brtarget, GPR32Opnd>;
@@ -380,6 +393,14 @@ class JIC_DESC : JMP_IDX_COMPACT_DESC_BASE<"jic", jmpoffset16, GPR32Opnd> {
list<Register> Defs = [AT];
}
+class JR_HB_R6_DESC : JR_HB_DESC_BASE<"jr.hb", GPR32Opnd> {
+ bit isBranch = 1;
+ bit isIndirectBranch = 1;
+ bit hasDelaySlot = 1;
+ bit isTerminator=1;
+ bit isBarrier=1;
+}
+
class BITSWAP_DESC_BASE<string instr_asm, RegisterOperand GPROpnd> {
dag OutOperandList = (outs GPROpnd:$rd);
dag InOperandList = (ins GPROpnd:$rt);
@@ -389,17 +410,22 @@ class BITSWAP_DESC_BASE<string instr_asm, RegisterOperand GPROpnd> {
class BITSWAP_DESC : BITSWAP_DESC_BASE<"bitswap", GPR32Opnd>;
-class DIVMOD_DESC_BASE<string instr_asm, RegisterOperand GPROpnd> {
+class DIVMOD_DESC_BASE<string instr_asm, RegisterOperand GPROpnd,
+ SDPatternOperator Op=null_frag> {
dag OutOperandList = (outs GPROpnd:$rd);
dag InOperandList = (ins GPROpnd:$rs, GPROpnd:$rt);
string AsmString = !strconcat(instr_asm, "\t$rd, $rs, $rt");
- list<dag> Pattern = [];
+ list<dag> Pattern = [(set GPROpnd:$rd, (Op GPROpnd:$rs, GPROpnd:$rt))];
+
+ // This instruction doesn't trap division by zero itself. We must insert
+ // teq instructions as well.
+ bit usesCustomInserter = 1;
}
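
The usesCustomInserter flag defers that fix-up to C++ after instruction
selection. A rough sketch of the expansion (helper name, signature, and
the use of break code 7 are illustrative assumptions, not code from this
patch):

    #include <iterator>
    #include "llvm/CodeGen/MachineInstrBuilder.h"
    #include "llvm/Target/TargetInstrInfo.h"
    // Assumes the usual Mips backend headers for Mips::TEQ / Mips::ZERO.
    using namespace llvm;

    // Keep the pre-R6 trapping behaviour by emitting
    // "teq $divisor, $zero, 7" right after the R6 div/mod instruction.
    static void insertDivByZeroTrap(MachineInstr &MI, MachineBasicBlock &MBB,
                                    const TargetInstrInfo &TII) {
      MachineBasicBlock::iterator I(MI);
      BuildMI(MBB, std::next(I), MI.getDebugLoc(), TII.get(Mips::TEQ))
          .addReg(MI.getOperand(2).getReg()) // $rt, the divisor
          .addReg(Mips::ZERO)
          .addImm(7); // break code conventionally used for divide-by-zero
    }
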
-class DIV_DESC : DIVMOD_DESC_BASE<"div", GPR32Opnd>;
-class DIVU_DESC : DIVMOD_DESC_BASE<"divu", GPR32Opnd>;
-class MOD_DESC : DIVMOD_DESC_BASE<"mod", GPR32Opnd>;
-class MODU_DESC : DIVMOD_DESC_BASE<"modu", GPR32Opnd>;
+class DIV_DESC : DIVMOD_DESC_BASE<"div", GPR32Opnd, sdiv>;
+class DIVU_DESC : DIVMOD_DESC_BASE<"divu", GPR32Opnd, udiv>;
+class MOD_DESC : DIVMOD_DESC_BASE<"mod", GPR32Opnd, srem>;
+class MODU_DESC : DIVMOD_DESC_BASE<"modu", GPR32Opnd, urem>;
class BEQZALC_DESC : CMP_CBR_RT_Z_DESC_BASE<"beqzalc", brtarget, GPR32Opnd> {
list<Register> Defs = [RA];
@@ -424,28 +450,35 @@ class BLTZALC_DESC : CMP_CBR_RT_Z_DESC_BASE<"bltzalc", brtarget, GPR32Opnd> {
class BNEZALC_DESC : CMP_CBR_RT_Z_DESC_BASE<"bnezalc", brtarget, GPR32Opnd> {
list<Register> Defs = [RA];
}
-class MUL_R6_DESC_BASE<string instr_asm, RegisterOperand GPROpnd> {
+
+class MUL_R6_DESC_BASE<string instr_asm, RegisterOperand GPROpnd,
+ SDPatternOperator Op=null_frag> {
dag OutOperandList = (outs GPROpnd:$rd);
dag InOperandList = (ins GPROpnd:$rs, GPROpnd:$rt);
string AsmString = !strconcat(instr_asm, "\t$rd, $rs, $rt");
- list<dag> Pattern = [];
+ list<dag> Pattern = [(set GPROpnd:$rd, (Op GPROpnd:$rs, GPROpnd:$rt))];
}
-class MUH_DESC : MUL_R6_DESC_BASE<"muh", GPR32Opnd>;
-class MUHU_DESC : MUL_R6_DESC_BASE<"muhu", GPR32Opnd>;
-class MUL_R6_DESC : MUL_R6_DESC_BASE<"mul", GPR32Opnd>;
+class MUH_DESC : MUL_R6_DESC_BASE<"muh", GPR32Opnd, mulhs>;
+class MUHU_DESC : MUL_R6_DESC_BASE<"muhu", GPR32Opnd, mulhu>;
+class MUL_R6_DESC : MUL_R6_DESC_BASE<"mul", GPR32Opnd, mul>;
class MULU_DESC : MUL_R6_DESC_BASE<"mulu", GPR32Opnd>;
-class COP1_4R_DESC_BASE<string instr_asm, RegisterOperand FGROpnd> {
+class COP1_SEL_DESC_BASE<string instr_asm, RegisterOperand FGROpnd> {
dag OutOperandList = (outs FGROpnd:$fd);
- dag InOperandList = (ins FGROpnd:$fd_in, FGROpnd:$fs, FGROpnd:$ft);
+ dag InOperandList = (ins FGRCCOpnd:$fd_in, FGROpnd:$fs, FGROpnd:$ft);
string AsmString = !strconcat(instr_asm, "\t$fd, $fs, $ft");
- list<dag> Pattern = [];
+ list<dag> Pattern = [(set FGROpnd:$fd, (select FGRCCOpnd:$fd_in,
+ FGROpnd:$ft,
+ FGROpnd:$fs))];
string Constraints = "$fd_in = $fd";
}
-class SEL_D_DESC : COP1_4R_DESC_BASE<"sel.d", FGR64Opnd>;
-class SEL_S_DESC : COP1_4R_DESC_BASE<"sel.s", FGR32Opnd>;
+class SEL_D_DESC : COP1_SEL_DESC_BASE<"sel.d", FGR64Opnd> {
+ // We must insert a SUBREG_TO_REG around $fd_in
+ bit usesCustomInserter = 1;
+}
+class SEL_S_DESC : COP1_SEL_DESC_BASE<"sel.s", FGR32Opnd>;
class SELEQNE_Z_DESC_BASE<string instr_asm, RegisterOperand GPROpnd> {
dag OutOperandList = (outs GPROpnd:$rd);
@@ -457,6 +490,14 @@ class SELEQNE_Z_DESC_BASE<string instr_asm, RegisterOperand GPROpnd> {
class SELEQZ_DESC : SELEQNE_Z_DESC_BASE<"seleqz", GPR32Opnd>;
class SELNEZ_DESC : SELEQNE_Z_DESC_BASE<"selnez", GPR32Opnd>;
+class COP1_4R_DESC_BASE<string instr_asm, RegisterOperand FGROpnd> {
+ dag OutOperandList = (outs FGROpnd:$fd);
+ dag InOperandList = (ins FGROpnd:$fd_in, FGROpnd:$fs, FGROpnd:$ft);
+ string AsmString = !strconcat(instr_asm, "\t$fd, $fs, $ft");
+ list<dag> Pattern = [];
+ string Constraints = "$fd_in = $fd";
+}
+
class MADDF_S_DESC : COP1_4R_DESC_BASE<"maddf.s", FGR32Opnd>;
class MADDF_D_DESC : COP1_4R_DESC_BASE<"maddf.d", FGR64Opnd>;
class MSUBF_S_DESC : COP1_4R_DESC_BASE<"msubf.s", FGR32Opnd>;
@@ -503,6 +544,96 @@ class RINT_D_DESC : CLASS_RINT_DESC_BASE<"rint.d", FGR64Opnd>;
class CLASS_S_DESC : CLASS_RINT_DESC_BASE<"class.s", FGR32Opnd>;
class CLASS_D_DESC : CLASS_RINT_DESC_BASE<"class.d", FGR64Opnd>;
+class CACHE_HINT_DESC<string instr_asm, Operand MemOpnd,
+ RegisterOperand GPROpnd> {
+ dag OutOperandList = (outs);
+ dag InOperandList = (ins MemOpnd:$addr, uimm5:$hint);
+ string AsmString = !strconcat(instr_asm, "\t$hint, $addr");
+ list<dag> Pattern = [];
+}
+
+class CACHE_DESC : CACHE_HINT_DESC<"cache", mem_simm9, GPR32Opnd>;
+class PREF_DESC : CACHE_HINT_DESC<"pref", mem_simm9, GPR32Opnd>;
+
+class COP2LD_DESC_BASE<string instr_asm, RegisterOperand COPOpnd> {
+ dag OutOperandList = (outs COPOpnd:$rt);
+ dag InOperandList = (ins mem_simm11:$addr);
+ string AsmString = !strconcat(instr_asm, "\t$rt, $addr");
+ list<dag> Pattern = [];
+ bit mayLoad = 1;
+}
+
+class LDC2_R6_DESC : COP2LD_DESC_BASE<"ldc2", COP2Opnd>;
+class LWC2_R6_DESC : COP2LD_DESC_BASE<"lwc2", COP2Opnd>;
+
+class COP2ST_DESC_BASE<string instr_asm, RegisterOperand COPOpnd> {
+ dag OutOperandList = (outs);
+ dag InOperandList = (ins COPOpnd:$rt, mem_simm11:$addr);
+ string AsmString = !strconcat(instr_asm, "\t$rt, $addr");
+ list<dag> Pattern = [];
+ bit mayStore = 1;
+}
+
+class SDC2_R6_DESC : COP2ST_DESC_BASE<"sdc2", COP2Opnd>;
+class SWC2_R6_DESC : COP2ST_DESC_BASE<"swc2", COP2Opnd>;
+
+class LSA_R6_DESC_BASE<string instr_asm, RegisterOperand GPROpnd,
+ Operand ImmOpnd> {
+ dag OutOperandList = (outs GPROpnd:$rd);
+ dag InOperandList = (ins GPROpnd:$rs, GPROpnd:$rt, ImmOpnd:$imm2);
+ string AsmString = !strconcat(instr_asm, "\t$rd, $rs, $rt, $imm2");
+ list<dag> Pattern = [];
+}
+
+class LSA_R6_DESC : LSA_R6_DESC_BASE<"lsa", GPR32Opnd, uimm2>;
+
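
For reference, the architectural semantics behind lsa (a hedged reading
of the MIPS32r6 manual; the 2-bit field encodes the shift amount minus
one):

    #include <cstdint>

    // lsa rd, rs, rt, imm2 computes a scaled-index sum:
    // rd = (rs << (imm2 + 1)) + rt, i.e. a scale of 2, 4, 8, or 16.
    uint32_t lsa(uint32_t Rs, uint32_t Rt, unsigned Imm2) {
      return (Rs << (Imm2 + 1)) + Rt;
    }
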
+class LL_R6_DESC_BASE<string instr_asm, RegisterOperand GPROpnd> {
+ dag OutOperandList = (outs GPROpnd:$rt);
+ dag InOperandList = (ins mem_simm9:$addr);
+ string AsmString = !strconcat(instr_asm, "\t$rt, $addr");
+ list<dag> Pattern = [];
+ bit mayLoad = 1;
+}
+
+class LL_R6_DESC : LL_R6_DESC_BASE<"ll", GPR32Opnd>;
+
+class SC_R6_DESC_BASE<string instr_asm, RegisterOperand GPROpnd> {
+ dag OutOperandList = (outs GPROpnd:$dst);
+ dag InOperandList = (ins GPROpnd:$rt, mem_simm9:$addr);
+ string AsmString = !strconcat(instr_asm, "\t$rt, $addr");
+ list<dag> Pattern = [];
+ bit mayStore = 1;
+ string Constraints = "$rt = $dst";
+}
+
+class SC_R6_DESC : SC_R6_DESC_BASE<"sc", GPR32Opnd>;
+
+class CLO_CLZ_R6_DESC_BASE<string instr_asm, RegisterOperand GPROpnd> {
+ dag OutOperandList = (outs GPROpnd:$rd);
+ dag InOperandList = (ins GPROpnd:$rs);
+ string AsmString = !strconcat(instr_asm, "\t$rd, $rs");
+}
+
+class CLO_R6_DESC_BASE<string instr_asm, RegisterOperand GPROpnd> :
+ CLO_CLZ_R6_DESC_BASE<instr_asm, GPROpnd> {
+ list<dag> Pattern = [(set GPROpnd:$rd, (ctlz (not GPROpnd:$rs)))];
+}
+
+class CLZ_R6_DESC_BASE<string instr_asm, RegisterOperand GPROpnd> :
+ CLO_CLZ_R6_DESC_BASE<instr_asm, GPROpnd> {
+ list<dag> Pattern = [(set GPROpnd:$rd, (ctlz GPROpnd:$rs))];
+}
+
+class CLO_R6_DESC : CLO_R6_DESC_BASE<"clo", GPR32Opnd>;
+class CLZ_R6_DESC : CLZ_R6_DESC_BASE<"clz", GPR32Opnd>;
+
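
The clo pattern above relies on a simple identity, demonstrated by this
standalone snippet (__builtin_clz is the GCC/Clang builtin; the guard
covers its undefined-at-zero case):

    #include <cstdint>

    // Counting leading ones of x is counting leading zeros of ~x,
    // which is exactly the (ctlz (not $rs)) pattern used for CLO.
    unsigned clz32(uint32_t X) { return X ? __builtin_clz(X) : 32; }
    unsigned clo32(uint32_t X) { return clz32(~X); }
    // clo32(0xffff0000u) == 16, clo32(0u) == 0, clo32(~0u) == 32
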
+class SDBBP_R6_DESC {
+ dag OutOperandList = (outs);
+ dag InOperandList = (ins uimm20:$code_);
+ string AsmString = "sdbbp\t$code_";
+ list<dag> Pattern = [];
+}
+
//===----------------------------------------------------------------------===//
//
// Instruction Definitions
@@ -514,6 +645,7 @@ def ALIGN : ALIGN_ENC, ALIGN_DESC, ISA_MIPS32R6;
def ALUIPC : ALUIPC_ENC, ALUIPC_DESC, ISA_MIPS32R6;
def AUI : AUI_ENC, AUI_DESC, ISA_MIPS32R6;
def AUIPC : AUIPC_ENC, AUIPC_DESC, ISA_MIPS32R6;
+def BAL : BAL_ENC, BAL_DESC, ISA_MIPS32R6;
def BALC : BALC_ENC, BALC_DESC, ISA_MIPS32R6;
def BC1EQZ : BC1EQZ_ENC, BC1EQZ_DESC, ISA_MIPS32R6;
def BC1NEZ : BC1NEZ_ENC, BC1NEZ_DESC, ISA_MIPS32R6;
@@ -523,8 +655,8 @@ def BC : BC_ENC, BC_DESC, ISA_MIPS32R6;
def BEQC : BEQC_ENC, BEQC_DESC, ISA_MIPS32R6;
def BEQZALC : BEQZALC_ENC, BEQZALC_DESC, ISA_MIPS32R6;
def BEQZC : BEQZC_ENC, BEQZC_DESC, ISA_MIPS32R6;
-def BGEC; // Also aliased to blec with operands swapped
-def BGEUC; // Also aliased to bleuc with operands swapped
+def BGEC : BGEC_ENC, BGEC_DESC, ISA_MIPS32R6;
+def BGEUC : BGEUC_ENC, BGEUC_DESC, ISA_MIPS32R6;
def BGEZALC : BGEZALC_ENC, BGEZALC_DESC, ISA_MIPS32R6;
def BGEZC : BGEZC_ENC, BGEZC_DESC, ISA_MIPS32R6;
def BGTZALC : BGTZALC_ENC, BGTZALC_DESC, ISA_MIPS32R6;
@@ -532,8 +664,8 @@ def BGTZC : BGTZC_ENC, BGTZC_DESC, ISA_MIPS32R6;
def BITSWAP : BITSWAP_ENC, BITSWAP_DESC, ISA_MIPS32R6;
def BLEZALC : BLEZALC_ENC, BLEZALC_DESC, ISA_MIPS32R6;
def BLEZC : BLEZC_ENC, BLEZC_DESC, ISA_MIPS32R6;
-def BLTC; // Also aliased to bgtc with operands swapped
-def BLTUC; // Also aliased to bgtuc with operands swapped
+def BLTC : BLTC_ENC, BLTC_DESC, ISA_MIPS32R6;
+def BLTUC : BLTUC_ENC, BLTUC_DESC, ISA_MIPS32R6;
def BLTZALC : BLTZALC_ENC, BLTZALC_DESC, ISA_MIPS32R6;
def BLTZC : BLTZC_ENC, BLTZC_DESC, ISA_MIPS32R6;
def BNEC : BNEC_ENC, BNEC_DESC, ISA_MIPS32R6;
@@ -541,15 +673,22 @@ def BNEZALC : BNEZALC_ENC, BNEZALC_DESC, ISA_MIPS32R6;
def BNEZC : BNEZC_ENC, BNEZC_DESC, ISA_MIPS32R6;
def BNVC : BNVC_ENC, BNVC_DESC, ISA_MIPS32R6;
def BOVC : BOVC_ENC, BOVC_DESC, ISA_MIPS32R6;
+def CACHE_R6 : CACHE_ENC, CACHE_DESC, ISA_MIPS32R6;
def CLASS_D : CLASS_D_ENC, CLASS_D_DESC, ISA_MIPS32R6;
def CLASS_S : CLASS_S_ENC, CLASS_S_DESC, ISA_MIPS32R6;
+def CLO_R6 : CLO_R6_ENC, CLO_R6_DESC, ISA_MIPS32R6;
+def CLZ_R6 : CLZ_R6_ENC, CLZ_R6_DESC, ISA_MIPS32R6;
defm S : CMP_CC_M<FIELD_CMP_FORMAT_S, "s", FGR32Opnd>;
defm D : CMP_CC_M<FIELD_CMP_FORMAT_D, "d", FGR64Opnd>;
def DIV : DIV_ENC, DIV_DESC, ISA_MIPS32R6;
def DIVU : DIVU_ENC, DIVU_DESC, ISA_MIPS32R6;
def JIALC : JIALC_ENC, JIALC_DESC, ISA_MIPS32R6;
def JIC : JIC_ENC, JIC_DESC, ISA_MIPS32R6;
-// def LSA; // See MSA
+def JR_HB_R6 : JR_HB_R6_ENC, JR_HB_R6_DESC, ISA_MIPS32R6;
+def LDC2_R6 : LDC2_R6_ENC, LDC2_R6_DESC, ISA_MIPS32R6;
+def LL_R6 : LL_R6_ENC, LL_R6_DESC, ISA_MIPS32R6;
+def LSA_R6 : LSA_R6_ENC, LSA_R6_DESC, ISA_MIPS32R6;
+def LWC2_R6 : LWC2_R6_ENC, LWC2_R6_DESC, ISA_MIPS32R6;
def LWPC : LWPC_ENC, LWPC_DESC, ISA_MIPS32R6;
def LWUPC : LWUPC_ENC, LWUPC_DESC, ISA_MIPS32R6;
def MADDF_S : MADDF_S_ENC, MADDF_S_DESC, ISA_MIPS32R6;
@@ -571,13 +710,115 @@ def MUHU : MUHU_ENC, MUHU_DESC, ISA_MIPS32R6;
def MUL_R6 : MUL_R6_ENC, MUL_R6_DESC, ISA_MIPS32R6;
def MULU : MULU_ENC, MULU_DESC, ISA_MIPS32R6;
def NAL; // BAL with rd=0
+def PREF_R6 : PREF_ENC, PREF_DESC, ISA_MIPS32R6;
def RINT_D : RINT_D_ENC, RINT_D_DESC, ISA_MIPS32R6;
def RINT_S : RINT_S_ENC, RINT_S_DESC, ISA_MIPS32R6;
-def SELEQZ : SELEQZ_ENC, SELEQZ_DESC, ISA_MIPS32R6;
+def SC_R6 : SC_R6_ENC, SC_R6_DESC, ISA_MIPS32R6;
+def SDBBP_R6 : SDBBP_R6_ENC, SDBBP_R6_DESC, ISA_MIPS32R6;
+def SDC2_R6 : SDC2_R6_ENC, SDC2_R6_DESC, ISA_MIPS32R6;
+def SELEQZ : SELEQZ_ENC, SELEQZ_DESC, ISA_MIPS32R6, GPR_32;
def SELEQZ_D : SELEQZ_D_ENC, SELEQZ_D_DESC, ISA_MIPS32R6;
def SELEQZ_S : SELEQZ_S_ENC, SELEQZ_S_DESC, ISA_MIPS32R6;
-def SELNEZ : SELNEZ_ENC, SELNEZ_DESC, ISA_MIPS32R6;
+def SELNEZ : SELNEZ_ENC, SELNEZ_DESC, ISA_MIPS32R6, GPR_32;
def SELNEZ_D : SELNEZ_D_ENC, SELNEZ_D_DESC, ISA_MIPS32R6;
def SELNEZ_S : SELNEZ_S_ENC, SELNEZ_S_DESC, ISA_MIPS32R6;
def SEL_D : SEL_D_ENC, SEL_D_DESC, ISA_MIPS32R6;
def SEL_S : SEL_S_ENC, SEL_S_DESC, ISA_MIPS32R6;
+def SWC2_R6 : SWC2_R6_ENC, SWC2_R6_DESC, ISA_MIPS32R6;
+
+//===----------------------------------------------------------------------===//
+//
+// Instruction Aliases
+//
+//===----------------------------------------------------------------------===//
+
+def : MipsInstAlias<"sdbbp", (SDBBP_R6 0)>, ISA_MIPS32R6;
+def : MipsInstAlias<"jr $rs", (JALR ZERO, GPR32Opnd:$rs), 1>, ISA_MIPS32R6;
+
+//===----------------------------------------------------------------------===//
+//
+// Patterns and Pseudo Instructions
+//
+//===----------------------------------------------------------------------===//
+
+// f32 comparisons supported via another comparison
+def : MipsPat<(setone f32:$lhs, f32:$rhs),
+ (NOR (CMP_UEQ_S f32:$lhs, f32:$rhs), ZERO)>, ISA_MIPS32R6;
+def : MipsPat<(seto f32:$lhs, f32:$rhs),
+ (NOR (CMP_UN_S f32:$lhs, f32:$rhs), ZERO)>, ISA_MIPS32R6;
+def : MipsPat<(setune f32:$lhs, f32:$rhs),
+ (NOR (CMP_EQ_S f32:$lhs, f32:$rhs), ZERO)>, ISA_MIPS32R6;
+def : MipsPat<(seteq f32:$lhs, f32:$rhs), (CMP_EQ_S f32:$lhs, f32:$rhs)>,
+ ISA_MIPS32R6;
+def : MipsPat<(setgt f32:$lhs, f32:$rhs), (CMP_LE_S f32:$rhs, f32:$lhs)>,
+ ISA_MIPS32R6;
+def : MipsPat<(setge f32:$lhs, f32:$rhs), (CMP_LT_S f32:$rhs, f32:$lhs)>,
+ ISA_MIPS32R6;
+def : MipsPat<(setlt f32:$lhs, f32:$rhs), (CMP_LT_S f32:$lhs, f32:$rhs)>,
+ ISA_MIPS32R6;
+def : MipsPat<(setle f32:$lhs, f32:$rhs), (CMP_LE_S f32:$lhs, f32:$rhs)>,
+ ISA_MIPS32R6;
+def : MipsPat<(setne f32:$lhs, f32:$rhs),
+ (NOR (CMP_EQ_S f32:$lhs, f32:$rhs), ZERO)>, ISA_MIPS32R6;
+
+// f64 comparisons supported via another comparison
+def : MipsPat<(setone f64:$lhs, f64:$rhs),
+ (NOR (CMP_UEQ_D f64:$lhs, f64:$rhs), ZERO)>, ISA_MIPS32R6;
+def : MipsPat<(seto f64:$lhs, f64:$rhs),
+ (NOR (CMP_UN_D f64:$lhs, f64:$rhs), ZERO)>, ISA_MIPS32R6;
+def : MipsPat<(setune f64:$lhs, f64:$rhs),
+ (NOR (CMP_EQ_D f64:$lhs, f64:$rhs), ZERO)>, ISA_MIPS32R6;
+def : MipsPat<(seteq f64:$lhs, f64:$rhs), (CMP_EQ_D f64:$lhs, f64:$rhs)>,
+ ISA_MIPS32R6;
+def : MipsPat<(setgt f64:$lhs, f64:$rhs), (CMP_LE_D f64:$rhs, f64:$lhs)>,
+ ISA_MIPS32R6;
+def : MipsPat<(setge f64:$lhs, f64:$rhs), (CMP_LT_D f64:$rhs, f64:$lhs)>,
+ ISA_MIPS32R6;
+def : MipsPat<(setlt f64:$lhs, f64:$rhs), (CMP_LT_D f64:$lhs, f64:$rhs)>,
+ ISA_MIPS32R6;
+def : MipsPat<(setle f64:$lhs, f64:$rhs), (CMP_LE_D f64:$lhs, f64:$rhs)>,
+ ISA_MIPS32R6;
+def : MipsPat<(setne f64:$lhs, f64:$rhs),
+ (NOR (CMP_EQ_D f64:$lhs, f64:$rhs), ZERO)>, ISA_MIPS32R6;
+
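
All of the synthesized comparisons above use one identity: R6 provides
the unordered flavours directly, and each ordered flavour is the
complement of an unordered one, which the NOR against ZERO computes on
the all-zeros/all-ones compare result. A minimal sketch of the setone
case:

    #include <cmath>

    // setueq: unordered (either operand NaN) or equal.
    bool ueq(float A, float B) { return std::isnan(A) || std::isnan(B) || A == B; }
    // setone (ordered and not equal) is its complement,
    // matching NOR (CMP_UEQ_S $lhs, $rhs), ZERO.
    bool one(float A, float B) { return !ueq(A, B); }
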
+// i32 selects
+def : MipsPat<(select i32:$cond, i32:$t, i32:$f),
+ (OR (SELNEZ i32:$t, i32:$cond), (SELEQZ i32:$f, i32:$cond))>,
+ ISA_MIPS32R6;
+def : MipsPat<(select (i32 (seteq i32:$cond, immz)), i32:$t, i32:$f),
+ (OR (SELEQZ i32:$t, i32:$cond), (SELNEZ i32:$f, i32:$cond))>,
+ ISA_MIPS32R6;
+def : MipsPat<(select (i32 (setne i32:$cond, immz)), i32:$t, i32:$f),
+ (OR (SELNEZ i32:$t, i32:$cond), (SELEQZ i32:$f, i32:$cond))>,
+ ISA_MIPS32R6;
+def : MipsPat<(select (i32 (seteq i32:$cond, immZExt16:$imm)), i32:$t, i32:$f),
+ (OR (SELEQZ i32:$t, (XORi i32:$cond, immZExt16:$imm)),
+ (SELNEZ i32:$f, (XORi i32:$cond, immZExt16:$imm)))>,
+ ISA_MIPS32R6;
+def : MipsPat<(select (i32 (setne i32:$cond, immZExt16:$imm)), i32:$t, i32:$f),
+              (OR (SELNEZ i32:$t, (XORi i32:$cond, immZExt16:$imm)),
+                  (SELEQZ i32:$f, (XORi i32:$cond, immZExt16:$imm)))>,
+ ISA_MIPS32R6;
+def : MipsPat<(select (i32 (setgt i32:$cond, immSExt16Plus1:$imm)), i32:$t,
+ i32:$f),
+ (OR (SELEQZ i32:$t, (SLTi i32:$cond, (Plus1 imm:$imm))),
+ (SELNEZ i32:$f, (SLTi i32:$cond, (Plus1 imm:$imm))))>,
+ ISA_MIPS32R6;
+def : MipsPat<(select (i32 (setugt i32:$cond, immSExt16Plus1:$imm)),
+ i32:$t, i32:$f),
+ (OR (SELEQZ i32:$t, (SLTiu i32:$cond, (Plus1 imm:$imm))),
+ (SELNEZ i32:$f, (SLTiu i32:$cond, (Plus1 imm:$imm))))>,
+ ISA_MIPS32R6;
+
+def : MipsPat<(select i32:$cond, i32:$t, immz),
+ (SELNEZ i32:$t, i32:$cond)>, ISA_MIPS32R6;
+def : MipsPat<(select (i32 (setne i32:$cond, immz)), i32:$t, immz),
+ (SELNEZ i32:$t, i32:$cond)>, ISA_MIPS32R6;
+def : MipsPat<(select (i32 (seteq i32:$cond, immz)), i32:$t, immz),
+ (SELEQZ i32:$t, i32:$cond)>, ISA_MIPS32R6;
+def : MipsPat<(select i32:$cond, immz, i32:$f),
+ (SELEQZ i32:$f, i32:$cond)>, ISA_MIPS32R6;
+def : MipsPat<(select (i32 (setne i32:$cond, immz)), immz, i32:$f),
+ (SELEQZ i32:$f, i32:$cond)>, ISA_MIPS32R6;
+def : MipsPat<(select (i32 (seteq i32:$cond, immz)), immz, i32:$f),
+ (SELNEZ i32:$f, i32:$cond)>, ISA_MIPS32R6;
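
Every select pattern above is an instance of the same decomposition:
exactly one of the two one-sided selects is zero, so ORing them
reconstructs the full select. A standalone model of the lowering:

    #include <cstdint>

    uint32_t selnez(uint32_t T, uint32_t C) { return C != 0 ? T : 0; }
    uint32_t seleqz(uint32_t F, uint32_t C) { return C == 0 ? F : 0; }
    // (C ? T : F) == selnez(T, C) | seleqz(F, C); one side is always zero.
    uint32_t select32(uint32_t C, uint32_t T, uint32_t F) {
      return selnez(T, C) | seleqz(F, C);
    }
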
diff --git a/lib/Target/Mips/Mips64InstrInfo.td b/lib/Target/Mips/Mips64InstrInfo.td
index 924b325..f0b6814 100644
--- a/lib/Target/Mips/Mips64InstrInfo.td
+++ b/lib/Target/Mips/Mips64InstrInfo.td
@@ -23,6 +23,8 @@ def uimm16_64 : Operand<i64> {
// Signed Operand
def simm10_64 : Operand<i64>;
+def imm64: Operand<i64>;
+
// Transformation Function - get Imm - 32.
def Subtract32 : SDNodeXForm<imm, [{
return getImm(N, (unsigned)N->getZExtValue() - 32);
@@ -36,6 +38,9 @@ def immZExt6 : ImmLeaf<i32, [{return Imm == (Imm & 0x3f);}]>;
def immSExt10_64 : PatLeaf<(i64 imm),
[{ return isInt<10>(N->getSExtValue()); }]>;
+def immZExt16_64 : PatLeaf<(i64 imm),
+                           [{ return isUInt<16>(N->getZExtValue()); }]>;
+
//===----------------------------------------------------------------------===//
// Instructions specific format
//===----------------------------------------------------------------------===//
@@ -62,7 +67,7 @@ let isPseudo = 1, isCodeGenOnly = 1 in {
let DecoderNamespace = "Mips64" in {
/// Arithmetic Instructions (ALU Immediate)
def DADDi : ArithLogicI<"daddi", simm16_64, GPR64Opnd>, ADDI_FM<0x18>,
- ISA_MIPS3;
+ ISA_MIPS3_NOT_32R6_64R6;
def DADDiu : ArithLogicI<"daddiu", simm16_64, GPR64Opnd, II_DADDIU,
immSExt16, add>,
ADDI_FM<0x19>, IsAsCheapAsAMove, ISA_MIPS3;
@@ -164,49 +169,58 @@ def SDR : StoreLeftRight<"sdr", MipsSDR, GPR64Opnd, II_SDR>, LW_FM<0x2d>,
ISA_MIPS3_NOT_32R6_64R6;
/// Load-linked, Store-conditional
-def LLD : LLBase<"lld", GPR64Opnd>, LW_FM<0x34>, ISA_MIPS3;
-def SCD : SCBase<"scd", GPR64Opnd>, LW_FM<0x3c>, ISA_MIPS3;
+def LLD : LLBase<"lld", GPR64Opnd>, LW_FM<0x34>, ISA_MIPS3_NOT_32R6_64R6;
+def SCD : SCBase<"scd", GPR64Opnd>, LW_FM<0x3c>, ISA_MIPS3_NOT_32R6_64R6;
/// Jump and Branch Instructions
let isCodeGenOnly = 1 in {
-def JR64 : IndirectBranch<"jr", GPR64Opnd>, MTLO_FM<8>;
-def BEQ64 : CBranch<"beq", brtarget, seteq, GPR64Opnd>, BEQ_FM<4>;
-def BNE64 : CBranch<"bne", brtarget, setne, GPR64Opnd>, BEQ_FM<5>;
-def BGEZ64 : CBranchZero<"bgez", brtarget, setge, GPR64Opnd>, BGEZ_FM<1, 1>;
-def BGTZ64 : CBranchZero<"bgtz", brtarget, setgt, GPR64Opnd>, BGEZ_FM<7, 0>;
-def BLEZ64 : CBranchZero<"blez", brtarget, setle, GPR64Opnd>, BGEZ_FM<6, 0>;
-def BLTZ64 : CBranchZero<"bltz", brtarget, setlt, GPR64Opnd>, BGEZ_FM<1, 0>;
-def JALR64 : JumpLinkReg<"jalr", GPR64Opnd>, JALR_FM;
-def JALR64Pseudo : JumpLinkRegPseudo<GPR64Opnd, JALR, RA, GPR32Opnd>;
-def TAILCALL64_R : TailCallReg<GPR64Opnd, JR, GPR32Opnd>;
+ def JR64 : IndirectBranch<"jr", GPR64Opnd>, MTLO_FM<8>;
+ def BEQ64 : CBranch<"beq", brtarget, seteq, GPR64Opnd>, BEQ_FM<4>;
+ def BNE64 : CBranch<"bne", brtarget, setne, GPR64Opnd>, BEQ_FM<5>;
+ def BGEZ64 : CBranchZero<"bgez", brtarget, setge, GPR64Opnd>, BGEZ_FM<1, 1>;
+ def BGTZ64 : CBranchZero<"bgtz", brtarget, setgt, GPR64Opnd>, BGEZ_FM<7, 0>;
+ def BLEZ64 : CBranchZero<"blez", brtarget, setle, GPR64Opnd>, BGEZ_FM<6, 0>;
+ def BLTZ64 : CBranchZero<"bltz", brtarget, setlt, GPR64Opnd>, BGEZ_FM<1, 0>;
+ def JALR64 : JumpLinkReg<"jalr", GPR64Opnd>, JALR_FM;
+ def JALR64Pseudo : JumpLinkRegPseudo<GPR64Opnd, JALR, RA, GPR32Opnd>;
+ def TAILCALL64_R : TailCallReg<GPR64Opnd, JR, GPR32Opnd>;
}
+def PseudoReturn64 : PseudoReturnBase<GPR64Opnd>;
+def PseudoIndirectBranch64 : PseudoIndirectBranchBase<GPR64Opnd>;
+
/// Multiply and Divide Instructions.
def DMULT : Mult<"dmult", II_DMULT, GPR64Opnd, [HI0_64, LO0_64]>,
- MULT_FM<0, 0x1c>, ISA_MIPS3;
+ MULT_FM<0, 0x1c>, ISA_MIPS3_NOT_32R6_64R6;
def DMULTu : Mult<"dmultu", II_DMULTU, GPR64Opnd, [HI0_64, LO0_64]>,
- MULT_FM<0, 0x1d>, ISA_MIPS3;
+ MULT_FM<0, 0x1d>, ISA_MIPS3_NOT_32R6_64R6;
def PseudoDMULT : MultDivPseudo<DMULT, ACC128, GPR64Opnd, MipsMult,
- II_DMULT>;
+ II_DMULT>, ISA_MIPS3_NOT_32R6_64R6;
def PseudoDMULTu : MultDivPseudo<DMULTu, ACC128, GPR64Opnd, MipsMultu,
- II_DMULTU>;
+ II_DMULTU>, ISA_MIPS3_NOT_32R6_64R6;
def DSDIV : Div<"ddiv", II_DDIV, GPR64Opnd, [HI0_64, LO0_64]>,
- MULT_FM<0, 0x1e>, ISA_MIPS3;
+ MULT_FM<0, 0x1e>, ISA_MIPS3_NOT_32R6_64R6;
def DUDIV : Div<"ddivu", II_DDIVU, GPR64Opnd, [HI0_64, LO0_64]>,
- MULT_FM<0, 0x1f>, ISA_MIPS3;
+ MULT_FM<0, 0x1f>, ISA_MIPS3_NOT_32R6_64R6;
def PseudoDSDIV : MultDivPseudo<DSDIV, ACC128, GPR64Opnd, MipsDivRem,
- II_DDIV, 0, 1, 1>;
+ II_DDIV, 0, 1, 1>, ISA_MIPS3_NOT_32R6_64R6;
def PseudoDUDIV : MultDivPseudo<DUDIV, ACC128, GPR64Opnd, MipsDivRemU,
- II_DDIVU, 0, 1, 1>;
+ II_DDIVU, 0, 1, 1>, ISA_MIPS3_NOT_32R6_64R6;
let isCodeGenOnly = 1 in {
-def MTHI64 : MoveToLOHI<"mthi", GPR64Opnd, [HI0_64]>, MTLO_FM<0x11>;
-def MTLO64 : MoveToLOHI<"mtlo", GPR64Opnd, [LO0_64]>, MTLO_FM<0x13>;
-def MFHI64 : MoveFromLOHI<"mfhi", GPR64Opnd, AC0_64>, MFLO_FM<0x10>;
-def MFLO64 : MoveFromLOHI<"mflo", GPR64Opnd, AC0_64>, MFLO_FM<0x12>;
-def PseudoMFHI64 : PseudoMFLOHI<GPR64, ACC128, MipsMFHI>;
-def PseudoMFLO64 : PseudoMFLOHI<GPR64, ACC128, MipsMFLO>;
-def PseudoMTLOHI64 : PseudoMTLOHI<ACC128, GPR64>;
+def MTHI64 : MoveToLOHI<"mthi", GPR64Opnd, [HI0_64]>, MTLO_FM<0x11>,
+ ISA_MIPS3_NOT_32R6_64R6;
+def MTLO64 : MoveToLOHI<"mtlo", GPR64Opnd, [LO0_64]>, MTLO_FM<0x13>,
+ ISA_MIPS3_NOT_32R6_64R6;
+def MFHI64 : MoveFromLOHI<"mfhi", GPR64Opnd, AC0_64>, MFLO_FM<0x10>,
+ ISA_MIPS3_NOT_32R6_64R6;
+def MFLO64 : MoveFromLOHI<"mflo", GPR64Opnd, AC0_64>, MFLO_FM<0x12>,
+ ISA_MIPS3_NOT_32R6_64R6;
+def PseudoMFHI64 : PseudoMFLOHI<GPR64, ACC128, MipsMFHI>,
+ ISA_MIPS3_NOT_32R6_64R6;
+def PseudoMFLO64 : PseudoMFLOHI<GPR64, ACC128, MipsMFLO>,
+ ISA_MIPS3_NOT_32R6_64R6;
+def PseudoMTLOHI64 : PseudoMTLOHI<ACC128, GPR64>, ISA_MIPS3_NOT_32R6_64R6;
/// Sign Ext In Register Instructions.
def SEB64 : SignExtInReg<"seb", i8, GPR64Opnd, II_SEB>, SEB_FM<0x10, 0x20>,
@@ -216,8 +230,8 @@ def SEH64 : SignExtInReg<"seh", i16, GPR64Opnd, II_SEH>, SEB_FM<0x18, 0x20>,
}
/// Count Leading
-def DCLZ : CountLeading0<"dclz", GPR64Opnd>, CLO_FM<0x24>, ISA_MIPS64;
-def DCLO : CountLeading1<"dclo", GPR64Opnd>, CLO_FM<0x25>, ISA_MIPS64;
+def DCLZ : CountLeading0<"dclz", GPR64Opnd>, CLO_FM<0x24>, ISA_MIPS64_NOT_64R6;
+def DCLO : CountLeading1<"dclo", GPR64Opnd>, CLO_FM<0x25>, ISA_MIPS64_NOT_64R6;
/// Double Word Swap Bytes/HalfWords
def DSBH : SubwordSwap<"dsbh", GPR64Opnd>, SEB_FM<2, 0x24>, ISA_MIPS64R2;
@@ -431,13 +445,13 @@ def : MipsInstAlias<"daddu $rs, $rt, $imm",
0>;
def : MipsInstAlias<"dadd $rs, $rt, $imm",
(DADDi GPR64Opnd:$rs, GPR64Opnd:$rt, simm16_64:$imm),
- 0>;
+ 0>, ISA_MIPS3_NOT_32R6_64R6;
def : MipsInstAlias<"daddu $rs, $imm",
(DADDiu GPR64Opnd:$rs, GPR64Opnd:$rs, simm16_64:$imm),
0>;
def : MipsInstAlias<"dadd $rs, $imm",
(DADDi GPR64Opnd:$rs, GPR64Opnd:$rs, simm16_64:$imm),
- 0>;
+ 0>, ISA_MIPS3_NOT_32R6_64R6;
def : MipsInstAlias<"add $rs, $imm",
(ADDi GPR32Opnd:$rs, GPR32Opnd:$rs, simm16:$imm),
0>;
@@ -450,10 +464,22 @@ def : MipsInstAlias<"dsll $rd, $rt, $rs",
def : MipsInstAlias<"dsubu $rt, $rs, $imm",
(DADDiu GPR64Opnd:$rt, GPR64Opnd:$rs,
InvertedImOperand64:$imm), 0>;
+def : MipsInstAlias<"dsubi $rs, $rt, $imm",
+ (DADDi GPR64Opnd:$rs, GPR64Opnd:$rt,
+ InvertedImOperand64:$imm),
+ 0>, ISA_MIPS3_NOT_32R6_64R6;
+def : MipsInstAlias<"dsubi $rs, $imm",
+ (DADDi GPR64Opnd:$rs, GPR64Opnd:$rs,
+ InvertedImOperand64:$imm),
+ 0>, ISA_MIPS3_NOT_32R6_64R6;
+def : MipsInstAlias<"dsub $rs, $rt, $imm",
+ (DADDi GPR64Opnd:$rs, GPR64Opnd:$rt,
+ InvertedImOperand64:$imm),
+ 0>, ISA_MIPS3_NOT_32R6_64R6;
def : MipsInstAlias<"dsub $rs, $imm",
(DADDi GPR64Opnd:$rs, GPR64Opnd:$rs,
InvertedImOperand64:$imm),
- 0>;
+ 0>, ISA_MIPS3_NOT_32R6_64R6;
def : MipsInstAlias<"dsubu $rs, $imm",
(DADDiu GPR64Opnd:$rs, GPR64Opnd:$rs,
InvertedImOperand64:$imm),
@@ -465,6 +491,11 @@ def : MipsInstAlias<"dsrl $rd, $rt, $rs",
(DSRLV GPR64Opnd:$rd, GPR64Opnd:$rt, GPR32Opnd:$rs), 0>,
ISA_MIPS3;
+class LoadImm64< string instr_asm, Operand Od, RegisterOperand RO> :
+ MipsAsmPseudoInst<(outs RO:$rt), (ins Od:$imm64),
+ !strconcat(instr_asm, "\t$rt, $imm64")> ;
+def LoadImm64Reg : LoadImm64<"dli", imm64, GPR64Opnd>;
+
/// Move between CPU and coprocessor registers
let DecoderNamespace = "Mips64", Predicates = [HasMips64] in {
def DMFC0 : MFC3OP<"dmfc0", GPR64Opnd>, MFC3OP_FM<0x10, 1>;
diff --git a/lib/Target/Mips/Mips64r6InstrInfo.td b/lib/Target/Mips/Mips64r6InstrInfo.td
index f971218..63cf60b 100644
--- a/lib/Target/Mips/Mips64r6InstrInfo.td
+++ b/lib/Target/Mips/Mips64r6InstrInfo.td
@@ -13,10 +13,6 @@
// Notes about removals/changes from MIPS32r6:
// Reencoded: dclo, dclz
-// Reencoded: lld, scd
-// Removed: daddi
-// Removed: ddiv, ddivu, dmult, dmultu
-// Removed: div, divu
//===----------------------------------------------------------------------===//
//
@@ -29,14 +25,20 @@ class DAUI_ENC : DAUI_FM;
class DAHI_ENC : REGIMM_FM<OPCODE5_DAHI>;
class DATI_ENC : REGIMM_FM<OPCODE5_DATI>;
class DBITSWAP_ENC : SPECIAL3_2R_FM<OPCODE6_DBITSWAP>;
+class DCLO_R6_ENC : SPECIAL_2R_FM<OPCODE6_DCLO>;
+class DCLZ_R6_ENC : SPECIAL_2R_FM<OPCODE6_DCLZ>;
class DDIV_ENC : SPECIAL_3R_FM<0b00010, 0b011110>;
class DDIVU_ENC : SPECIAL_3R_FM<0b00010, 0b011111>;
+class DLSA_R6_ENC : SPECIAL_LSA_FM<OPCODE6_DLSA>;
class DMOD_ENC : SPECIAL_3R_FM<0b00011, 0b011110>;
class DMODU_ENC : SPECIAL_3R_FM<0b00011, 0b011111>;
-class DMUH_ENC : SPECIAL_3R_FM<0b00011, 0b111000>;
-class DMUHU_ENC : SPECIAL_3R_FM<0b00011, 0b111001>;
-class DMUL_R6_ENC : SPECIAL_3R_FM<0b00010, 0b111000>;
-class DMULU_ENC : SPECIAL_3R_FM<0b00010, 0b111001>;
+class DMUH_ENC : SPECIAL_3R_FM<0b00011, 0b011100>;
+class DMUHU_ENC : SPECIAL_3R_FM<0b00011, 0b011101>;
+class DMUL_R6_ENC : SPECIAL_3R_FM<0b00010, 0b011100>;
+class DMULU_ENC : SPECIAL_3R_FM<0b00010, 0b011101>;
+class LDPC_ENC : PCREL18_FM<OPCODE3_LDPC>;
+class LLD_R6_ENC : SPECIAL3_LL_SC_FM<OPCODE6_LLD>;
+class SCD_R6_ENC : SPECIAL3_LL_SC_FM<OPCODE6_SCD>;
//===----------------------------------------------------------------------===//
//
@@ -56,14 +58,22 @@ class DAHI_DESC : AHI_ATI_DESC_BASE<"dahi", GPR64Opnd>;
class DATI_DESC : AHI_ATI_DESC_BASE<"dati", GPR64Opnd>;
class DAUI_DESC : AUI_DESC_BASE<"daui", GPR64Opnd>;
class DBITSWAP_DESC : BITSWAP_DESC_BASE<"dbitswap", GPR64Opnd>;
-class DDIV_DESC : DIVMOD_DESC_BASE<"ddiv", GPR64Opnd>;
-class DDIVU_DESC : DIVMOD_DESC_BASE<"ddivu", GPR64Opnd>;
-class DMOD_DESC : DIVMOD_DESC_BASE<"dmod", GPR64Opnd>;
-class DMODU_DESC : DIVMOD_DESC_BASE<"dmodu", GPR64Opnd>;
-class DMUH_DESC : MUL_R6_DESC_BASE<"dmuh", GPR64Opnd>;
-class DMUHU_DESC : MUL_R6_DESC_BASE<"dmuhu", GPR64Opnd>;
-class DMUL_R6_DESC : MUL_R6_DESC_BASE<"dmul", GPR64Opnd>;
+class DCLO_R6_DESC : CLO_R6_DESC_BASE<"dclo", GPR64Opnd>;
+class DCLZ_R6_DESC : CLZ_R6_DESC_BASE<"dclz", GPR64Opnd>;
+class DDIV_DESC : DIVMOD_DESC_BASE<"ddiv", GPR64Opnd, sdiv>;
+class DDIVU_DESC : DIVMOD_DESC_BASE<"ddivu", GPR64Opnd, udiv>;
+class DLSA_R6_DESC : LSA_R6_DESC_BASE<"dlsa", GPR64Opnd, uimm2>;
+class DMOD_DESC : DIVMOD_DESC_BASE<"dmod", GPR64Opnd, srem>;
+class DMODU_DESC : DIVMOD_DESC_BASE<"dmodu", GPR64Opnd, urem>;
+class DMUH_DESC : MUL_R6_DESC_BASE<"dmuh", GPR64Opnd, mulhs>;
+class DMUHU_DESC : MUL_R6_DESC_BASE<"dmuhu", GPR64Opnd, mulhu>;
+class DMUL_R6_DESC : MUL_R6_DESC_BASE<"dmul", GPR64Opnd, mul>;
class DMULU_DESC : MUL_R6_DESC_BASE<"dmulu", GPR64Opnd>;
+class LDPC_DESC : PCREL_DESC_BASE<"ldpc", GPR64Opnd, simm18_lsl3>;
+class LLD_R6_DESC : LL_R6_DESC_BASE<"lld", GPR64Opnd>;
+class SCD_R6_DESC : SC_R6_DESC_BASE<"scd", GPR64Opnd>;
+class SELEQZ64_DESC : SELEQNE_Z_DESC_BASE<"seleqz", GPR64Opnd>;
+class SELNEZ64_DESC : SELEQNE_Z_DESC_BASE<"selnez", GPR64Opnd>;
//===----------------------------------------------------------------------===//
//
@@ -76,13 +86,132 @@ def DALIGN : DALIGN_ENC, DALIGN_DESC, ISA_MIPS64R6;
def DATI : DATI_ENC, DATI_DESC, ISA_MIPS64R6;
def DAUI : DAUI_ENC, DAUI_DESC, ISA_MIPS64R6;
def DBITSWAP : DBITSWAP_ENC, DBITSWAP_DESC, ISA_MIPS64R6;
+def DCLO_R6 : DCLO_R6_ENC, DCLO_R6_DESC, ISA_MIPS64R6;
+def DCLZ_R6 : DCLZ_R6_ENC, DCLZ_R6_DESC, ISA_MIPS64R6;
def DDIV : DDIV_ENC, DDIV_DESC, ISA_MIPS64R6;
def DDIVU : DDIVU_ENC, DDIVU_DESC, ISA_MIPS64R6;
-// def DLSA; // See MSA
+def DLSA_R6 : DLSA_R6_ENC, DLSA_R6_DESC, ISA_MIPS64R6;
def DMOD : DMOD_ENC, DMOD_DESC, ISA_MIPS64R6;
def DMODU : DMODU_ENC, DMODU_DESC, ISA_MIPS64R6;
def DMUH: DMUH_ENC, DMUH_DESC, ISA_MIPS64R6;
def DMUHU: DMUHU_ENC, DMUHU_DESC, ISA_MIPS64R6;
def DMUL_R6: DMUL_R6_ENC, DMUL_R6_DESC, ISA_MIPS64R6;
def DMULU: DMULU_ENC, DMULU_DESC, ISA_MIPS64R6;
-def LDPC;
+def LDPC: LDPC_ENC, LDPC_DESC, ISA_MIPS64R6;
+def LLD_R6 : LLD_R6_ENC, LLD_R6_DESC, ISA_MIPS32R6;
+def SCD_R6 : SCD_R6_ENC, SCD_R6_DESC, ISA_MIPS32R6;
+let DecoderNamespace = "Mips32r6_64r6_GP64" in {
+ def SELEQZ64 : SELEQZ_ENC, SELEQZ64_DESC, ISA_MIPS32R6, GPR_64;
+ def SELNEZ64 : SELNEZ_ENC, SELNEZ64_DESC, ISA_MIPS32R6, GPR_64;
+}
+
+//===----------------------------------------------------------------------===//
+//
+// Instruction Aliases
+//
+//===----------------------------------------------------------------------===//
+
+def : MipsInstAlias<"jr $rs", (JALR64 ZERO_64, GPR64Opnd:$rs), 1>, ISA_MIPS64R6;
+
+//===----------------------------------------------------------------------===//
+//
+// Patterns and Pseudo Instructions
+//
+//===----------------------------------------------------------------------===//
+
+// i64 selects
+def : MipsPat<(select i64:$cond, i64:$t, i64:$f),
+ (OR64 (SELNEZ64 i64:$t, i64:$cond),
+ (SELEQZ64 i64:$f, i64:$cond))>,
+ ISA_MIPS64R6;
+def : MipsPat<(select (i32 (seteq i64:$cond, immz)), i64:$t, i64:$f),
+ (OR64 (SELEQZ64 i64:$t, i64:$cond),
+ (SELNEZ64 i64:$f, i64:$cond))>,
+ ISA_MIPS64R6;
+def : MipsPat<(select (i32 (setne i64:$cond, immz)), i64:$t, i64:$f),
+ (OR64 (SELNEZ64 i64:$t, i64:$cond),
+ (SELEQZ64 i64:$f, i64:$cond))>,
+ ISA_MIPS64R6;
+def : MipsPat<(select (i32 (seteq i64:$cond, immZExt16_64:$imm)), i64:$t, i64:$f),
+ (OR64 (SELEQZ64 i64:$t, (XORi64 i64:$cond, immZExt16_64:$imm)),
+ (SELNEZ64 i64:$f, (XORi64 i64:$cond, immZExt16_64:$imm)))>,
+ ISA_MIPS64R6;
+def : MipsPat<(select (i32 (setne i64:$cond, immZExt16_64:$imm)), i64:$t, i64:$f),
+ (OR64 (SELNEZ64 i64:$t, (XORi64 i64:$cond, immZExt16_64:$imm)),
+ (SELEQZ64 i64:$f, (XORi64 i64:$cond, immZExt16_64:$imm)))>,
+ ISA_MIPS64R6;
+def : MipsPat<
+ (select (i32 (setgt i64:$cond, immSExt16Plus1:$imm)), i64:$t, i64:$f),
+ (OR64 (SELEQZ64 i64:$t,
+ (SUBREG_TO_REG (i64 0), (SLTi64 i64:$cond, (Plus1 imm:$imm)),
+ sub_32)),
+ (SELNEZ64 i64:$f,
+ (SUBREG_TO_REG (i64 0), (SLTi64 i64:$cond, (Plus1 imm:$imm)),
+ sub_32)))>,
+ ISA_MIPS64R6;
+def : MipsPat<
+ (select (i32 (setugt i64:$cond, immSExt16Plus1:$imm)), i64:$t, i64:$f),
+ (OR64 (SELEQZ64 i64:$t,
+ (SUBREG_TO_REG (i64 0), (SLTiu64 i64:$cond, (Plus1 imm:$imm)),
+ sub_32)),
+ (SELNEZ64 i64:$f,
+ (SUBREG_TO_REG (i64 0), (SLTiu64 i64:$cond, (Plus1 imm:$imm)),
+ sub_32)))>,
+ ISA_MIPS64R6;
+
+def : MipsPat<(select (i32 (setne i64:$cond, immz)), i64:$t, immz),
+ (SELNEZ64 i64:$t, i64:$cond)>, ISA_MIPS64R6;
+def : MipsPat<(select (i32 (seteq i64:$cond, immz)), i64:$t, immz),
+ (SELEQZ64 i64:$t, i64:$cond)>, ISA_MIPS64R6;
+def : MipsPat<(select (i32 (setne i64:$cond, immz)), immz, i64:$f),
+ (SELEQZ64 i64:$f, i64:$cond)>, ISA_MIPS64R6;
+def : MipsPat<(select (i32 (seteq i64:$cond, immz)), immz, i64:$f),
+ (SELNEZ64 i64:$f, i64:$cond)>, ISA_MIPS64R6;
+
+// i64 selects from an i32 comparison
+// One complicating factor here is that bits 32-63 of an i32 are undefined.
+// FIXME: Ideally, setcc would always produce an i64 on MIPS64 targets.
+// This would allow us to remove the sign-extensions here.
+def : MipsPat<(select i32:$cond, i64:$t, i64:$f),
+ (OR64 (SELNEZ64 i64:$t, (SLL64_32 i32:$cond)),
+ (SELEQZ64 i64:$f, (SLL64_32 i32:$cond)))>,
+ ISA_MIPS64R6;
+def : MipsPat<(select (i32 (seteq i32:$cond, immz)), i64:$t, i64:$f),
+ (OR64 (SELEQZ64 i64:$t, (SLL64_32 i32:$cond)),
+ (SELNEZ64 i64:$f, (SLL64_32 i32:$cond)))>,
+ ISA_MIPS64R6;
+def : MipsPat<(select (i32 (setne i32:$cond, immz)), i64:$t, i64:$f),
+ (OR64 (SELNEZ64 i64:$t, (SLL64_32 i32:$cond)),
+ (SELEQZ64 i64:$f, (SLL64_32 i32:$cond)))>,
+ ISA_MIPS64R6;
+def : MipsPat<(select (i32 (seteq i32:$cond, immZExt16:$imm)), i64:$t, i64:$f),
+ (OR64 (SELEQZ64 i64:$t, (SLL64_32 (XORi i32:$cond,
+ immZExt16:$imm))),
+ (SELNEZ64 i64:$f, (SLL64_32 (XORi i32:$cond,
+ immZExt16:$imm))))>,
+ ISA_MIPS64R6;
+def : MipsPat<(select (i32 (setne i32:$cond, immZExt16:$imm)), i64:$t, i64:$f),
+              (OR64 (SELNEZ64 i64:$t, (SLL64_32 (XORi i32:$cond,
+                                                immZExt16:$imm))),
+                    (SELEQZ64 i64:$f, (SLL64_32 (XORi i32:$cond,
+                                                immZExt16:$imm))))>,
+ ISA_MIPS64R6;
+
+def : MipsPat<(select i32:$cond, i64:$t, immz),
+ (SELNEZ64 i64:$t, (SLL64_32 i32:$cond))>,
+ ISA_MIPS64R6;
+def : MipsPat<(select (i32 (setne i32:$cond, immz)), i64:$t, immz),
+ (SELNEZ64 i64:$t, (SLL64_32 i32:$cond))>,
+ ISA_MIPS64R6;
+def : MipsPat<(select (i32 (seteq i32:$cond, immz)), i64:$t, immz),
+ (SELEQZ64 i64:$t, (SLL64_32 i32:$cond))>,
+ ISA_MIPS64R6;
+def : MipsPat<(select i32:$cond, immz, i64:$f),
+ (SELEQZ64 i64:$f, (SLL64_32 i32:$cond))>,
+ ISA_MIPS64R6;
+def : MipsPat<(select (i32 (setne i32:$cond, immz)), immz, i64:$f),
+ (SELEQZ64 i64:$f, (SLL64_32 i32:$cond))>,
+ ISA_MIPS64R6;
+def : MipsPat<(select (i32 (seteq i32:$cond, immz)), immz, i64:$f),
+ (SELNEZ64 i64:$f, (SLL64_32 i32:$cond))>,
+ ISA_MIPS64R6;
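
The SLL64_32 wrapper in these patterns addresses the undefined upper
bits noted in the comment above: on MIPS64 the 32-bit sll sign-extends
its result, so a shift by zero yields a 64-bit value that is zero
exactly when the i32 condition is zero. A model of that normalization:

    #include <cstdint>

    // Behaves like SLL64_32 with a shift amount of 0: bits 32-63 become
    // well defined, and the value is zero iff the i32 was zero.
    int64_t sll64_32(uint32_t Cond) {
      return (int64_t)(int32_t)(Cond << 0);
    }
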
diff --git a/lib/Target/Mips/MipsAsmPrinter.cpp b/lib/Target/Mips/MipsAsmPrinter.cpp
index 6df90aa..1fb75a2 100644
--- a/lib/Target/Mips/MipsAsmPrinter.cpp
+++ b/lib/Target/Mips/MipsAsmPrinter.cpp
@@ -91,7 +91,46 @@ bool MipsAsmPrinter::lowerOperand(const MachineOperand &MO, MCOperand &MCOp) {
#include "MipsGenMCPseudoLowering.inc"
+// Lower PseudoReturn/PseudoIndirectBranch/PseudoIndirectBranch64 to JR, JR_MM,
+// JALR, or JALR64 as appropriate for the target
+void MipsAsmPrinter::emitPseudoIndirectBranch(MCStreamer &OutStreamer,
+ const MachineInstr *MI) {
+ bool HasLinkReg = false;
+ MCInst TmpInst0;
+
+ if (Subtarget->hasMips64r6()) {
+ // MIPS64r6 should use (JALR64 ZERO_64, $rs)
+ TmpInst0.setOpcode(Mips::JALR64);
+ HasLinkReg = true;
+ } else if (Subtarget->hasMips32r6()) {
+ // MIPS32r6 should use (JALR ZERO, $rs)
+ TmpInst0.setOpcode(Mips::JALR);
+ HasLinkReg = true;
+ } else if (Subtarget->inMicroMipsMode())
+ // microMIPS should use (JR_MM $rs)
+ TmpInst0.setOpcode(Mips::JR_MM);
+ else {
+ // Everything else should use (JR $rs)
+ TmpInst0.setOpcode(Mips::JR);
+ }
+
+ MCOperand MCOp;
+
+ if (HasLinkReg) {
+ unsigned ZeroReg = Subtarget->isGP64bit() ? Mips::ZERO_64 : Mips::ZERO;
+ TmpInst0.addOperand(MCOperand::CreateReg(ZeroReg));
+ }
+
+ lowerOperand(MI->getOperand(0), MCOp);
+ TmpInst0.addOperand(MCOp);
+
+ EmitToStreamer(OutStreamer, TmpInst0);
+}
+
void MipsAsmPrinter::EmitInstruction(const MachineInstr *MI) {
+ MipsTargetStreamer &TS = getTargetStreamer();
+ TS.setCanHaveModuleDir(false);
+
if (MI->isDebugValue()) {
SmallString<128> Str;
raw_svector_ostream OS(Str);
@@ -141,6 +180,14 @@ void MipsAsmPrinter::EmitInstruction(const MachineInstr *MI) {
if (emitPseudoExpansionLowering(OutStreamer, &*I))
continue;
+ if (I->getOpcode() == Mips::PseudoReturn ||
+ I->getOpcode() == Mips::PseudoReturn64 ||
+ I->getOpcode() == Mips::PseudoIndirectBranch ||
+ I->getOpcode() == Mips::PseudoIndirectBranch64) {
+ emitPseudoIndirectBranch(OutStreamer, &*I);
+ continue;
+ }
+
// The inMips16Mode() test is not permanent.
// Some instructions are marked as pseudo right now which
// would make the test fail for the wrong reason but
@@ -657,6 +704,13 @@ void MipsAsmPrinter::EmitStartOfAsmFile(Module &M) {
OutContext.getELFSection(".gcc_compiled_long64", ELF::SHT_PROGBITS, 0,
SectionKind::getDataRel()));
}
+
+ getTargetStreamer().updateABIInfo(*Subtarget);
+ getTargetStreamer().emitDirectiveModuleFP();
+
+ if (Subtarget->isABI_O32())
+ getTargetStreamer().emitDirectiveModuleOddSPReg(Subtarget->useOddSPReg(),
+ Subtarget->isABI_O32());
}
void MipsAsmPrinter::EmitJal(MCSymbol *Symbol) {
@@ -852,7 +906,7 @@ void MipsAsmPrinter::EmitFPCallStub(
TS.emitDirectiveSetNoMicroMips();
//
// .ent __call_stub_fp_xxxx
- // .type __call_stub_fp_xxxx,@function
+ // .type __call_stub_fp_xxxx,@function
// __call_stub_fp_xxxx:
//
std::string x = "__call_stub_fp_" + std::string(Symbol);
diff --git a/lib/Target/Mips/MipsAsmPrinter.h b/lib/Target/Mips/MipsAsmPrinter.h
index e82b145..967aa0b 100644
--- a/lib/Target/Mips/MipsAsmPrinter.h
+++ b/lib/Target/Mips/MipsAsmPrinter.h
@@ -40,6 +40,12 @@ private:
bool emitPseudoExpansionLowering(MCStreamer &OutStreamer,
const MachineInstr *MI);
+ // Emit PseudoReturn, PseudoReturn64, PseudoIndirectBranch,
+ // and PseudoIndirectBranch64 as a JR, JR_MM, JALR, or JALR64 as appropriate
+ // for the target.
+ void emitPseudoIndirectBranch(MCStreamer &OutStreamer,
+ const MachineInstr *MI);
+
// lowerOperand - Convert a MachineOperand into the equivalent MCOperand.
bool lowerOperand(const MachineOperand &MO, MCOperand &MCOp);
diff --git a/lib/Target/Mips/MipsCallingConv.td b/lib/Target/Mips/MipsCallingConv.td
index c83d880..007213c 100644
--- a/lib/Target/Mips/MipsCallingConv.td
+++ b/lib/Target/Mips/MipsCallingConv.td
@@ -239,6 +239,11 @@ def RetCC_Mips : CallingConv<[
def CSR_SingleFloatOnly : CalleeSavedRegs<(add (sequence "F%u", 31, 20), RA, FP,
(sequence "S%u", 7, 0))>;
+def CSR_O32_FPXX : CalleeSavedRegs<(add (sequence "D%u", 15, 10), RA, FP,
+ (sequence "S%u", 7, 0))> {
+ let OtherPreserved = (add (decimate (sequence "F%u", 30, 20), 2));
+}
+
def CSR_O32 : CalleeSavedRegs<(add (sequence "D%u", 15, 10), RA, FP,
(sequence "S%u", 7, 0))>;
diff --git a/lib/Target/Mips/MipsCodeEmitter.cpp b/lib/Target/Mips/MipsCodeEmitter.cpp
index 13fa546..151ef13 100644
--- a/lib/Target/Mips/MipsCodeEmitter.cpp
+++ b/lib/Target/Mips/MipsCodeEmitter.cpp
@@ -124,6 +124,7 @@ private:
unsigned getSizeInsEncoding(const MachineInstr &MI, unsigned OpNo) const;
unsigned getLSAImmEncoding(const MachineInstr &MI, unsigned OpNo) const;
unsigned getSimm19Lsl2Encoding(const MachineInstr &MI, unsigned OpNo) const;
+ unsigned getSimm18Lsl3Encoding(const MachineInstr &MI, unsigned OpNo) const;
/// Expand pseudo instructions with accumulator register operands.
void expandACCInstr(MachineBasicBlock::instr_iterator MI,
@@ -273,6 +274,12 @@ unsigned MipsCodeEmitter::getLSAImmEncoding(const MachineInstr &MI,
return 0;
}
+unsigned MipsCodeEmitter::getSimm18Lsl3Encoding(const MachineInstr &MI,
+ unsigned OpNo) const {
+ llvm_unreachable("Unimplemented function.");
+ return 0;
+}
+
unsigned MipsCodeEmitter::getSimm19Lsl2Encoding(const MachineInstr &MI,
unsigned OpNo) const {
llvm_unreachable("Unimplemented function.");
diff --git a/lib/Target/Mips/MipsCondMov.td b/lib/Target/Mips/MipsCondMov.td
index 7177f65..690f626 100644
--- a/lib/Target/Mips/MipsCondMov.td
+++ b/lib/Target/Mips/MipsCondMov.td
@@ -104,136 +104,162 @@ multiclass MovnPats<RegisterClass CRC, RegisterClass DRC, Instruction MOVNInst,
// Instantiation of instructions.
def MOVZ_I_I : MMRel, CMov_I_I_FT<"movz", GPR32Opnd, GPR32Opnd, II_MOVZ>,
- ADD_FM<0, 0xa>, INSN_MIPS4_32;
+ ADD_FM<0, 0xa>, INSN_MIPS4_32_NOT_32R6_64R6;
let isCodeGenOnly = 1 in {
def MOVZ_I_I64 : CMov_I_I_FT<"movz", GPR32Opnd, GPR64Opnd, II_MOVZ>,
- ADD_FM<0, 0xa>;
+ ADD_FM<0, 0xa>, INSN_MIPS4_32_NOT_32R6_64R6;
def MOVZ_I64_I : CMov_I_I_FT<"movz", GPR64Opnd, GPR32Opnd, II_MOVZ>,
- ADD_FM<0, 0xa>;
+ ADD_FM<0, 0xa>, INSN_MIPS4_32_NOT_32R6_64R6;
def MOVZ_I64_I64 : CMov_I_I_FT<"movz", GPR64Opnd, GPR64Opnd, II_MOVZ>,
- ADD_FM<0, 0xa>;
+ ADD_FM<0, 0xa>, INSN_MIPS4_32_NOT_32R6_64R6;
}
def MOVN_I_I : MMRel, CMov_I_I_FT<"movn", GPR32Opnd, GPR32Opnd, II_MOVN>,
- ADD_FM<0, 0xb>, INSN_MIPS4_32;
+ ADD_FM<0, 0xb>, INSN_MIPS4_32_NOT_32R6_64R6;
let isCodeGenOnly = 1 in {
def MOVN_I_I64 : CMov_I_I_FT<"movn", GPR32Opnd, GPR64Opnd, II_MOVN>,
- ADD_FM<0, 0xb>;
+ ADD_FM<0, 0xb>, INSN_MIPS4_32_NOT_32R6_64R6;
def MOVN_I64_I : CMov_I_I_FT<"movn", GPR64Opnd, GPR32Opnd, II_MOVN>,
- ADD_FM<0, 0xb>;
+ ADD_FM<0, 0xb>, INSN_MIPS4_32_NOT_32R6_64R6;
def MOVN_I64_I64 : CMov_I_I_FT<"movn", GPR64Opnd, GPR64Opnd, II_MOVN>,
- ADD_FM<0, 0xb>;
+ ADD_FM<0, 0xb>, INSN_MIPS4_32_NOT_32R6_64R6;
}
def MOVZ_I_S : MMRel, CMov_I_F_FT<"movz.s", GPR32Opnd, FGR32Opnd, II_MOVZ_S>,
- CMov_I_F_FM<18, 16>, INSN_MIPS4_32;
+ CMov_I_F_FM<18, 16>, INSN_MIPS4_32_NOT_32R6_64R6;
let isCodeGenOnly = 1 in
def MOVZ_I64_S : CMov_I_F_FT<"movz.s", GPR64Opnd, FGR32Opnd, II_MOVZ_S>,
- CMov_I_F_FM<18, 16>, AdditionalRequires<[HasMips64]>;
+ CMov_I_F_FM<18, 16>, INSN_MIPS4_32_NOT_32R6_64R6,
+ AdditionalRequires<[HasMips64]>;
def MOVN_I_S : MMRel, CMov_I_F_FT<"movn.s", GPR32Opnd, FGR32Opnd, II_MOVN_S>,
- CMov_I_F_FM<19, 16>, INSN_MIPS4_32;
+ CMov_I_F_FM<19, 16>, INSN_MIPS4_32_NOT_32R6_64R6;
let isCodeGenOnly = 1 in
def MOVN_I64_S : CMov_I_F_FT<"movn.s", GPR64Opnd, FGR32Opnd, II_MOVN_S>,
- CMov_I_F_FM<19, 16>, AdditionalRequires<[IsGP64bit]>;
+ CMov_I_F_FM<19, 16>, INSN_MIPS4_32_NOT_32R6_64R6,
+ AdditionalRequires<[IsGP64bit]>;
def MOVZ_I_D32 : MMRel, CMov_I_F_FT<"movz.d", GPR32Opnd, AFGR64Opnd,
II_MOVZ_D>, CMov_I_F_FM<18, 17>,
- INSN_MIPS4_32, FGR_32;
+ INSN_MIPS4_32_NOT_32R6_64R6, FGR_32;
def MOVN_I_D32 : MMRel, CMov_I_F_FT<"movn.d", GPR32Opnd, AFGR64Opnd,
II_MOVN_D>, CMov_I_F_FM<19, 17>,
- INSN_MIPS4_32, FGR_32;
+ INSN_MIPS4_32_NOT_32R6_64R6, FGR_32;
let DecoderNamespace = "Mips64" in {
def MOVZ_I_D64 : CMov_I_F_FT<"movz.d", GPR32Opnd, FGR64Opnd, II_MOVZ_D>,
- CMov_I_F_FM<18, 17>, INSN_MIPS4_32, FGR_64;
+ CMov_I_F_FM<18, 17>, INSN_MIPS4_32_NOT_32R6_64R6, FGR_64;
def MOVN_I_D64 : CMov_I_F_FT<"movn.d", GPR32Opnd, FGR64Opnd, II_MOVN_D>,
- CMov_I_F_FM<19, 17>, INSN_MIPS4_32, FGR_64;
+ CMov_I_F_FM<19, 17>, INSN_MIPS4_32_NOT_32R6_64R6, FGR_64;
let isCodeGenOnly = 1 in {
- def MOVZ_I64_D64 : CMov_I_F_FT<"movz.d", GPR64Opnd, FGR64Opnd,
- II_MOVZ_D>, CMov_I_F_FM<18, 17>, FGR_64;
- def MOVN_I64_D64 : CMov_I_F_FT<"movn.d", GPR64Opnd, FGR64Opnd,
- II_MOVN_D>, CMov_I_F_FM<19, 17>, FGR_64;
+ def MOVZ_I64_D64 : CMov_I_F_FT<"movz.d", GPR64Opnd, FGR64Opnd, II_MOVZ_D>,
+ CMov_I_F_FM<18, 17>, INSN_MIPS4_32_NOT_32R6_64R6, FGR_64;
+ def MOVN_I64_D64 : CMov_I_F_FT<"movn.d", GPR64Opnd, FGR64Opnd, II_MOVN_D>,
+ CMov_I_F_FM<19, 17>, INSN_MIPS4_32_NOT_32R6_64R6, FGR_64;
}
}
def MOVT_I : MMRel, CMov_F_I_FT<"movt", GPR32Opnd, II_MOVT, MipsCMovFP_T>,
- CMov_F_I_FM<1>, INSN_MIPS4_32;
+ CMov_F_I_FM<1>, INSN_MIPS4_32_NOT_32R6_64R6;
let isCodeGenOnly = 1 in
def MOVT_I64 : CMov_F_I_FT<"movt", GPR64Opnd, II_MOVT, MipsCMovFP_T>,
- CMov_F_I_FM<1>, AdditionalRequires<[IsGP64bit]>;
+ CMov_F_I_FM<1>, INSN_MIPS4_32_NOT_32R6_64R6,
+ AdditionalRequires<[IsGP64bit]>;
def MOVF_I : MMRel, CMov_F_I_FT<"movf", GPR32Opnd, II_MOVF, MipsCMovFP_F>,
- CMov_F_I_FM<0>, INSN_MIPS4_32;
+ CMov_F_I_FM<0>, INSN_MIPS4_32_NOT_32R6_64R6;
let isCodeGenOnly = 1 in
def MOVF_I64 : CMov_F_I_FT<"movf", GPR64Opnd, II_MOVF, MipsCMovFP_F>,
- CMov_F_I_FM<0>, AdditionalRequires<[IsGP64bit]>;
+ CMov_F_I_FM<0>, INSN_MIPS4_32_NOT_32R6_64R6,
+ AdditionalRequires<[IsGP64bit]>;
def MOVT_S : MMRel, CMov_F_F_FT<"movt.s", FGR32Opnd, II_MOVT_S, MipsCMovFP_T>,
- CMov_F_F_FM<16, 1>, INSN_MIPS4_32;
+ CMov_F_F_FM<16, 1>, INSN_MIPS4_32_NOT_32R6_64R6;
def MOVF_S : MMRel, CMov_F_F_FT<"movf.s", FGR32Opnd, II_MOVF_S, MipsCMovFP_F>,
- CMov_F_F_FM<16, 0>, INSN_MIPS4_32;
+ CMov_F_F_FM<16, 0>, INSN_MIPS4_32_NOT_32R6_64R6;
def MOVT_D32 : MMRel, CMov_F_F_FT<"movt.d", AFGR64Opnd, II_MOVT_D,
MipsCMovFP_T>, CMov_F_F_FM<17, 1>,
- INSN_MIPS4_32, FGR_32;
+ INSN_MIPS4_32_NOT_32R6_64R6, FGR_32;
def MOVF_D32 : MMRel, CMov_F_F_FT<"movf.d", AFGR64Opnd, II_MOVF_D,
MipsCMovFP_F>, CMov_F_F_FM<17, 0>,
- INSN_MIPS4_32, FGR_32;
+ INSN_MIPS4_32_NOT_32R6_64R6, FGR_32;
let DecoderNamespace = "Mips64" in {
def MOVT_D64 : CMov_F_F_FT<"movt.d", FGR64Opnd, II_MOVT_D, MipsCMovFP_T>,
- CMov_F_F_FM<17, 1>, INSN_MIPS4_32, FGR_64;
+ CMov_F_F_FM<17, 1>, INSN_MIPS4_32_NOT_32R6_64R6, FGR_64;
def MOVF_D64 : CMov_F_F_FT<"movf.d", FGR64Opnd, II_MOVF_D, MipsCMovFP_F>,
- CMov_F_F_FM<17, 0>, INSN_MIPS4_32, FGR_64;
+ CMov_F_F_FM<17, 0>, INSN_MIPS4_32_NOT_32R6_64R6, FGR_64;
}
// Instantiation of conditional move patterns.
-defm : MovzPats0<GPR32, GPR32, MOVZ_I_I, SLT, SLTu, SLTi, SLTiu>;
-defm : MovzPats1<GPR32, GPR32, MOVZ_I_I, XOR>;
-defm : MovzPats2<GPR32, GPR32, MOVZ_I_I, XORi>;
+defm : MovzPats0<GPR32, GPR32, MOVZ_I_I, SLT, SLTu, SLTi, SLTiu>,
+ INSN_MIPS4_32_NOT_32R6_64R6;
+defm : MovzPats1<GPR32, GPR32, MOVZ_I_I, XOR>, INSN_MIPS4_32_NOT_32R6_64R6;
+defm : MovzPats2<GPR32, GPR32, MOVZ_I_I, XORi>, INSN_MIPS4_32_NOT_32R6_64R6;
-defm : MovzPats0<GPR32, GPR64, MOVZ_I_I64, SLT, SLTu, SLTi, SLTiu>, GPR_64;
+defm : MovzPats0<GPR32, GPR64, MOVZ_I_I64, SLT, SLTu, SLTi, SLTiu>,
+ INSN_MIPS4_32_NOT_32R6_64R6, GPR_64;
defm : MovzPats0<GPR64, GPR32, MOVZ_I_I, SLT64, SLTu64, SLTi64, SLTiu64>,
- GPR_64;
+ INSN_MIPS4_32_NOT_32R6_64R6, GPR_64;
defm : MovzPats0<GPR64, GPR64, MOVZ_I_I64, SLT64, SLTu64, SLTi64, SLTiu64>,
+ INSN_MIPS4_32_NOT_32R6_64R6, GPR_64;
+defm : MovzPats1<GPR32, GPR64, MOVZ_I_I64, XOR>,
+ INSN_MIPS4_32_NOT_32R6_64R6, GPR_64;
+defm : MovzPats1<GPR64, GPR32, MOVZ_I64_I, XOR64>,
+ INSN_MIPS4_32_NOT_32R6_64R6, GPR_64;
+defm : MovzPats1<GPR64, GPR64, MOVZ_I64_I64, XOR64>,
+ INSN_MIPS4_32_NOT_32R6_64R6, GPR_64;
+defm : MovzPats2<GPR32, GPR64, MOVZ_I_I64, XORi>,
+ INSN_MIPS4_32_NOT_32R6_64R6, GPR_64;
+defm : MovzPats2<GPR64, GPR32, MOVZ_I64_I, XORi64>,
+ INSN_MIPS4_32_NOT_32R6_64R6, GPR_64;
+defm : MovzPats2<GPR64, GPR64, MOVZ_I64_I64, XORi64>,
+ INSN_MIPS4_32_NOT_32R6_64R6, GPR_64;
+
+defm : MovnPats<GPR32, GPR32, MOVN_I_I, XOR>, INSN_MIPS4_32_NOT_32R6_64R6;
+
+defm : MovnPats<GPR32, GPR64, MOVN_I_I64, XOR>, INSN_MIPS4_32_NOT_32R6_64R6,
+ GPR_64;
+defm : MovnPats<GPR64, GPR32, MOVN_I64_I, XOR64>, INSN_MIPS4_32_NOT_32R6_64R6,
+ GPR_64;
+defm : MovnPats<GPR64, GPR64, MOVN_I64_I64, XOR64>, INSN_MIPS4_32_NOT_32R6_64R6,
GPR_64;
-defm : MovzPats1<GPR32, GPR64, MOVZ_I_I64, XOR>, GPR_64;
-defm : MovzPats1<GPR64, GPR32, MOVZ_I64_I, XOR64>, GPR_64;
-defm : MovzPats1<GPR64, GPR64, MOVZ_I64_I64, XOR64>, GPR_64;
-defm : MovzPats2<GPR32, GPR64, MOVZ_I_I64, XORi>, GPR_64;
-defm : MovzPats2<GPR64, GPR32, MOVZ_I64_I, XORi64>, GPR_64;
-defm : MovzPats2<GPR64, GPR64, MOVZ_I64_I64, XORi64>, GPR_64;
-
-defm : MovnPats<GPR32, GPR32, MOVN_I_I, XOR>;
-
-defm : MovnPats<GPR32, GPR64, MOVN_I_I64, XOR>, GPR_64;
-defm : MovnPats<GPR64, GPR32, MOVN_I64_I, XOR64>, GPR_64;
-defm : MovnPats<GPR64, GPR64, MOVN_I64_I64, XOR64>, GPR_64;
-defm : MovzPats0<GPR32, FGR32, MOVZ_I_S, SLT, SLTu, SLTi, SLTiu>;
-defm : MovzPats1<GPR32, FGR32, MOVZ_I_S, XOR>;
-defm : MovnPats<GPR32, FGR32, MOVN_I_S, XOR>;
+defm : MovzPats0<GPR32, FGR32, MOVZ_I_S, SLT, SLTu, SLTi, SLTiu>,
+ INSN_MIPS4_32_NOT_32R6_64R6;
+defm : MovzPats1<GPR32, FGR32, MOVZ_I_S, XOR>, INSN_MIPS4_32_NOT_32R6_64R6;
+defm : MovnPats<GPR32, FGR32, MOVN_I_S, XOR>, INSN_MIPS4_32_NOT_32R6_64R6;
defm : MovzPats0<GPR64, FGR32, MOVZ_I_S, SLT64, SLTu64, SLTi64, SLTiu64>,
+ INSN_MIPS4_32_NOT_32R6_64R6, GPR_64;
+defm : MovzPats1<GPR64, FGR32, MOVZ_I64_S, XOR64>, INSN_MIPS4_32_NOT_32R6_64R6,
+ GPR_64;
+defm : MovnPats<GPR64, FGR32, MOVN_I64_S, XOR64>, INSN_MIPS4_32_NOT_32R6_64R6,
GPR_64;
-defm : MovzPats1<GPR64, FGR32, MOVZ_I64_S, XOR64>, GPR_64;
-defm : MovnPats<GPR64, FGR32, MOVN_I64_S, XOR64>, GPR_64;
-defm : MovzPats0<GPR32, AFGR64, MOVZ_I_D32, SLT, SLTu, SLTi, SLTiu>, FGR_32;
-defm : MovzPats1<GPR32, AFGR64, MOVZ_I_D32, XOR>, FGR_32;
-defm : MovnPats<GPR32, AFGR64, MOVN_I_D32, XOR>, FGR_32;
+defm : MovzPats0<GPR32, AFGR64, MOVZ_I_D32, SLT, SLTu, SLTi, SLTiu>,
+ INSN_MIPS4_32_NOT_32R6_64R6, FGR_32;
+defm : MovzPats1<GPR32, AFGR64, MOVZ_I_D32, XOR>, INSN_MIPS4_32_NOT_32R6_64R6,
+ FGR_32;
+defm : MovnPats<GPR32, AFGR64, MOVN_I_D32, XOR>, INSN_MIPS4_32_NOT_32R6_64R6,
+ FGR_32;
-defm : MovzPats0<GPR32, FGR64, MOVZ_I_D64, SLT, SLTu, SLTi, SLTiu>, FGR_64;
+defm : MovzPats0<GPR32, FGR64, MOVZ_I_D64, SLT, SLTu, SLTi, SLTiu>,
+ INSN_MIPS4_32_NOT_32R6_64R6, FGR_64;
defm : MovzPats0<GPR64, FGR64, MOVZ_I_D64, SLT64, SLTu64, SLTi64, SLTiu64>,
+ INSN_MIPS4_32_NOT_32R6_64R6, FGR_64;
+defm : MovzPats1<GPR32, FGR64, MOVZ_I_D64, XOR>, INSN_MIPS4_32_NOT_32R6_64R6,
+ FGR_64;
+defm : MovzPats1<GPR64, FGR64, MOVZ_I64_D64, XOR64>,
+ INSN_MIPS4_32_NOT_32R6_64R6, FGR_64;
+defm : MovnPats<GPR32, FGR64, MOVN_I_D64, XOR>, INSN_MIPS4_32_NOT_32R6_64R6,
+ FGR_64;
+defm : MovnPats<GPR64, FGR64, MOVN_I64_D64, XOR64>, INSN_MIPS4_32_NOT_32R6_64R6,
FGR_64;
-defm : MovzPats1<GPR32, FGR64, MOVZ_I_D64, XOR>, FGR_64;
-defm : MovzPats1<GPR64, FGR64, MOVZ_I64_D64, XOR64>, FGR_64;
-defm : MovnPats<GPR32, FGR64, MOVN_I_D64, XOR>, FGR_64;
-defm : MovnPats<GPR64, FGR64, MOVN_I64_D64, XOR64>, FGR_64;
diff --git a/lib/Target/Mips/MipsDSPInstrFormats.td b/lib/Target/Mips/MipsDSPInstrFormats.td
index cf09113..b5d52ce 100644
--- a/lib/Target/Mips/MipsDSPInstrFormats.td
+++ b/lib/Target/Mips/MipsDSPInstrFormats.td
@@ -7,9 +7,9 @@
//
//===----------------------------------------------------------------------===//
-def HasDSP : Predicate<"Subtarget.hasDSP()">,
+def HasDSP : Predicate<"Subtarget->hasDSP()">,
AssemblerPredicate<"FeatureDSP">;
-def HasDSPR2 : Predicate<"Subtarget.hasDSPR2()">,
+def HasDSPR2 : Predicate<"Subtarget->hasDSPR2()">,
AssemblerPredicate<"FeatureDSPR2">;
// Fields.
diff --git a/lib/Target/Mips/MipsDelaySlotFiller.cpp b/lib/Target/Mips/MipsDelaySlotFiller.cpp
index d6c7cac..bcfbc12 100644
--- a/lib/Target/Mips/MipsDelaySlotFiller.cpp
+++ b/lib/Target/Mips/MipsDelaySlotFiller.cpp
@@ -23,6 +23,7 @@
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Target/TargetInstrInfo.h"
@@ -177,6 +178,13 @@ namespace {
for (MachineFunction::iterator FI = F.begin(), FE = F.end();
FI != FE; ++FI)
Changed |= runOnMachineBasicBlock(*FI);
+
+ // This pass invalidates liveness information when it reorders
+ // instructions to fill delay slots. Without this, -verify-machineinstrs
+ // will fail.
+ if (Changed)
+ F.getRegInfo().invalidateLiveness();
+
return Changed;
}
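As an aside for anyone applying the same fix in another pass, the pattern is simply "reorder, then invalidate". A minimal sketch follows, assuming a hypothetical ReorderPass with a hypothetical reorderBlock() helper; only invalidateLiveness() is the real MachineRegisterInfo call used in the hunk above.

// Sketch only (relies on the enclosing LLVM context): a pass that reorders
// MachineInstrs and then drops stale liveness data so that
// -verify-machineinstrs does not trip over out-of-date kill flags.
bool ReorderPass::runOnMachineFunction(MachineFunction &MF) {
  bool Changed = false;
  for (MachineBasicBlock &MBB : MF)
    Changed |= reorderBlock(MBB);         // hypothetical helper
  if (Changed)
    MF.getRegInfo().invalidateLiveness(); // same call as the hunk above
  return Changed;
}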
diff --git a/lib/Target/Mips/MipsFastISel.cpp b/lib/Target/Mips/MipsFastISel.cpp
index 268a0ed..617801b 100644
--- a/lib/Target/Mips/MipsFastISel.cpp
+++ b/lib/Target/Mips/MipsFastISel.cpp
@@ -12,6 +12,7 @@
#include "MipsISelLowering.h"
#include "MipsMachineFunction.h"
#include "MipsSubtarget.h"
+#include "MipsTargetMachine.h"
using namespace llvm;
@@ -36,11 +37,11 @@ class MipsFastISel final : public FastISel {
/// Subtarget - Keep a pointer to the MipsSubtarget around so that we can
/// make the right decision when generating code for different targets.
- const MipsSubtarget *Subtarget;
Module &M;
const TargetMachine &TM;
const TargetInstrInfo &TII;
const TargetLowering &TLI;
+ const MipsSubtarget *Subtarget;
MipsFunctionInfo *MFI;
// Convenience variables to avoid some queries.
@@ -54,8 +55,8 @@ public:
: FastISel(funcInfo, libInfo),
M(const_cast<Module &>(*funcInfo.Fn->getParent())),
TM(funcInfo.MF->getTarget()), TII(*TM.getInstrInfo()),
- TLI(*TM.getTargetLowering()) {
- Subtarget = &TM.getSubtarget<MipsSubtarget>();
+ TLI(*TM.getTargetLowering()),
+ Subtarget(&TM.getSubtarget<MipsSubtarget>()) {
MFI = funcInfo.MF->getInfo<MipsFunctionInfo>();
Context = &funcInfo.Fn->getContext();
TargetSupported = ((Subtarget->getRelocationModel() == Reloc::PIC_) &&
@@ -68,8 +69,11 @@ public:
bool ComputeAddress(const Value *Obj, Address &Addr);
private:
+ bool EmitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
+ unsigned Alignment = 0);
bool EmitStore(MVT VT, unsigned SrcReg, Address &Addr,
unsigned Alignment = 0);
+ bool SelectLoad(const Instruction *I);
bool SelectRet(const Instruction *I);
bool SelectStore(const Instruction *I);
@@ -80,6 +84,36 @@ private:
unsigned MaterializeGV(const GlobalValue *GV, MVT VT);
unsigned MaterializeInt(const Constant *C, MVT VT);
unsigned Materialize32BitInt(int64_t Imm, const TargetRegisterClass *RC);
+
+ // For some reason, this default is not generated by tablegen, so we
+ // explicitly generate it here.
+ //
+ unsigned FastEmitInst_riir(uint64_t inst, const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill, uint64_t imm1,
+ uint64_t imm2, unsigned Op3, bool Op3IsKill) {
+ return 0;
+ }
+
+ MachineInstrBuilder EmitInst(unsigned Opc) {
+ return BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc));
+ }
+
+ MachineInstrBuilder EmitInst(unsigned Opc, unsigned DstReg) {
+ return BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc),
+ DstReg);
+ }
+
+ MachineInstrBuilder EmitInstStore(unsigned Opc, unsigned SrcReg,
+ unsigned MemReg, int64_t MemOffset) {
+ return EmitInst(Opc).addReg(SrcReg).addReg(MemReg).addImm(MemOffset);
+ }
+
+ MachineInstrBuilder EmitInstLoad(unsigned Opc, unsigned DstReg,
+ unsigned MemReg, int64_t MemOffset) {
+ return EmitInst(Opc, DstReg).addReg(MemReg).addImm(MemOffset);
+ }
+
+#include "MipsGenFastISel.inc"
};
bool MipsFastISel::isTypeLegal(Type *Ty, MVT &VT) {
@@ -100,6 +134,8 @@ bool MipsFastISel::isLoadTypeLegal(Type *Ty, MVT &VT) {
// We will extend this in a later patch:
// If this is a type that can be sign- or zero-extended to a basic operation,
// go ahead and accept it now.
+ if (VT == MVT::i8 || VT == MVT::i16)
+ return true;
return false;
}
@@ -116,6 +152,45 @@ bool MipsFastISel::ComputeAddress(const Value *Obj, Address &Addr) {
return Addr.Base.Reg != 0;
}
+bool MipsFastISel::EmitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
+ unsigned Alignment) {
+ //
+ // More cases will be handled here in subsequent patches.
+ //
+ unsigned Opc;
+ switch (VT.SimpleTy) {
+ case MVT::i32: {
+ ResultReg = createResultReg(&Mips::GPR32RegClass);
+ Opc = Mips::LW;
+ break;
+ }
+ case MVT::i16: {
+ ResultReg = createResultReg(&Mips::GPR32RegClass);
+ Opc = Mips::LHu;
+ break;
+ }
+ case MVT::i8: {
+ ResultReg = createResultReg(&Mips::GPR32RegClass);
+ Opc = Mips::LBu;
+ break;
+ }
+ case MVT::f32: {
+ ResultReg = createResultReg(&Mips::FGR32RegClass);
+ Opc = Mips::LWC1;
+ break;
+ }
+ case MVT::f64: {
+ ResultReg = createResultReg(&Mips::AFGR64RegClass);
+ Opc = Mips::LDC1;
+ break;
+ }
+ default:
+ return false;
+ }
+ EmitInstLoad(Opc, ResultReg, Addr.Base.Reg, Addr.Offset);
+ return true;
+}
+
// Materialize a constant into a register, and return the register
// number (or zero if we failed to handle it).
unsigned MipsFastISel::TargetMaterializeConstant(const Constant *C) {
@@ -141,12 +216,49 @@ bool MipsFastISel::EmitStore(MVT VT, unsigned SrcReg, Address &Addr,
//
// More cases will be handled here in subsequent patches.
//
- if (VT != MVT::i32)
+ unsigned Opc;
+ switch (VT.SimpleTy) {
+ case MVT::i8:
+ Opc = Mips::SB;
+ break;
+ case MVT::i16:
+ Opc = Mips::SH;
+ break;
+ case MVT::i32:
+ Opc = Mips::SW;
+ break;
+ case MVT::f32:
+ Opc = Mips::SWC1;
+ break;
+ case MVT::f64:
+ Opc = Mips::SDC1;
+ break;
+ default:
return false;
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Mips::SW))
- .addReg(SrcReg)
- .addReg(Addr.Base.Reg)
- .addImm(Addr.Offset);
+ }
+ EmitInstStore(Opc, SrcReg, Addr.Base.Reg, Addr.Offset);
+ return true;
+}
+
+bool MipsFastISel::SelectLoad(const Instruction *I) {
+ // Atomic loads need special handling.
+ if (cast<LoadInst>(I)->isAtomic())
+ return false;
+
+ // Verify we have a legal type before going any further.
+ MVT VT;
+ if (!isLoadTypeLegal(I->getType(), VT))
+ return false;
+
+ // See if we can handle this address.
+ Address Addr;
+ if (!ComputeAddress(I->getOperand(0), Addr))
+ return false;
+
+ unsigned ResultReg;
+ if (!EmitLoad(VT, ResultReg, Addr, cast<LoadInst>(I)->getAlignment()))
+ return false;
+ UpdateValueMap(I, ResultReg);
return true;
}
@@ -186,8 +298,7 @@ bool MipsFastISel::SelectRet(const Instruction *I) {
if (Ret->getNumOperands() > 0) {
return false;
}
- unsigned RetOpc = Mips::RetRA;
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(RetOpc));
+ EmitInst(Mips::RetRA);
return true;
}
@@ -197,6 +308,8 @@ bool MipsFastISel::TargetSelectInstruction(const Instruction *I) {
switch (I->getOpcode()) {
default:
break;
+ case Instruction::Load:
+ return SelectLoad(I);
case Instruction::Store:
return SelectStore(I);
case Instruction::Ret:
@@ -207,6 +320,22 @@ bool MipsFastISel::TargetSelectInstruction(const Instruction *I) {
}
unsigned MipsFastISel::MaterializeFP(const ConstantFP *CFP, MVT VT) {
+ int64_t Imm = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
+ if (VT == MVT::f32) {
+ const TargetRegisterClass *RC = &Mips::FGR32RegClass;
+ unsigned DestReg = createResultReg(RC);
+ unsigned TempReg = Materialize32BitInt(Imm, &Mips::GPR32RegClass);
+ EmitInst(Mips::MTC1, DestReg).addReg(TempReg);
+ return DestReg;
+ } else if (VT == MVT::f64) {
+ const TargetRegisterClass *RC = &Mips::AFGR64RegClass;
+ unsigned DestReg = createResultReg(RC);
+ unsigned TempReg1 = Materialize32BitInt(Imm >> 32, &Mips::GPR32RegClass);
+ unsigned TempReg2 =
+ Materialize32BitInt(Imm & 0xFFFFFFFF, &Mips::GPR32RegClass);
+ EmitInst(Mips::BuildPairF64, DestReg).addReg(TempReg2).addReg(TempReg1);
+ return DestReg;
+ }
return 0;
}
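The f64 path above materializes the constant by splitting its raw IEEE-754 bit pattern into two 32-bit words and pairing them with BuildPairF64. A host-runnable sketch of just the bit-splitting step (splitDoubleBits is an illustrative name, not part of the patch):

#include <cstdint>
#include <cstring>

// Split a double into the high/low 32-bit words that Materialize32BitInt
// would load into GPRs before BuildPairF64 combines them into one FPR pair.
static void splitDoubleBits(double D, uint32_t &Hi, uint32_t &Lo) {
  uint64_t Bits;
  std::memcpy(&Bits, &D, sizeof(Bits)); // bit-cast, no value conversion
  Hi = uint32_t(Bits >> 32);
  Lo = uint32_t(Bits & 0xFFFFFFFF);
}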
@@ -221,9 +350,8 @@ unsigned MipsFastISel::MaterializeGV(const GlobalValue *GV, MVT VT) {
// TLS not supported at this time.
if (IsThreadLocal)
return 0;
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Mips::LW), DestReg)
- .addReg(MFI->getGlobalBaseReg())
- .addGlobalAddress(GV, 0, MipsII::MO_GOT);
+ EmitInst(Mips::LW, DestReg).addReg(MFI->getGlobalBaseReg()).addGlobalAddress(
+ GV, 0, MipsII::MO_GOT);
return DestReg;
}
unsigned MipsFastISel::MaterializeInt(const Constant *C, MVT VT) {
@@ -245,15 +373,10 @@ unsigned MipsFastISel::Materialize32BitInt(int64_t Imm,
if (isInt<16>(Imm)) {
unsigned Opc = Mips::ADDiu;
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
- .addReg(Mips::ZERO)
- .addImm(Imm);
+ EmitInst(Opc, ResultReg).addReg(Mips::ZERO).addImm(Imm);
return ResultReg;
} else if (isUInt<16>(Imm)) {
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Mips::ORi),
- ResultReg)
- .addReg(Mips::ZERO)
- .addImm(Imm);
+ EmitInst(Mips::ORi, ResultReg).addReg(Mips::ZERO).addImm(Imm);
return ResultReg;
}
unsigned Lo = Imm & 0xFFFF;
@@ -261,16 +384,10 @@ unsigned MipsFastISel::Materialize32BitInt(int64_t Imm,
if (Lo) {
// Both Lo and Hi have nonzero bits.
unsigned TmpReg = createResultReg(RC);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Mips::LUi),
- TmpReg).addImm(Hi);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Mips::ORi),
- ResultReg)
- .addReg(TmpReg)
- .addImm(Lo);
-
+ EmitInst(Mips::LUi, TmpReg).addImm(Hi);
+ EmitInst(Mips::ORi, ResultReg).addReg(TmpReg).addImm(Lo);
} else {
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Mips::LUi),
- ResultReg).addImm(Hi);
+ EmitInst(Mips::LUi, ResultReg).addImm(Hi);
}
return ResultReg;
}
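Materialize32BitInt picks among three sequences: ADDiu for signed 16-bit immediates, ORi for unsigned 16-bit immediates, and LUi plus an optional ORi for everything else. A host-runnable paraphrase of that decision (describeImm is illustrative only):

#include <cstdint>
#include <cstdio>

// Print the MIPS sequence Materialize32BitInt would pick for Imm.
static void describeImm(int64_t Imm) {
  if (Imm >= INT16_MIN && Imm <= INT16_MAX)   // isInt<16>(Imm)
    std::printf("addiu $rd, $zero, %lld\n", (long long)Imm);
  else if (Imm >= 0 && Imm <= UINT16_MAX)     // isUInt<16>(Imm)
    std::printf("ori   $rd, $zero, %lld\n", (long long)Imm);
  else {
    uint32_t Lo = uint32_t(Imm) & 0xFFFF;
    uint32_t Hi = (uint32_t(Imm) >> 16) & 0xFFFF;
    std::printf("lui   $rd, 0x%x\n", Hi);
    if (Lo)                                   // skip the ORi when Lo == 0
      std::printf("ori   $rd, $rd, 0x%x\n", Lo);
  }
}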
diff --git a/lib/Target/Mips/MipsFrameLowering.h b/lib/Target/Mips/MipsFrameLowering.h
index e10a3a5..8e9196c 100644
--- a/lib/Target/Mips/MipsFrameLowering.h
+++ b/lib/Target/Mips/MipsFrameLowering.h
@@ -15,7 +15,6 @@
#define MIPS_FRAMEINFO_H
#include "Mips.h"
-#include "MipsSubtarget.h"
#include "llvm/Target/TargetFrameLowering.h"
namespace llvm {
diff --git a/lib/Target/Mips/MipsISelDAGToDAG.cpp b/lib/Target/Mips/MipsISelDAGToDAG.cpp
index 90cff63..0bdabf3 100644
--- a/lib/Target/Mips/MipsISelDAGToDAG.cpp
+++ b/lib/Target/Mips/MipsISelDAGToDAG.cpp
@@ -47,6 +47,7 @@ using namespace llvm;
//===----------------------------------------------------------------------===//
bool MipsDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
+ Subtarget = &TM.getSubtarget<MipsSubtarget>();
bool Ret = SelectionDAGISel::runOnMachineFunction(MF);
processFunctionAfterISel(MF);
@@ -202,7 +203,7 @@ SDNode* MipsDAGToDAGISel::Select(SDNode *Node) {
#ifndef NDEBUG
case ISD::LOAD:
case ISD::STORE:
- assert((Subtarget.systemSupportsUnalignedAccess() ||
+ assert((Subtarget->systemSupportsUnalignedAccess() ||
cast<MemSDNode>(Node)->getMemoryVT().getSizeInBits() / 8 <=
cast<MemSDNode>(Node)->getAlignment()) &&
"Unexpected unaligned loads/stores.");
diff --git a/lib/Target/Mips/MipsISelDAGToDAG.h b/lib/Target/Mips/MipsISelDAGToDAG.h
index 13becb6..2a6c875 100644
--- a/lib/Target/Mips/MipsISelDAGToDAG.h
+++ b/lib/Target/Mips/MipsISelDAGToDAG.h
@@ -32,7 +32,7 @@ namespace llvm {
class MipsDAGToDAGISel : public SelectionDAGISel {
public:
explicit MipsDAGToDAGISel(MipsTargetMachine &TM)
- : SelectionDAGISel(TM), Subtarget(TM.getSubtarget<MipsSubtarget>()) {}
+ : SelectionDAGISel(TM), Subtarget(&TM.getSubtarget<MipsSubtarget>()) {}
// Pass Name
const char *getPassName() const override {
@@ -46,7 +46,7 @@ protected:
/// Keep a pointer to the MipsSubtarget around so that we can make the right
/// decision when generating code for different targets.
- const MipsSubtarget &Subtarget;
+ const MipsSubtarget *Subtarget;
private:
// Include the pieces autogenerated from the target description.
diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp
index bfe5ea1..b7af2d4 100644
--- a/lib/Target/Mips/MipsISelLowering.cpp
+++ b/lib/Target/Mips/MipsISelLowering.cpp
@@ -215,6 +215,11 @@ MipsTargetLowering::MipsTargetLowering(MipsTargetMachine &TM)
// setcc operations results (slt, sgt, ...).
setBooleanContents(ZeroOrOneBooleanContent);
setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
+ // The cmp.cond.fmt instruction in MIPS32r6/MIPS64r6 uses 0 and -1 like MSA
+ // does. Integer booleans still use 0 and 1.
+ if (Subtarget->hasMips32r6())
+ setBooleanContents(ZeroOrOneBooleanContent,
+ ZeroOrNegativeOneBooleanContent);
// Load extended operations for i1 types must be promoted
setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote);
@@ -251,7 +256,7 @@ MipsTargetLowering::MipsTargetLowering(MipsTargetMachine &TM)
setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
- if (isGP64bit()) {
+ if (Subtarget->isGP64bit()) {
setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
setOperationAction(ISD::BlockAddress, MVT::i64, Custom);
setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
@@ -263,14 +268,14 @@ MipsTargetLowering::MipsTargetLowering(MipsTargetMachine &TM)
setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
}
- if (!isGP64bit()) {
+ if (!Subtarget->isGP64bit()) {
setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
}
setOperationAction(ISD::ADD, MVT::i32, Custom);
- if (isGP64bit())
+ if (Subtarget->isGP64bit())
setOperationAction(ISD::ADD, MVT::i64, Custom);
setOperationAction(ISD::SDIV, MVT::i32, Expand);
@@ -287,7 +292,8 @@ MipsTargetLowering::MipsTargetLowering(MipsTargetMachine &TM)
setOperationAction(ISD::BR_CC, MVT::f64, Expand);
setOperationAction(ISD::BR_CC, MVT::i32, Expand);
setOperationAction(ISD::BR_CC, MVT::i64, Expand);
- setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
+ setOperationAction(ISD::SELECT_CC, MVT::i32, Expand);
+ setOperationAction(ISD::SELECT_CC, MVT::i64, Expand);
setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);
setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
@@ -368,7 +374,7 @@ MipsTargetLowering::MipsTargetLowering(MipsTargetMachine &TM)
if (!Subtarget->hasMips64r2())
setOperationAction(ISD::BSWAP, MVT::i64, Expand);
- if (isGP64bit()) {
+ if (Subtarget->isGP64bit()) {
setLoadExtAction(ISD::SEXTLOAD, MVT::i32, Custom);
setLoadExtAction(ISD::ZEXTLOAD, MVT::i32, Custom);
setLoadExtAction(ISD::EXTLOAD, MVT::i32, Custom);
@@ -384,12 +390,13 @@ MipsTargetLowering::MipsTargetLowering(MipsTargetMachine &TM)
setTargetDAGCombine(ISD::OR);
setTargetDAGCombine(ISD::ADD);
- setMinFunctionAlignment(isGP64bit() ? 3 : 2);
+ setMinFunctionAlignment(Subtarget->isGP64bit() ? 3 : 2);
- setStackPointerRegisterToSaveRestore(isN64() ? Mips::SP_64 : Mips::SP);
+ setStackPointerRegisterToSaveRestore(Subtarget->isABI_N64() ? Mips::SP_64
+ : Mips::SP);
- setExceptionPointerRegister(isN64() ? Mips::A0_64 : Mips::A0);
- setExceptionSelectorRegister(isN64() ? Mips::A1_64 : Mips::A1);
+ setExceptionPointerRegister(Subtarget->isABI_N64() ? Mips::A0_64 : Mips::A0);
+ setExceptionSelectorRegister(Subtarget->isABI_N64() ? Mips::A1_64 : Mips::A1);
MaxStoresPerMemcpy = 16;
@@ -815,10 +822,10 @@ addLiveIn(MachineFunction &MF, unsigned PReg, const TargetRegisterClass *RC)
return VReg;
}
-static MachineBasicBlock *expandPseudoDIV(MachineInstr *MI,
- MachineBasicBlock &MBB,
- const TargetInstrInfo &TII,
- bool Is64Bit) {
+static MachineBasicBlock *insertDivByZeroTrap(MachineInstr *MI,
+ MachineBasicBlock &MBB,
+ const TargetInstrInfo &TII,
+ bool Is64Bit) {
if (NoZeroDivCheck)
return &MBB;
@@ -836,6 +843,10 @@ static MachineBasicBlock *expandPseudoDIV(MachineInstr *MI,
// Clear Divisor's kill flag.
Divisor.setIsKill(false);
+
+ // We would normally delete the original instruction here, but in this case
+ // we only needed to inject an additional instruction rather than replace it.
+
return &MBB;
}
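The body of insertDivByZeroTrap is unchanged by this hunk, so the diff does not show it; it injects a conditional trap after the division. A sketch of that step, assuming the surrounding code matches the signature above and that operand 2 of the pseudo is the divisor (consistent with the Divisor operand referenced above); break code 7 is the conventional MIPS divide-by-zero code:

// Fragment only (relies on the enclosing LLVM context): emit
// "teq $divisor, $zero, 7" immediately after the div instruction.
MachineBasicBlock::iterator I(MI);
BuildMI(MBB, std::next(I), MI->getDebugLoc(), TII.get(Mips::TEQ))
    .addReg(MI->getOperand(2).getReg()) // divisor
    .addReg(Mips::ZERO)
    .addImm(7);                         // break code: divide-by-zero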
@@ -918,10 +929,22 @@ MipsTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
return emitAtomicCmpSwap(MI, BB, 8);
case Mips::PseudoSDIV:
case Mips::PseudoUDIV:
- return expandPseudoDIV(MI, *BB, *getTargetMachine().getInstrInfo(), false);
+ case Mips::DIV:
+ case Mips::DIVU:
+ case Mips::MOD:
+ case Mips::MODU:
+ return insertDivByZeroTrap(MI, *BB, *getTargetMachine().getInstrInfo(),
+ false);
case Mips::PseudoDSDIV:
case Mips::PseudoDUDIV:
- return expandPseudoDIV(MI, *BB, *getTargetMachine().getInstrInfo(), true);
+ case Mips::DDIV:
+ case Mips::DDIVU:
+ case Mips::DMOD:
+ case Mips::DMODU:
+ return insertDivByZeroTrap(MI, *BB, *getTargetMachine().getInstrInfo(),
+ true);
+ case Mips::SEL_D:
+ return emitSEL_D(MI, BB);
}
}
@@ -941,16 +964,20 @@ MipsTargetLowering::emitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
unsigned LL, SC, AND, NOR, ZERO, BEQ;
if (Size == 4) {
- LL = isMicroMips ? Mips::LL_MM : Mips::LL;
- SC = isMicroMips ? Mips::SC_MM : Mips::SC;
+ if (isMicroMips) {
+ LL = Mips::LL_MM;
+ SC = Mips::SC_MM;
+ } else {
+ LL = Subtarget->hasMips32r6() ? Mips::LL_R6 : Mips::LL;
+ SC = Subtarget->hasMips32r6() ? Mips::SC_R6 : Mips::SC;
+ }
AND = Mips::AND;
NOR = Mips::NOR;
ZERO = Mips::ZERO;
BEQ = Mips::BEQ;
- }
- else {
- LL = Mips::LLD;
- SC = Mips::SCD;
+ } else {
+ LL = Subtarget->hasMips64r6() ? Mips::LLD_R6 : Mips::LLD;
+ SC = Subtarget->hasMips64r6() ? Mips::SCD_R6 : Mips::SCD;
AND = Mips::AND64;
NOR = Mips::NOR64;
ZERO = Mips::ZERO_64;
@@ -1012,11 +1039,39 @@ MipsTargetLowering::emitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
return exitMBB;
}
-MachineBasicBlock *
-MipsTargetLowering::emitAtomicBinaryPartword(MachineInstr *MI,
- MachineBasicBlock *BB,
- unsigned Size, unsigned BinOpcode,
- bool Nand) const {
+MachineBasicBlock *MipsTargetLowering::emitSignExtendToI32InReg(
+ MachineInstr *MI, MachineBasicBlock *BB, unsigned Size, unsigned DstReg,
+ unsigned SrcReg) const {
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ DebugLoc DL = MI->getDebugLoc();
+
+ if (Subtarget->hasMips32r2() && Size == 1) {
+ BuildMI(BB, DL, TII->get(Mips::SEB), DstReg).addReg(SrcReg);
+ return BB;
+ }
+
+ if (Subtarget->hasMips32r2() && Size == 2) {
+ BuildMI(BB, DL, TII->get(Mips::SEH), DstReg).addReg(SrcReg);
+ return BB;
+ }
+
+ MachineFunction *MF = BB->getParent();
+ MachineRegisterInfo &RegInfo = MF->getRegInfo();
+ const TargetRegisterClass *RC = getRegClassFor(MVT::i32);
+ unsigned ScrReg = RegInfo.createVirtualRegister(RC);
+
+ assert(Size < 32);
+ int64_t ShiftImm = 32 - (Size * 8);
+
+ BuildMI(BB, DL, TII->get(Mips::SLL), ScrReg).addReg(SrcReg).addImm(ShiftImm);
+ BuildMI(BB, DL, TII->get(Mips::SRA), DstReg).addReg(ScrReg).addImm(ShiftImm);
+
+ return BB;
+}
+
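The shift fallback works because shifting left by (32 - 8*Size) and then arithmetic-shifting back replicates the sign bit of the narrow value across the upper bits. A host-runnable demo (signExtendInReg is an illustrative name):

#include <cassert>
#include <cstdint>

// Sign-extend the low SizeInBytes bytes of V within a 32-bit "register",
// mirroring the sll/sra pair emitted above (shift amount 24 or 16).
static int32_t signExtendInReg(int32_t V, unsigned SizeInBytes) {
  assert(SizeInBytes == 1 || SizeInBytes == 2);
  int Shift = 32 - 8 * int(SizeInBytes);
  return int32_t(uint32_t(V) << Shift) >> Shift; // sll then sra
}
// e.g. signExtendInReg(0xFF, 1) == -1; signExtendInReg(0x7FFF, 2) == 32767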
+MachineBasicBlock *MipsTargetLowering::emitAtomicBinaryPartword(
+ MachineInstr *MI, MachineBasicBlock *BB, unsigned Size, unsigned BinOpcode,
+ bool Nand) const {
assert((Size == 1 || Size == 2) &&
"Unsupported size for EmitAtomicBinaryPartial.");
@@ -1046,7 +1101,6 @@ MipsTargetLowering::emitAtomicBinaryPartword(MachineInstr *MI,
unsigned StoreVal = RegInfo.createVirtualRegister(RC);
unsigned MaskedOldVal1 = RegInfo.createVirtualRegister(RC);
unsigned SrlRes = RegInfo.createVirtualRegister(RC);
- unsigned SllRes = RegInfo.createVirtualRegister(RC);
unsigned Success = RegInfo.createVirtualRegister(RC);
// insert new blocks after the current block
@@ -1152,19 +1206,14 @@ MipsTargetLowering::emitAtomicBinaryPartword(MachineInstr *MI,
// sinkMBB:
// and maskedoldval1,oldval,mask
// srl srlres,maskedoldval1,shiftamt
- // sll sllres,srlres,24
- // sra dest,sllres,24
+ // sign_extend dest,srlres
BB = sinkMBB;
- int64_t ShiftImm = (Size == 1) ? 24 : 16;
BuildMI(BB, DL, TII->get(Mips::AND), MaskedOldVal1)
.addReg(OldVal).addReg(Mask);
BuildMI(BB, DL, TII->get(Mips::SRLV), SrlRes)
.addReg(MaskedOldVal1).addReg(ShiftAmt);
- BuildMI(BB, DL, TII->get(Mips::SLL), SllRes)
- .addReg(SrlRes).addImm(ShiftImm);
- BuildMI(BB, DL, TII->get(Mips::SRA), Dest)
- .addReg(SllRes).addImm(ShiftImm);
+ BB = emitSignExtendToI32InReg(MI, BB, Size, Dest, SrlRes);
MI->eraseFromParent(); // The instruction is gone now.
@@ -1285,7 +1334,6 @@ MipsTargetLowering::emitAtomicCmpSwapPartword(MachineInstr *MI,
unsigned MaskedOldVal1 = RegInfo.createVirtualRegister(RC);
unsigned StoreVal = RegInfo.createVirtualRegister(RC);
unsigned SrlRes = RegInfo.createVirtualRegister(RC);
- unsigned SllRes = RegInfo.createVirtualRegister(RC);
unsigned Success = RegInfo.createVirtualRegister(RC);
// insert new blocks after the current block
@@ -1382,23 +1430,44 @@ MipsTargetLowering::emitAtomicCmpSwapPartword(MachineInstr *MI,
// sinkMBB:
// srl srlres,maskedoldval0,shiftamt
- // sll sllres,srlres,24
- // sra dest,sllres,24
+ // sign_extend dest,srlres
BB = sinkMBB;
- int64_t ShiftImm = (Size == 1) ? 24 : 16;
BuildMI(BB, DL, TII->get(Mips::SRLV), SrlRes)
.addReg(MaskedOldVal0).addReg(ShiftAmt);
- BuildMI(BB, DL, TII->get(Mips::SLL), SllRes)
- .addReg(SrlRes).addImm(ShiftImm);
- BuildMI(BB, DL, TII->get(Mips::SRA), Dest)
- .addReg(SllRes).addImm(ShiftImm);
+ BB = emitSignExtendToI32InReg(MI, BB, Size, Dest, SrlRes);
MI->eraseFromParent(); // The instruction is gone now.
return exitMBB;
}
+MachineBasicBlock *MipsTargetLowering::emitSEL_D(MachineInstr *MI,
+ MachineBasicBlock *BB) const {
+ MachineFunction *MF = BB->getParent();
+ const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ MachineRegisterInfo &RegInfo = MF->getRegInfo();
+ DebugLoc DL = MI->getDebugLoc();
+ MachineBasicBlock::iterator II(MI);
+
+ unsigned Fc = MI->getOperand(1).getReg();
+ const auto &FGR64RegClass = TRI->getRegClass(Mips::FGR64RegClassID);
+
+ unsigned Fc2 = RegInfo.createVirtualRegister(FGR64RegClass);
+
+ BuildMI(*BB, II, DL, TII->get(Mips::SUBREG_TO_REG), Fc2)
+ .addImm(0)
+ .addReg(Fc)
+ .addImm(Mips::sub_lo);
+
+ // We don't erase the original instruction; we just replace the condition
+ // register with the 64-bit super-register.
+ MI->getOperand(1).setReg(Fc2);
+
+ return BB;
+}
+
//===----------------------------------------------------------------------===//
// Misc Lower Operation implementation
//===----------------------------------------------------------------------===//
@@ -1421,7 +1490,8 @@ SDValue MipsTargetLowering::lowerBR_JT(SDValue Op, SelectionDAG &DAG) const {
0);
Chain = Addr.getValue(1);
- if ((getTargetMachine().getRelocationModel() == Reloc::PIC_) || isN64()) {
+ if ((getTargetMachine().getRelocationModel() == Reloc::PIC_) ||
+ Subtarget->isABI_N64()) {
// For PIC, the sequence is:
// BRIND(load(Jumptable + index) + RelocBase)
// RelocBase can be JumpTable, GOT or some sort of global base.
@@ -1439,6 +1509,7 @@ SDValue MipsTargetLowering::lowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
SDValue Dest = Op.getOperand(2);
SDLoc DL(Op);
+ assert(!Subtarget->hasMips32r6() && !Subtarget->hasMips64r6());
SDValue CondRes = createFPCmp(DAG, Op.getOperand(1));
// Return if flag is not set by a floating point comparison.
@@ -1458,6 +1529,7 @@ SDValue MipsTargetLowering::lowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
SDValue MipsTargetLowering::
lowerSELECT(SDValue Op, SelectionDAG &DAG) const
{
+ assert(!Subtarget->hasMips32r6() && !Subtarget->hasMips64r6());
SDValue Cond = createFPCmp(DAG, Op.getOperand(0));
// Return if flag is not set by a floating point comparison.
@@ -1483,6 +1555,7 @@ lowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const
}
SDValue MipsTargetLowering::lowerSETCC(SDValue Op, SelectionDAG &DAG) const {
+ assert(!Subtarget->hasMips32r6() && !Subtarget->hasMips64r6());
SDValue Cond = createFPCmp(DAG, Op);
assert(Cond.getOpcode() == MipsISD::FPCmp &&
@@ -1502,7 +1575,8 @@ SDValue MipsTargetLowering::lowerGlobalAddress(SDValue Op,
GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
const GlobalValue *GV = N->getGlobal();
- if (getTargetMachine().getRelocationModel() != Reloc::PIC_ && !isN64()) {
+ if (getTargetMachine().getRelocationModel() != Reloc::PIC_ &&
+ !Subtarget->isABI_N64()) {
const MipsTargetObjectFile &TLOF =
(const MipsTargetObjectFile&)getObjFileLowering();
@@ -1521,15 +1595,18 @@ SDValue MipsTargetLowering::lowerGlobalAddress(SDValue Op,
}
if (GV->hasInternalLinkage() || (GV->hasLocalLinkage() && !isa<Function>(GV)))
- return getAddrLocal(N, Ty, DAG, isN32() || isN64());
+ return getAddrLocal(N, Ty, DAG,
+ Subtarget->isABI_N32() || Subtarget->isABI_N64());
if (LargeGOT)
return getAddrGlobalLargeGOT(N, Ty, DAG, MipsII::MO_GOT_HI16,
MipsII::MO_GOT_LO16, DAG.getEntryNode(),
MachinePointerInfo::getGOT());
- return getAddrGlobal(N, Ty, DAG, (isN32() || isN64()) ? MipsII::MO_GOT_DISP
- : MipsII::MO_GOT16,
+ return getAddrGlobal(N, Ty, DAG,
+ (Subtarget->isABI_N32() || Subtarget->isABI_N64())
+ ? MipsII::MO_GOT_DISP
+ : MipsII::MO_GOT16,
DAG.getEntryNode(), MachinePointerInfo::getGOT());
}
@@ -1538,10 +1615,12 @@ SDValue MipsTargetLowering::lowerBlockAddress(SDValue Op,
BlockAddressSDNode *N = cast<BlockAddressSDNode>(Op);
EVT Ty = Op.getValueType();
- if (getTargetMachine().getRelocationModel() != Reloc::PIC_ && !isN64())
+ if (getTargetMachine().getRelocationModel() != Reloc::PIC_ &&
+ !Subtarget->isABI_N64())
return getAddrNonPIC(N, Ty, DAG);
- return getAddrLocal(N, Ty, DAG, isN32() || isN64());
+ return getAddrLocal(N, Ty, DAG,
+ Subtarget->isABI_N32() || Subtarget->isABI_N64());
}
SDValue MipsTargetLowering::
@@ -1579,7 +1658,7 @@ lowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(DL).setChain(DAG.getEntryNode())
- .setCallee(CallingConv::C, PtrTy, TlsGetAddr, &Args, 0);
+ .setCallee(CallingConv::C, PtrTy, TlsGetAddr, std::move(Args), 0);
std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
SDValue Ret = CallResult.first;
@@ -1629,10 +1708,12 @@ lowerJumpTable(SDValue Op, SelectionDAG &DAG) const
JumpTableSDNode *N = cast<JumpTableSDNode>(Op);
EVT Ty = Op.getValueType();
- if (getTargetMachine().getRelocationModel() != Reloc::PIC_ && !isN64())
+ if (getTargetMachine().getRelocationModel() != Reloc::PIC_ &&
+ !Subtarget->isABI_N64())
return getAddrNonPIC(N, Ty, DAG);
- return getAddrLocal(N, Ty, DAG, isN32() || isN64());
+ return getAddrLocal(N, Ty, DAG,
+ Subtarget->isABI_N32() || Subtarget->isABI_N64());
}
SDValue MipsTargetLowering::
@@ -1650,10 +1731,12 @@ lowerConstantPool(SDValue Op, SelectionDAG &DAG) const
ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);
EVT Ty = Op.getValueType();
- if (getTargetMachine().getRelocationModel() != Reloc::PIC_ && !isN64())
+ if (getTargetMachine().getRelocationModel() != Reloc::PIC_ &&
+ !Subtarget->isABI_N64())
return getAddrNonPIC(N, Ty, DAG);
- return getAddrLocal(N, Ty, DAG, isN32() || isN64());
+ return getAddrLocal(N, Ty, DAG,
+ Subtarget->isABI_N32() || Subtarget->isABI_N64());
}
SDValue MipsTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const {
@@ -1784,8 +1867,9 @@ lowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
MFI->setFrameAddressIsTaken(true);
EVT VT = Op.getValueType();
SDLoc DL(Op);
- SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL,
- isN64() ? Mips::FP_64 : Mips::FP, VT);
+ SDValue FrameAddr =
+ DAG.getCopyFromReg(DAG.getEntryNode(), DL,
+ Subtarget->isABI_N64() ? Mips::FP_64 : Mips::FP, VT);
return FrameAddr;
}
@@ -1801,7 +1885,7 @@ SDValue MipsTargetLowering::lowerRETURNADDR(SDValue Op,
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
MVT VT = Op.getSimpleValueType();
- unsigned RA = isN64() ? Mips::RA_64 : Mips::RA;
+ unsigned RA = Subtarget->isABI_N64() ? Mips::RA_64 : Mips::RA;
MFI->setReturnAddressIsTaken(true);
// Return RA, which contains the return address. Mark it an implicit live-in.
@@ -1823,12 +1907,12 @@ SDValue MipsTargetLowering::lowerEH_RETURN(SDValue Op, SelectionDAG &DAG)
SDValue Offset = Op.getOperand(1);
SDValue Handler = Op.getOperand(2);
SDLoc DL(Op);
- EVT Ty = isN64() ? MVT::i64 : MVT::i32;
+ EVT Ty = Subtarget->isABI_N64() ? MVT::i64 : MVT::i32;
// Store stack offset in V1, store jump target in V0. Glue CopyToReg and
// EH_RETURN nodes, so that instructions are emitted back-to-back.
- unsigned OffsetReg = isN64() ? Mips::V1_64 : Mips::V1;
- unsigned AddrReg = isN64() ? Mips::V0_64 : Mips::V0;
+ unsigned OffsetReg = Subtarget->isABI_N64() ? Mips::V1_64 : Mips::V1;
+ unsigned AddrReg = Subtarget->isABI_N64() ? Mips::V0_64 : Mips::V0;
Chain = DAG.getCopyToReg(Chain, DL, OffsetReg, Offset, SDValue());
Chain = DAG.getCopyToReg(Chain, DL, AddrReg, Handler, Chain.getValue(1));
return DAG.getNode(MipsISD::EH_RETURN, DL, MVT::Other, Chain,
@@ -2256,8 +2340,8 @@ getOpndList(SmallVectorImpl<SDValue> &Ops,
// in PIC mode) allow symbols to be resolved via lazy binding.
// The lazy binding stub requires GP to point to the GOT.
if (IsPICCall && !InternalLinkage) {
- unsigned GPReg = isN64() ? Mips::GP_64 : Mips::GP;
- EVT Ty = isN64() ? MVT::i64 : MVT::i32;
+ unsigned GPReg = Subtarget->isABI_N64() ? Mips::GP_64 : Mips::GP;
+ EVT Ty = Subtarget->isABI_N64() ? MVT::i64 : MVT::i32;
RegsToPass.push_back(std::make_pair(GPReg, getGlobalReg(CLI.DAG, Ty)));
}
@@ -2326,8 +2410,8 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
getTargetMachine(), ArgLocs, *DAG.getContext());
MipsCC::SpecialCallingConvType SpecialCallingConv =
getSpecialCallingConv(Callee);
- MipsCC MipsCCInfo(CallConv, isO32(), Subtarget->isFP64bit(), CCInfo,
- SpecialCallingConv);
+ MipsCC MipsCCInfo(CallConv, Subtarget->isABI_O32(), Subtarget->isFP64bit(),
+ CCInfo, SpecialCallingConv);
MipsCCInfo.analyzeCallOperands(Outs, IsVarArg,
Subtarget->mipsSEUsesSoftFloat(),
@@ -2360,7 +2444,8 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
Chain = DAG.getCALLSEQ_START(Chain, NextStackOffsetVal, DL);
SDValue StackPtr = DAG.getCopyFromReg(
- Chain, DL, isN64() ? Mips::SP_64 : Mips::SP, getPointerTy());
+ Chain, DL, Subtarget->isABI_N64() ? Mips::SP_64 : Mips::SP,
+ getPointerTy());
// With EABI is it possible to have 16 args on registers.
std::deque< std::pair<unsigned, SDValue> > RegsToPass;
@@ -2446,8 +2531,9 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
// direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
// node so that legalize doesn't hack it.
- bool IsPICCall = (isN64() || IsPIC); // true if calls are translated to
- // jalr $25
+ bool IsPICCall =
+ (Subtarget->isABI_N64() || IsPIC); // true if calls are translated to
+ // jalr $25
bool GlobalOrExternal = false, InternalLinkage = false;
SDValue CalleeLo;
EVT Ty = Callee.getValueType();
@@ -2458,7 +2544,8 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
InternalLinkage = Val->hasInternalLinkage();
if (InternalLinkage)
- Callee = getAddrLocal(G, Ty, DAG, isN32() || isN64());
+ Callee = getAddrLocal(G, Ty, DAG,
+ Subtarget->isABI_N32() || Subtarget->isABI_N64());
else if (LargeGOT)
Callee = getAddrGlobalLargeGOT(G, Ty, DAG, MipsII::MO_CALL_HI16,
MipsII::MO_CALL_LO16, Chain,
@@ -2474,7 +2561,7 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
const char *Sym = S->getSymbol();
- if (!isN64() && !IsPIC) // !N64 && static
+ if (!Subtarget->isABI_N64() && !IsPIC) // !N64 && static
Callee = DAG.getTargetExternalSymbol(Sym, getPointerTy(),
MipsII::MO_NO_FLAG);
else if (LargeGOT)
@@ -2525,7 +2612,8 @@ MipsTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
SmallVector<CCValAssign, 16> RVLocs;
CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(),
getTargetMachine(), RVLocs, *DAG.getContext());
- MipsCC MipsCCInfo(CallConv, isO32(), Subtarget->isFP64bit(), CCInfo);
+ MipsCC MipsCCInfo(CallConv, Subtarget->isABI_O32(), Subtarget->isFP64bit(),
+ CCInfo);
MipsCCInfo.analyzeCallResult(Ins, Subtarget->mipsSEUsesSoftFloat(),
CallNode, RetTy);
@@ -2572,7 +2660,8 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain,
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(),
getTargetMachine(), ArgLocs, *DAG.getContext());
- MipsCC MipsCCInfo(CallConv, isO32(), Subtarget->isFP64bit(), CCInfo);
+ MipsCC MipsCCInfo(CallConv, Subtarget->isABI_O32(), Subtarget->isFP64bit(),
+ CCInfo);
Function::const_arg_iterator FuncArg =
DAG.getMachineFunction().getFunction()->arg_begin();
bool UseSoftFloat = Subtarget->mipsSEUsesSoftFloat();
@@ -2634,7 +2723,8 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain,
(RegVT == MVT::i64 && ValVT == MVT::f64) ||
(RegVT == MVT::f64 && ValVT == MVT::i64))
ArgValue = DAG.getNode(ISD::BITCAST, DL, ValVT, ArgValue);
- else if (isO32() && RegVT == MVT::i32 && ValVT == MVT::f64) {
+ else if (Subtarget->isABI_O32() && RegVT == MVT::i32 &&
+ ValVT == MVT::f64) {
unsigned Reg2 = addLiveIn(DAG.getMachineFunction(),
getNextIntArgReg(ArgReg), RC);
SDValue ArgValue2 = DAG.getCopyFromReg(Chain, DL, Reg2, RegVT);
@@ -2672,7 +2762,7 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain,
unsigned Reg = MipsFI->getSRetReturnReg();
if (!Reg) {
Reg = MF.getRegInfo().createVirtualRegister(
- getRegClassFor(isN64() ? MVT::i64 : MVT::i32));
+ getRegClassFor(Subtarget->isABI_N64() ? MVT::i64 : MVT::i32));
MipsFI->setSRetReturnReg(Reg);
}
SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), DL, Reg, InVals[i]);
@@ -2723,7 +2813,8 @@ MipsTargetLowering::LowerReturn(SDValue Chain,
// CCState - Info about the registers and stack slot.
CCState CCInfo(CallConv, IsVarArg, MF, getTargetMachine(), RVLocs,
*DAG.getContext());
- MipsCC MipsCCInfo(CallConv, isO32(), Subtarget->isFP64bit(), CCInfo);
+ MipsCC MipsCCInfo(CallConv, Subtarget->isABI_O32(), Subtarget->isFP64bit(),
+ CCInfo);
// Analyze return values.
MipsCCInfo.analyzeReturn(Outs, Subtarget->mipsSEUsesSoftFloat(),
@@ -2759,7 +2850,7 @@ MipsTargetLowering::LowerReturn(SDValue Chain,
if (!Reg)
llvm_unreachable("sret virtual register not created in the entry block");
SDValue Val = DAG.getCopyFromReg(Chain, DL, Reg, getPointerTy());
- unsigned V0 = isN64() ? Mips::V0_64 : Mips::V0;
+ unsigned V0 = Subtarget->isABI_N64() ? Mips::V0_64 : Mips::V0;
Chain = DAG.getCopyToReg(Chain, DL, V0, Val, Flag);
Flag = Chain.getValue(1);
@@ -2980,9 +3071,9 @@ getRegForInlineAsmConstraint(const std::string &Constraint, MVT VT) const
return std::make_pair(0U, &Mips::CPU16RegsRegClass);
return std::make_pair(0U, &Mips::GPR32RegClass);
}
- if (VT == MVT::i64 && !isGP64bit())
+ if (VT == MVT::i64 && !Subtarget->isGP64bit())
return std::make_pair(0U, &Mips::GPR32RegClass);
- if (VT == MVT::i64 && isGP64bit())
+ if (VT == MVT::i64 && Subtarget->isGP64bit())
return std::make_pair(0U, &Mips::GPR64RegClass);
// This will generate an error message
return std::make_pair(0U, nullptr);
@@ -3169,7 +3260,7 @@ bool MipsTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
}
unsigned MipsTargetLowering::getJumpTableEncoding() const {
- if (isN64())
+ if (Subtarget->isABI_N64())
return MachineJumpTableInfo::EK_GPRel64BlockAddress;
return TargetLowering::getJumpTableEncoding();
diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h
index 4ac33bf..4701bc4 100644
--- a/lib/Target/Mips/MipsISelLowering.h
+++ b/lib/Target/Mips/MipsISelLowering.h
@@ -17,7 +17,6 @@
#include "MCTargetDesc/MipsBaseInfo.h"
#include "Mips.h"
-#include "MipsSubtarget.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/IR/Function.h"
@@ -210,6 +209,7 @@ namespace llvm {
// TargetLowering Implementation
//===--------------------------------------------------------------------===//
class MipsFunctionInfo;
+ class MipsSubtarget;
class MipsTargetLowering : public TargetLowering {
bool isMicroMips;
@@ -438,12 +438,6 @@ namespace llvm {
// Subtarget Info
const MipsSubtarget *Subtarget;
- bool hasMips64() const { return Subtarget->hasMips64(); }
- bool isGP64bit() const { return Subtarget->isGP64bit(); }
- bool isO32() const { return Subtarget->isABI_O32(); }
- bool isN32() const { return Subtarget->isABI_N32(); }
- bool isN64() const { return Subtarget->isABI_N64(); }
-
private:
// Create a TargetGlobalAddress node.
SDValue getTargetNode(GlobalAddressSDNode *N, EVT Ty, SelectionDAG &DAG,
@@ -598,6 +592,12 @@ namespace llvm {
unsigned getJumpTableEncoding() const override;
+ /// Emit a sign-extension using sll/sra, seb, or seh appropriately.
+ MachineBasicBlock *emitSignExtendToI32InReg(MachineInstr *MI,
+ MachineBasicBlock *BB,
+ unsigned Size, unsigned DstReg,
unsigned SrcReg) const;
+
MachineBasicBlock *emitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
unsigned Size, unsigned BinOpcode, bool Nand = false) const;
MachineBasicBlock *emitAtomicBinaryPartword(MachineInstr *MI,
@@ -607,6 +607,7 @@ namespace llvm {
MachineBasicBlock *BB, unsigned Size) const;
MachineBasicBlock *emitAtomicCmpSwapPartword(MachineInstr *MI,
MachineBasicBlock *BB, unsigned Size) const;
+ MachineBasicBlock *emitSEL_D(MachineInstr *MI, MachineBasicBlock *BB) const;
};
/// Create MipsTargetLowering objects.
diff --git a/lib/Target/Mips/MipsInstrFPU.td b/lib/Target/Mips/MipsInstrFPU.td
index 32cda3b..2260d53 100644
--- a/lib/Target/Mips/MipsInstrFPU.td
+++ b/lib/Target/Mips/MipsInstrFPU.td
@@ -57,13 +57,13 @@ let PrintMethod = "printFCCOperand", DecoderMethod = "DecodeCondCode" in
// Feature predicates.
//===----------------------------------------------------------------------===//
-def IsFP64bit : Predicate<"Subtarget.isFP64bit()">,
+def IsFP64bit : Predicate<"Subtarget->isFP64bit()">,
AssemblerPredicate<"FeatureFP64Bit">;
-def NotFP64bit : Predicate<"!Subtarget.isFP64bit()">,
+def NotFP64bit : Predicate<"!Subtarget->isFP64bit()">,
AssemblerPredicate<"!FeatureFP64Bit">;
-def IsSingleFloat : Predicate<"Subtarget.isSingleFloat()">,
+def IsSingleFloat : Predicate<"Subtarget->isSingleFloat()">,
AssemblerPredicate<"FeatureSingleFloat">;
-def IsNotSingleFloat : Predicate<"!Subtarget.isSingleFloat()">,
+def IsNotSingleFloat : Predicate<"!Subtarget->isSingleFloat()">,
AssemblerPredicate<"!FeatureSingleFloat">;
//===----------------------------------------------------------------------===//
@@ -153,6 +153,15 @@ class MTC1_FT<string opstr, RegisterOperand DstRC, RegisterOperand SrcRC,
InstSE<(outs DstRC:$fs), (ins SrcRC:$rt), !strconcat(opstr, "\t$rt, $fs"),
[(set DstRC:$fs, (OpNode SrcRC:$rt))], Itin, FrmFR, opstr>;
+class MTC1_64_FT<string opstr, RegisterOperand DstRC, RegisterOperand SrcRC,
+ InstrItinClass Itin> :
+ InstSE<(outs DstRC:$fs), (ins DstRC:$fs_in, SrcRC:$rt),
+ !strconcat(opstr, "\t$rt, $fs"), [], Itin, FrmFR, opstr> {
+ // $fs_in is part of a white lie to work around a widespread bug in the FPU
+ // implementation. See expandBuildPairF64 for details.
+ let Constraints = "$fs = $fs_in";
+}
+
class LW_FT<string opstr, RegisterOperand RC, InstrItinClass Itin,
SDPatternOperator OpNode= null_frag> :
InstSE<(outs RC:$rt), (ins mem:$addr), !strconcat(opstr, "\t$rt, $addr"),
@@ -249,11 +258,11 @@ multiclass C_COND_M<string TypeStr, RegisterOperand RC, bits<5> fmt,
def C_NGT_#NAME : C_COND_FT<"ngt", TypeStr, RC, itin>, C_COND_FM<fmt, 15>;
}
-defm S : C_COND_M<"s", FGR32Opnd, 16, II_C_CC_S>;
-defm D32 : C_COND_M<"d", AFGR64Opnd, 17, II_C_CC_D>,
+defm S : C_COND_M<"s", FGR32Opnd, 16, II_C_CC_S>, ISA_MIPS1_NOT_32R6_64R6;
+defm D32 : C_COND_M<"d", AFGR64Opnd, 17, II_C_CC_D>, ISA_MIPS1_NOT_32R6_64R6,
AdditionalRequires<[NotFP64bit]>;
let DecoderNamespace = "Mips64" in
-defm D64 : C_COND_M<"d", FGR64Opnd, 17, II_C_CC_D>,
+defm D64 : C_COND_M<"d", FGR64Opnd, 17, II_C_CC_D>, ISA_MIPS1_NOT_32R6_64R6,
AdditionalRequires<[IsFP64bit]>;
//===----------------------------------------------------------------------===//
@@ -355,8 +364,12 @@ def MTC1 : MMRel, MTC1_FT<"mtc1", FGR32Opnd, GPR32Opnd, II_MTC1,
bitconvert>, MFC1_FM<4>;
def MFHC1 : MMRel, MFC1_FT<"mfhc1", GPR32Opnd, FGRH32Opnd, II_MFHC1>,
MFC1_FM<3>, ISA_MIPS32R2;
-def MTHC1 : MMRel, MTC1_FT<"mthc1", FGRH32Opnd, GPR32Opnd, II_MTHC1>,
- MFC1_FM<7>, ISA_MIPS32R2;
+def MTHC1_D32 : MMRel, MTC1_64_FT<"mthc1", FGR64Opnd, GPR32Opnd, II_MTHC1>,
+ MFC1_FM<7>, ISA_MIPS32R2, AdditionalRequires<[NotFP64bit]>;
+def MTHC1_D64 : MTC1_64_FT<"mthc1", AFGR64Opnd, GPR32Opnd, II_MTHC1>,
+ MFC1_FM<7>, ISA_MIPS32R2, AdditionalRequires<[IsFP64bit]> {
+ let DecoderNamespace = "Mips64";
+}
def DMFC1 : MFC1_FT<"dmfc1", GPR64Opnd, FGR64Opnd, II_DMFC1,
bitconvert>, MFC1_FM<1>, ISA_MIPS3;
def DMTC1 : MTC1_FT<"dmtc1", FGR64Opnd, GPR64Opnd, II_DMTC1,
@@ -390,56 +403,64 @@ def SDC1 : MMRel, SW_FT<"sdc1", AFGR64Opnd, II_SDC1, store>, LW_FM<0x3d>,
// Cop2 Memory Instructions
// FIXME: These aren't really FPU instructions and as such don't belong in this
// file
-def LWC2 : LW_FT<"lwc2", COP2Opnd, NoItinerary, load>, LW_FM<0x32>;
-def SWC2 : SW_FT<"swc2", COP2Opnd, NoItinerary, store>, LW_FM<0x3a>;
-def LDC2 : LW_FT<"ldc2", COP2Opnd, NoItinerary, load>, LW_FM<0x36>, ISA_MIPS2;
-def SDC2 : SW_FT<"sdc2", COP2Opnd, NoItinerary, store>, LW_FM<0x3e>, ISA_MIPS2;
+def LWC2 : LW_FT<"lwc2", COP2Opnd, NoItinerary, load>, LW_FM<0x32>,
+ ISA_MIPS1_NOT_32R6_64R6;
+def SWC2 : SW_FT<"swc2", COP2Opnd, NoItinerary, store>, LW_FM<0x3a>,
+ ISA_MIPS1_NOT_32R6_64R6;
+def LDC2 : LW_FT<"ldc2", COP2Opnd, NoItinerary, load>, LW_FM<0x36>,
+ ISA_MIPS2_NOT_32R6_64R6;
+def SDC2 : SW_FT<"sdc2", COP2Opnd, NoItinerary, store>, LW_FM<0x3e>,
+ ISA_MIPS2_NOT_32R6_64R6;
// Cop3 Memory Instructions
// FIXME: These aren't really FPU instructions and as such don't belong in this
// file
-def LWC3 : LW_FT<"lwc3", COP3Opnd, NoItinerary, load>, LW_FM<0x33>;
-def SWC3 : SW_FT<"swc3", COP3Opnd, NoItinerary, store>, LW_FM<0x3b>;
-def LDC3 : LW_FT<"ldc3", COP3Opnd, NoItinerary, load>, LW_FM<0x37>, ISA_MIPS2;
-def SDC3 : SW_FT<"sdc3", COP3Opnd, NoItinerary, store>, LW_FM<0x3f>, ISA_MIPS2;
+let DecoderNamespace = "COP3_" in {
+ def LWC3 : LW_FT<"lwc3", COP3Opnd, NoItinerary, load>, LW_FM<0x33>;
+ def SWC3 : SW_FT<"swc3", COP3Opnd, NoItinerary, store>, LW_FM<0x3b>;
+ def LDC3 : LW_FT<"ldc3", COP3Opnd, NoItinerary, load>, LW_FM<0x37>,
+ ISA_MIPS2;
+ def SDC3 : SW_FT<"sdc3", COP3Opnd, NoItinerary, store>, LW_FM<0x3f>,
+ ISA_MIPS2;
+}
// Indexed loads and stores.
// Base register + offset register addressing mode (indicated by "x" in the
// instruction mnemonic) is disallowed under NaCl.
let AdditionalPredicates = [IsNotNaCl] in {
def LWXC1 : MMRel, LWXC1_FT<"lwxc1", FGR32Opnd, II_LWXC1, load>, LWXC1_FM<0>,
- INSN_MIPS4_32R2;
+ INSN_MIPS4_32R2_NOT_32R6_64R6;
def SWXC1 : MMRel, SWXC1_FT<"swxc1", FGR32Opnd, II_SWXC1, store>, SWXC1_FM<8>,
- INSN_MIPS4_32R2;
+ INSN_MIPS4_32R2_NOT_32R6_64R6;
}
let AdditionalPredicates = [NotInMicroMips, IsNotNaCl] in {
def LDXC1 : LWXC1_FT<"ldxc1", AFGR64Opnd, II_LDXC1, load>, LWXC1_FM<1>,
- INSN_MIPS4_32R2, FGR_32;
+ INSN_MIPS4_32R2_NOT_32R6_64R6, FGR_32;
def SDXC1 : SWXC1_FT<"sdxc1", AFGR64Opnd, II_SDXC1, store>, SWXC1_FM<9>,
- INSN_MIPS4_32R2, FGR_32;
+ INSN_MIPS4_32R2_NOT_32R6_64R6, FGR_32;
}
let DecoderNamespace="Mips64" in {
def LDXC164 : LWXC1_FT<"ldxc1", FGR64Opnd, II_LDXC1, load>, LWXC1_FM<1>,
- INSN_MIPS4_32R2, FGR_64;
+ INSN_MIPS4_32R2_NOT_32R6_64R6, FGR_64;
def SDXC164 : SWXC1_FT<"sdxc1", FGR64Opnd, II_SDXC1, store>, SWXC1_FM<9>,
- INSN_MIPS4_32R2, FGR_64;
+ INSN_MIPS4_32R2_NOT_32R6_64R6, FGR_64;
}
// Load/store doubleword indexed unaligned.
let AdditionalPredicates = [IsNotNaCl] in {
def LUXC1 : MMRel, LWXC1_FT<"luxc1", AFGR64Opnd, II_LUXC1>, LWXC1_FM<0x5>,
- INSN_MIPS5_32R2, FGR_32;
+ INSN_MIPS5_32R2_NOT_32R6_64R6, FGR_32;
def SUXC1 : MMRel, SWXC1_FT<"suxc1", AFGR64Opnd, II_SUXC1>, SWXC1_FM<0xd>,
- INSN_MIPS5_32R2, FGR_32;
+ INSN_MIPS5_32R2_NOT_32R6_64R6, FGR_32;
}
let DecoderNamespace="Mips64" in {
def LUXC164 : LWXC1_FT<"luxc1", FGR64Opnd, II_LUXC1>, LWXC1_FM<0x5>,
- INSN_MIPS5_32R2, FGR_64;
+ INSN_MIPS5_32R2_NOT_32R6_64R6, FGR_64;
def SUXC164 : SWXC1_FT<"suxc1", FGR64Opnd, II_SUXC1>, SWXC1_FM<0xd>,
- INSN_MIPS5_32R2, FGR_64;
+ INSN_MIPS5_32R2_NOT_32R6_64R6, FGR_64;
}
/// Floating-point Arithmetic
@@ -457,42 +478,42 @@ def FSUB_S : MMRel, ADDS_FT<"sub.s", FGR32Opnd, II_SUB_S, 0, fsub>,
defm FSUB : ADDS_M<"sub.d", II_SUB_D, 0, fsub>, ADDS_FM<0x01, 17>;
def MADD_S : MMRel, MADDS_FT<"madd.s", FGR32Opnd, II_MADD_S, fadd>,
- MADDS_FM<4, 0>, ISA_MIPS32R2;
+ MADDS_FM<4, 0>, ISA_MIPS32R2_NOT_32R6_64R6;
def MSUB_S : MMRel, MADDS_FT<"msub.s", FGR32Opnd, II_MSUB_S, fsub>,
- MADDS_FM<5, 0>, ISA_MIPS32R2;
+ MADDS_FM<5, 0>, ISA_MIPS32R2_NOT_32R6_64R6;
let AdditionalPredicates = [NoNaNsFPMath] in {
def NMADD_S : MMRel, NMADDS_FT<"nmadd.s", FGR32Opnd, II_NMADD_S, fadd>,
- MADDS_FM<6, 0>, ISA_MIPS32R2;
+ MADDS_FM<6, 0>, ISA_MIPS32R2_NOT_32R6_64R6;
def NMSUB_S : MMRel, NMADDS_FT<"nmsub.s", FGR32Opnd, II_NMSUB_S, fsub>,
- MADDS_FM<7, 0>, ISA_MIPS32R2;
+ MADDS_FM<7, 0>, ISA_MIPS32R2_NOT_32R6_64R6;
}
def MADD_D32 : MMRel, MADDS_FT<"madd.d", AFGR64Opnd, II_MADD_D, fadd>,
- MADDS_FM<4, 1>, ISA_MIPS32R2, FGR_32;
+ MADDS_FM<4, 1>, ISA_MIPS32R2_NOT_32R6_64R6, FGR_32;
def MSUB_D32 : MMRel, MADDS_FT<"msub.d", AFGR64Opnd, II_MSUB_D, fsub>,
- MADDS_FM<5, 1>, ISA_MIPS32R2, FGR_32;
+ MADDS_FM<5, 1>, ISA_MIPS32R2_NOT_32R6_64R6, FGR_32;
let AdditionalPredicates = [NoNaNsFPMath] in {
def NMADD_D32 : MMRel, NMADDS_FT<"nmadd.d", AFGR64Opnd, II_NMADD_D, fadd>,
- MADDS_FM<6, 1>, ISA_MIPS32R2, FGR_32;
+ MADDS_FM<6, 1>, ISA_MIPS32R2_NOT_32R6_64R6, FGR_32;
def NMSUB_D32 : MMRel, NMADDS_FT<"nmsub.d", AFGR64Opnd, II_NMSUB_D, fsub>,
- MADDS_FM<7, 1>, ISA_MIPS32R2, FGR_32;
+ MADDS_FM<7, 1>, ISA_MIPS32R2_NOT_32R6_64R6, FGR_32;
}
let isCodeGenOnly=1 in {
def MADD_D64 : MADDS_FT<"madd.d", FGR64Opnd, II_MADD_D, fadd>,
- MADDS_FM<4, 1>, ISA_MIPS32R2, FGR_64;
+ MADDS_FM<4, 1>, ISA_MIPS32R2_NOT_32R6_64R6, FGR_64;
def MSUB_D64 : MADDS_FT<"msub.d", FGR64Opnd, II_MSUB_D, fsub>,
- MADDS_FM<5, 1>, ISA_MIPS32R2, FGR_64;
+ MADDS_FM<5, 1>, ISA_MIPS32R2_NOT_32R6_64R6, FGR_64;
}
let AdditionalPredicates = [NoNaNsFPMath],
isCodeGenOnly=1 in {
def NMADD_D64 : NMADDS_FT<"nmadd.d", FGR64Opnd, II_NMADD_D, fadd>,
- MADDS_FM<6, 1>, ISA_MIPS32R2, FGR_64;
+ MADDS_FM<6, 1>, ISA_MIPS32R2_NOT_32R6_64R6, FGR_64;
def NMSUB_D64 : NMADDS_FT<"nmsub.d", FGR64Opnd, II_NMSUB_D, fsub>,
- MADDS_FM<7, 1>, ISA_MIPS32R2, FGR_64;
+ MADDS_FM<7, 1>, ISA_MIPS32R2_NOT_32R6_64R6, FGR_64;
}
//===----------------------------------------------------------------------===//
@@ -504,9 +525,9 @@ def MIPS_BRANCH_F : PatLeaf<(i32 0)>;
def MIPS_BRANCH_T : PatLeaf<(i32 1)>;
def BC1F : MMRel, BC1F_FT<"bc1f", brtarget, IIBranch, MIPS_BRANCH_F>,
- BC1F_FM<0, 0>;
+ BC1F_FM<0, 0>, ISA_MIPS1_NOT_32R6_64R6;
def BC1T : MMRel, BC1F_FT<"bc1t", brtarget, IIBranch, MIPS_BRANCH_T>,
- BC1F_FM<0, 1>;
+ BC1F_FM<0, 1>, ISA_MIPS1_NOT_32R6_64R6;
//===----------------------------------------------------------------------===//
// Floating Point Flag Conditions
@@ -531,12 +552,13 @@ def MIPS_FCOND_LE : PatLeaf<(i32 14)>;
def MIPS_FCOND_NGT : PatLeaf<(i32 15)>;
/// Floating Point Compare
-def FCMP_S32 : MMRel, CEQS_FT<"s", FGR32, II_C_CC_S, MipsFPCmp>, CEQS_FM<16>;
+def FCMP_S32 : MMRel, CEQS_FT<"s", FGR32, II_C_CC_S, MipsFPCmp>, CEQS_FM<16>,
+ ISA_MIPS1_NOT_32R6_64R6;
def FCMP_D32 : MMRel, CEQS_FT<"d", AFGR64, II_C_CC_D, MipsFPCmp>, CEQS_FM<17>,
- AdditionalRequires<[NotFP64bit]>;
+ ISA_MIPS1_NOT_32R6_64R6, AdditionalRequires<[NotFP64bit]>;
let DecoderNamespace = "Mips64" in
def FCMP_D64 : CEQS_FT<"d", FGR64, II_C_CC_D, MipsFPCmp>, CEQS_FM<17>,
- AdditionalRequires<[IsFP64bit]>;
+ ISA_MIPS1_NOT_32R6_64R6, AdditionalRequires<[IsFP64bit]>;
//===----------------------------------------------------------------------===//
// Floating Point Pseudo-Instructions
@@ -569,8 +591,10 @@ def ExtractElementF64_64 : ExtractElementF64Base<FGR64Opnd>,
//===----------------------------------------------------------------------===//
// InstAliases.
//===----------------------------------------------------------------------===//
-def : MipsInstAlias<"bc1t $offset", (BC1T FCC0, brtarget:$offset)>;
-def : MipsInstAlias<"bc1f $offset", (BC1F FCC0, brtarget:$offset)>;
+def : MipsInstAlias<"bc1t $offset", (BC1T FCC0, brtarget:$offset)>,
+ ISA_MIPS1_NOT_32R6_64R6;
+def : MipsInstAlias<"bc1f $offset", (BC1F FCC0, brtarget:$offset)>,
+ ISA_MIPS1_NOT_32R6_64R6;
//===----------------------------------------------------------------------===//
// Floating Point Patterns
diff --git a/lib/Target/Mips/MipsInstrFormats.td b/lib/Target/Mips/MipsInstrFormats.td
index 0377eab..6a01ae5 100644
--- a/lib/Target/Mips/MipsInstrFormats.td
+++ b/lib/Target/Mips/MipsInstrFormats.td
@@ -844,6 +844,44 @@ class BARRIER_FM<bits<5> op> : StdArch {
let Inst{5-0} = 0; // SLL
}
+class SDBBP_FM : StdArch {
+ bits<20> code_;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0b011100; // SPECIAL2
+ let Inst{25-6} = code_;
+ let Inst{5-0} = 0b111111; // SDBBP
+}
+
+class JR_HB_FM<bits<6> op> : StdArch {
+ bits<5> rs;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0; // SPECIAL
+ let Inst{25-21} = rs;
+ let Inst{20-11} = 0;
+ let Inst{10} = 1;
+ let Inst{9-6} = 0;
+ let Inst{5-0} = op;
+}
+
+class JALR_HB_FM<bits<6> op> : StdArch {
+ bits<5> rd;
+ bits<5> rs;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0; // SPECIAL
+ let Inst{25-21} = rs;
+ let Inst{20-16} = 0;
+ let Inst{15-11} = rd;
+ let Inst{10} = 1;
+ let Inst{9-6} = 0;
+ let Inst{5-0} = op;
+}
+
class COP0_TLB_FM<bits<6> op> : StdArch {
bits<32> Inst;
@@ -852,3 +890,17 @@ class COP0_TLB_FM<bits<6> op> : StdArch {
let Inst{24-6} = 0;
let Inst{5-0} = op; // Operation
}
+
+class CACHEOP_FM<bits<6> op> : StdArch {
+ bits<21> addr;
+ bits<5> hint;
+ bits<5> base = addr{20-16};
+ bits<16> offset = addr{15-0};
+
+ bits<32> Inst;
+
+ let Inst{31-26} = op;
+ let Inst{25-21} = base;
+ let Inst{20-16} = hint;
+ let Inst{15-0} = offset;
+}
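For anyone decoding these formats by hand, the CACHEOP_FM field layout translates directly into shifts and masks. A host-runnable sketch (encodeCacheOp is an illustrative name, not part of the patch):

#include <cstdint>

// Pack a CACHE/PREF-style instruction word exactly as CACHEOP_FM lays out
// its fields: op in bits 31-26, base in 25-21, hint in 20-16, offset in 15-0.
static uint32_t encodeCacheOp(uint32_t Op, uint32_t Base, uint32_t Hint,
                              uint32_t Offset) {
  return ((Op   & 0x3F) << 26) |
         ((Base & 0x1F) << 21) |
         ((Hint & 0x1F) << 16) |
          (Offset & 0xFFFF);
}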
diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td
index 0d3cb75..8e9472c 100644
--- a/lib/Target/Mips/MipsInstrInfo.td
+++ b/lib/Target/Mips/MipsInstrInfo.td
@@ -146,61 +146,61 @@ def MipsSDR : SDNode<"MipsISD::SDR", SDTStore,
//===----------------------------------------------------------------------===//
// Mips Instruction Predicate Definitions.
//===----------------------------------------------------------------------===//
-def HasMips2 : Predicate<"Subtarget.hasMips2()">,
+def HasMips2 : Predicate<"Subtarget->hasMips2()">,
AssemblerPredicate<"FeatureMips2">;
-def HasMips3_32 : Predicate<"Subtarget.hasMips3_32()">,
+def HasMips3_32 : Predicate<"Subtarget->hasMips3_32()">,
AssemblerPredicate<"FeatureMips3_32">;
-def HasMips3_32r2 : Predicate<"Subtarget.hasMips3_32r2()">,
+def HasMips3_32r2 : Predicate<"Subtarget->hasMips3_32r2()">,
AssemblerPredicate<"FeatureMips3_32r2">;
-def HasMips3 : Predicate<"Subtarget.hasMips3()">,
+def HasMips3 : Predicate<"Subtarget->hasMips3()">,
AssemblerPredicate<"FeatureMips3">;
-def HasMips4_32 : Predicate<"Subtarget.hasMips4_32()">,
+def HasMips4_32 : Predicate<"Subtarget->hasMips4_32()">,
AssemblerPredicate<"FeatureMips4_32">;
-def HasMips4_32r2 : Predicate<"Subtarget.hasMips4_32r2()">,
+def HasMips4_32r2 : Predicate<"Subtarget->hasMips4_32r2()">,
AssemblerPredicate<"FeatureMips4_32r2">;
-def HasMips5_32r2 : Predicate<"Subtarget.hasMips5_32r2()">,
+def HasMips5_32r2 : Predicate<"Subtarget->hasMips5_32r2()">,
AssemblerPredicate<"FeatureMips5_32r2">;
-def HasMips32 : Predicate<"Subtarget.hasMips32()">,
+def HasMips32 : Predicate<"Subtarget->hasMips32()">,
AssemblerPredicate<"FeatureMips32">;
-def HasMips32r2 : Predicate<"Subtarget.hasMips32r2()">,
+def HasMips32r2 : Predicate<"Subtarget->hasMips32r2()">,
AssemblerPredicate<"FeatureMips32r2">;
-def HasMips32r6 : Predicate<"Subtarget.hasMips32r6()">,
+def HasMips32r6 : Predicate<"Subtarget->hasMips32r6()">,
AssemblerPredicate<"FeatureMips32r6">;
-def NotMips32r6 : Predicate<"!Subtarget.hasMips32r6()">,
+def NotMips32r6 : Predicate<"!Subtarget->hasMips32r6()">,
AssemblerPredicate<"!FeatureMips32r6">;
-def IsGP64bit : Predicate<"Subtarget.isGP64bit()">,
+def IsGP64bit : Predicate<"Subtarget->isGP64bit()">,
AssemblerPredicate<"FeatureGP64Bit">;
-def IsGP32bit : Predicate<"!Subtarget.isGP64bit()">,
+def IsGP32bit : Predicate<"!Subtarget->isGP64bit()">,
AssemblerPredicate<"!FeatureGP64Bit">;
-def HasMips64 : Predicate<"Subtarget.hasMips64()">,
+def HasMips64 : Predicate<"Subtarget->hasMips64()">,
AssemblerPredicate<"FeatureMips64">;
-def HasMips64r2 : Predicate<"Subtarget.hasMips64r2()">,
+def HasMips64r2 : Predicate<"Subtarget->hasMips64r2()">,
AssemblerPredicate<"FeatureMips64r2">;
-def HasMips64r6 : Predicate<"Subtarget.hasMips64r6()">,
+def HasMips64r6 : Predicate<"Subtarget->hasMips64r6()">,
AssemblerPredicate<"FeatureMips64r6">;
-def NotMips64r6 : Predicate<"!Subtarget.hasMips64r6()">,
+def NotMips64r6 : Predicate<"!Subtarget->hasMips64r6()">,
AssemblerPredicate<"!FeatureMips64r6">;
-def IsN64 : Predicate<"Subtarget.isABI_N64()">,
+def IsN64 : Predicate<"Subtarget->isABI_N64()">,
AssemblerPredicate<"FeatureN64">;
-def InMips16Mode : Predicate<"Subtarget.inMips16Mode()">,
+def InMips16Mode : Predicate<"Subtarget->inMips16Mode()">,
AssemblerPredicate<"FeatureMips16">;
-def HasCnMips : Predicate<"Subtarget.hasCnMips()">,
+def HasCnMips : Predicate<"Subtarget->hasCnMips()">,
AssemblerPredicate<"FeatureCnMips">;
def RelocStatic : Predicate<"TM.getRelocationModel() == Reloc::Static">,
AssemblerPredicate<"FeatureMips32">;
def RelocPIC : Predicate<"TM.getRelocationModel() == Reloc::PIC_">,
AssemblerPredicate<"FeatureMips32">;
def NoNaNsFPMath : Predicate<"TM.Options.NoNaNsFPMath">;
-def HasStdEnc : Predicate<"Subtarget.hasStandardEncoding()">,
+def HasStdEnc : Predicate<"Subtarget->hasStandardEncoding()">,
AssemblerPredicate<"!FeatureMips16">;
-def NotDSP : Predicate<"!Subtarget.hasDSP()">;
-def InMicroMips : Predicate<"Subtarget.inMicroMipsMode()">,
+def NotDSP : Predicate<"!Subtarget->hasDSP()">;
+def InMicroMips : Predicate<"Subtarget->inMicroMipsMode()">,
AssemblerPredicate<"FeatureMicroMips">;
-def NotInMicroMips : Predicate<"!Subtarget.inMicroMipsMode()">,
+def NotInMicroMips : Predicate<"!Subtarget->inMicroMipsMode()">,
AssemblerPredicate<"!FeatureMicroMips">;
-def IsLE : Predicate<"Subtarget.isLittle()">;
-def IsBE : Predicate<"!Subtarget.isLittle()">;
-def IsNotNaCl : Predicate<"!Subtarget.isTargetNaCl()">;
+def IsLE : Predicate<"Subtarget->isLittle()">;
+def IsBE : Predicate<"!Subtarget->isLittle()">;
+def IsNotNaCl : Predicate<"!Subtarget->isTargetNaCl()">;
//===----------------------------------------------------------------------===//
// Mips GPR size adjectives.
@@ -232,8 +232,17 @@ class ISA_MIPS3_NOT_32R6_64R6 {
list<Predicate> InsnPredicates = [HasMips3, NotMips32r6, NotMips64r6];
}
class ISA_MIPS32 { list<Predicate> InsnPredicates = [HasMips32]; }
+class ISA_MIPS32_NOT_32R6_64R6 {
+ list<Predicate> InsnPredicates = [HasMips32, NotMips32r6, NotMips64r6];
+}
class ISA_MIPS32R2 { list<Predicate> InsnPredicates = [HasMips32r2]; }
+class ISA_MIPS32R2_NOT_32R6_64R6 {
+ list<Predicate> InsnPredicates = [HasMips32r2, NotMips32r6, NotMips64r6];
+}
class ISA_MIPS64 { list<Predicate> InsnPredicates = [HasMips64]; }
+class ISA_MIPS64_NOT_64R6 {
+ list<Predicate> InsnPredicates = [HasMips64, NotMips64r6];
+}
class ISA_MIPS64R2 { list<Predicate> InsnPredicates = [HasMips64r2]; }
class ISA_MIPS32R6 { list<Predicate> InsnPredicates = [HasMips32r6]; }
class ISA_MIPS64R6 { list<Predicate> InsnPredicates = [HasMips64r6]; }
@@ -241,17 +250,32 @@ class ISA_MIPS64R6 { list<Predicate> InsnPredicates = [HasMips64r6]; }
// The portions of MIPS-III that were also added to MIPS32
class INSN_MIPS3_32 { list<Predicate> InsnPredicates = [HasMips3_32]; }
+// The portions of MIPS-III that were also added to MIPS32 but were removed in
+// MIPS32r6 and MIPS64r6.
+class INSN_MIPS3_32_NOT_32R6_64R6 {
+ list<Predicate> InsnPredicates = [HasMips3_32, NotMips32r6, NotMips64r6];
+}
+
// The portions of MIPS-III that were also added to MIPS32r2
class INSN_MIPS3_32R2 { list<Predicate> InsnPredicates = [HasMips3_32r2]; }
-// The portions of MIPS-IV that were also added to MIPS32
-class INSN_MIPS4_32 { list<Predicate> InsnPredicates = [HasMips4_32]; }
+// The portions of MIPS-IV that were also added to MIPS32 but were removed in
+// MIPS32r6 and MIPS64r6.
+class INSN_MIPS4_32_NOT_32R6_64R6 {
+ list<Predicate> InsnPredicates = [HasMips4_32, NotMips32r6, NotMips64r6];
+}
-// The portions of MIPS-IV that were also added to MIPS32R2
-class INSN_MIPS4_32R2 { list<Predicate> InsnPredicates = [HasMips4_32r2]; }
+// The portions of MIPS-IV that were also added to MIPS32r2 but were removed in
+// MIPS32r6 and MIPS64r6.
+class INSN_MIPS4_32R2_NOT_32R6_64R6 {
+ list<Predicate> InsnPredicates = [HasMips4_32r2, NotMips32r6, NotMips64r6];
+}
-// The portions of MIPS-V that were also added to MIPS32R2
-class INSN_MIPS5_32R2 { list<Predicate> InsnPredicates = [HasMips5_32r2]; }
+// The portions of MIPS-V that were also added to MIPS32r2 but were removed in
+// MIPS32r6 and MIPS64r6.
+class INSN_MIPS5_32R2_NOT_32R6_64R6 {
+ list<Predicate> InsnPredicates = [HasMips5_32r2, NotMips32r6, NotMips64r6];
+}
//===----------------------------------------------------------------------===//
@@ -328,7 +352,9 @@ def calltarget : Operand<iPTR> {
let ParserMatchClass = MipsJumpTargetAsmOperand;
}
+def simm9 : Operand<i32>;
def simm10 : Operand<i32>;
+def simm11 : Operand<i32>;
def simm16 : Operand<i32> {
let DecoderMethod= "DecodeSimm16";
@@ -337,6 +363,13 @@ def simm16 : Operand<i32> {
def simm19_lsl2 : Operand<i32> {
let EncoderMethod = "getSimm19Lsl2Encoding";
let DecoderMethod = "DecodeSimm19Lsl2";
+ let ParserMatchClass = MipsJumpTargetAsmOperand;
+}
+
+def simm18_lsl3 : Operand<i32> {
+ let EncoderMethod = "getSimm18Lsl3Encoding";
+ let DecoderMethod = "DecodeSimm18Lsl3";
+ let ParserMatchClass = MipsJumpTargetAsmOperand;
}
def simm20 : Operand<i32> {
@@ -386,6 +419,15 @@ def MipsMemAsmOperand : AsmOperandClass {
let ParserMethod = "parseMemOperand";
}
+def MipsMemSimm11AsmOperand : AsmOperandClass {
+ let Name = "MemOffsetSimm11";
+ let SuperClasses = [MipsMemAsmOperand];
+ let RenderMethod = "addMemOperands";
+ let ParserMethod = "parseMemOperand";
+ let PredicateMethod = "isMemWithSimmOffset<11>";
+ //let DiagnosticType = "Simm11";
+}
+
def MipsInvertedImmoperand : AsmOperandClass {
let Name = "InvNum";
let RenderMethod = "addImmOperands";
@@ -417,6 +459,17 @@ def mem_msa : mem_generic {
let EncoderMethod = "getMSAMemEncoding";
}
+def mem_simm9 : mem_generic {
+ let MIOperandInfo = (ops ptr_rc, simm9);
+ let EncoderMethod = "getMemEncoding";
+}
+
+def mem_simm11 : mem_generic {
+ let MIOperandInfo = (ops ptr_rc, simm11);
+ let EncoderMethod = "getMemEncoding";
+ let ParserMatchClass = MipsMemSimm11AsmOperand;
+}
+
def mem_ea : Operand<iPTR> {
let PrintMethod = "printMemOperandEA";
let MIOperandInfo = (ops ptr_rc, simm16);
@@ -690,20 +743,11 @@ class JumpFR<string opstr, RegisterOperand RO,
FrmR, opstr>;
// Indirect branch
-class IndirectBranch<string opstr, RegisterOperand RO> :
- JumpFR<opstr, RO, brind> {
+class IndirectBranch<string opstr, RegisterOperand RO> : JumpFR<opstr, RO> {
let isBranch = 1;
let isIndirectBranch = 1;
}
-// Return instruction
-class RetBase<string opstr, RegisterOperand RO>: JumpFR<opstr, RO> {
- let isReturn = 1;
- let isCodeGenOnly = 1;
- let hasCtrlDep = 1;
- let hasExtraSrcRegAllocReq = 1;
-}
-
// Jump and Link (Call)
let isCall=1, hasDelaySlot=1, Defs = [RA] in {
class JumpLink<string opstr, DAGOperand opnd> :
@@ -1042,7 +1086,7 @@ def SUBu : MMRel, ArithLogicR<"subu", GPR32Opnd, 0, II_SUBU, sub>,
ADD_FM<0, 0x23>;
let Defs = [HI0, LO0] in
def MUL : MMRel, ArithLogicR<"mul", GPR32Opnd, 1, II_MUL, mul>,
- ADD_FM<0x1c, 2>, ISA_MIPS32;
+ ADD_FM<0x1c, 2>, ISA_MIPS32_NOT_32R6_64R6;
def ADD : MMRel, ArithLogicR<"add", GPR32Opnd>, ADD_FM<0, 0x20>;
def SUB : MMRel, ArithLogicR<"sub", GPR32Opnd>, ADD_FM<0, 0x22>;
def SLT : MMRel, SetCC_R<"slt", setlt, GPR32Opnd>, ADD_FM<0, 0x2a>;
@@ -1103,7 +1147,7 @@ def SWR : StoreLeftRight<"swr", MipsSWR, GPR32Opnd, II_SWR>, LW_FM<0x2e>,
ISA_MIPS1_NOT_32R6_64R6;
}
-def SYNC : MMRel, SYNC_FT<"sync">, SYNC_FM;
+def SYNC : MMRel, SYNC_FT<"sync">, SYNC_FM, ISA_MIPS32;
def TEQ : MMRel, TEQ_FT<"teq", GPR32Opnd>, TEQ_FM<0x34>;
def TGE : MMRel, TEQ_FT<"tge", GPR32Opnd>, TEQ_FM<0x30>;
def TGEU : MMRel, TEQ_FT<"tgeu", GPR32Opnd>, TEQ_FM<0x31>;
@@ -1127,6 +1171,7 @@ def TNEI : MMRel, TEQI_FT<"tnei", GPR32Opnd>, TEQI_FM<0xe>,
def BREAK : MMRel, BRK_FT<"break">, BRK_FM<0xd>;
def SYSCALL : MMRel, SYS_FT<"syscall">, SYS_FM<0xc>;
def TRAP : TrapBase<BREAK>;
+def SDBBP : SYS_FT<"sdbbp">, SDBBP_FM, ISA_MIPS32_NOT_32R6_64R6;
def ERET : MMRel, ER_FT<"eret">, ER_FM<0x18>, INSN_MIPS3_32;
def DERET : MMRel, ER_FT<"deret">, ER_FM<0x1f>, ISA_MIPS32;
@@ -1139,8 +1184,8 @@ let EncodingPredicates = []<Predicate>, // FIXME: Lack of HasStdEnc is probably
def WAIT : WAIT_FT<"wait">, WAIT_FM;
/// Load-linked, Store-conditional
-def LL : LLBase<"ll", GPR32Opnd>, LW_FM<0x30>, ISA_MIPS2;
-def SC : SCBase<"sc", GPR32Opnd>, LW_FM<0x38>, ISA_MIPS2;
+def LL : LLBase<"ll", GPR32Opnd>, LW_FM<0x30>, ISA_MIPS2_NOT_32R6_64R6;
+def SC : SCBase<"sc", GPR32Opnd>, LW_FM<0x38>, ISA_MIPS2_NOT_32R6_64R6;
}
/// Jump and Branch Instructions
@@ -1161,17 +1206,49 @@ def B : UncondBranch<BEQ>;
def JAL : MMRel, JumpLink<"jal", calltarget>, FJ<3>;
let AdditionalPredicates = [NotInMicroMips] in {
-def JALR : JumpLinkReg<"jalr", GPR32Opnd>, JALR_FM;
-def JALRPseudo : JumpLinkRegPseudo<GPR32Opnd, JALR, RA>;
+ def JALR : JumpLinkReg<"jalr", GPR32Opnd>, JALR_FM;
+ def JALRPseudo : JumpLinkRegPseudo<GPR32Opnd, JALR, RA>;
}
-def JALX : JumpLink<"jalx", calltarget>, FJ<0x1D>;
-def BGEZAL : MMRel, BGEZAL_FT<"bgezal", brtarget, GPR32Opnd>, BGEZAL_FM<0x11>;
-def BLTZAL : MMRel, BGEZAL_FT<"bltzal", brtarget, GPR32Opnd>, BGEZAL_FM<0x10>;
+
+// FIXME: JALX really requires either MIPS16 or microMIPS in addition to MIPS32.
+def JALX : JumpLink<"jalx", calltarget>, FJ<0x1D>, ISA_MIPS32_NOT_32R6_64R6;
+def BGEZAL : MMRel, BGEZAL_FT<"bgezal", brtarget, GPR32Opnd>, BGEZAL_FM<0x11>,
+ ISA_MIPS1_NOT_32R6_64R6;
+def BLTZAL : MMRel, BGEZAL_FT<"bltzal", brtarget, GPR32Opnd>, BGEZAL_FM<0x10>,
+ ISA_MIPS1_NOT_32R6_64R6;
def BAL_BR : BAL_BR_Pseudo<BGEZAL>;
def TAILCALL : TailCall<J>;
def TAILCALL_R : TailCallReg<GPR32Opnd, JR>;
-def RET : MMRel, RetBase<"ret", GPR32Opnd>, MTLO_FM<8>;
+// Indirect branches are matched as PseudoIndirectBranch/PseudoIndirectBranch64
+// and are then expanded to JR, JR64, JALR, or JALR64, depending on the ISA.
+class PseudoIndirectBranchBase<RegisterOperand RO> :
+ MipsPseudo<(outs), (ins RO:$rs), [(brind RO:$rs)], IIBranch> {
+ let isTerminator=1;
+ let isBarrier=1;
+ let hasDelaySlot = 1;
+ let isBranch = 1;
+ let isIndirectBranch = 1;
+}
+
+def PseudoIndirectBranch : PseudoIndirectBranchBase<GPR32Opnd>;
+
+// Return instructions are matched as a RetRA instruction, then are expanded
+// into PseudoReturn/PseudoReturn64 after register allocation. Finally,
+// MipsAsmPrinter expands this into JR, JR64, JALR, or JALR64 depending on the
+// ISA.
+class PseudoReturnBase<RegisterOperand RO> : MipsPseudo<(outs), (ins RO:$rs),
+ [], IIBranch> {
+ let isTerminator = 1;
+ let isBarrier = 1;
+ let hasDelaySlot = 1;
+ let isReturn = 1;
+ let isCodeGenOnly = 1;
+ let hasCtrlDep = 1;
+ let hasExtraSrcRegAllocReq = 1;
+}
+
+def PseudoReturn : PseudoReturnBase<GPR32Opnd>;
// Exception handling related node and instructions.
// The conversion sequence is:
@@ -1196,20 +1273,24 @@ let Uses = [V0, V1], isTerminator = 1, isReturn = 1, isBarrier = 1 in {
/// Multiply and Divide Instructions.
def MULT : MMRel, Mult<"mult", II_MULT, GPR32Opnd, [HI0, LO0]>,
- MULT_FM<0, 0x18>;
+ MULT_FM<0, 0x18>, ISA_MIPS1_NOT_32R6_64R6;
def MULTu : MMRel, Mult<"multu", II_MULTU, GPR32Opnd, [HI0, LO0]>,
- MULT_FM<0, 0x19>;
+ MULT_FM<0, 0x19>, ISA_MIPS1_NOT_32R6_64R6;
def SDIV : MMRel, Div<"div", II_DIV, GPR32Opnd, [HI0, LO0]>,
- MULT_FM<0, 0x1a>;
+ MULT_FM<0, 0x1a>, ISA_MIPS1_NOT_32R6_64R6;
def UDIV : MMRel, Div<"divu", II_DIVU, GPR32Opnd, [HI0, LO0]>,
- MULT_FM<0, 0x1b>;
+ MULT_FM<0, 0x1b>, ISA_MIPS1_NOT_32R6_64R6;
-def MTHI : MMRel, MoveToLOHI<"mthi", GPR32Opnd, [HI0]>, MTLO_FM<0x11>;
-def MTLO : MMRel, MoveToLOHI<"mtlo", GPR32Opnd, [LO0]>, MTLO_FM<0x13>;
+def MTHI : MMRel, MoveToLOHI<"mthi", GPR32Opnd, [HI0]>, MTLO_FM<0x11>,
+ ISA_MIPS1_NOT_32R6_64R6;
+def MTLO : MMRel, MoveToLOHI<"mtlo", GPR32Opnd, [LO0]>, MTLO_FM<0x13>,
+ ISA_MIPS1_NOT_32R6_64R6;
let EncodingPredicates = []<Predicate>, // FIXME: Lack of HasStdEnc is probably a bug
AdditionalPredicates = [NotInMicroMips] in {
-def MFHI : MMRel, MoveFromLOHI<"mfhi", GPR32Opnd, AC0>, MFLO_FM<0x10>;
-def MFLO : MMRel, MoveFromLOHI<"mflo", GPR32Opnd, AC0>, MFLO_FM<0x12>;
+def MFHI : MMRel, MoveFromLOHI<"mfhi", GPR32Opnd, AC0>, MFLO_FM<0x10>,
+ ISA_MIPS1_NOT_32R6_64R6;
+def MFLO : MMRel, MoveFromLOHI<"mflo", GPR32Opnd, AC0>, MFLO_FM<0x12>,
+ ISA_MIPS1_NOT_32R6_64R6;
}
/// Sign Ext In Register Instructions.
@@ -1219,8 +1300,10 @@ def SEH : MMRel, SignExtInReg<"seh", i16, GPR32Opnd, II_SEH>,
SEB_FM<0x18, 0x20>, ISA_MIPS32R2;
/// Count Leading
-def CLZ : MMRel, CountLeading0<"clz", GPR32Opnd>, CLO_FM<0x20>, ISA_MIPS32;
-def CLO : MMRel, CountLeading1<"clo", GPR32Opnd>, CLO_FM<0x21>, ISA_MIPS32;
+def CLZ : MMRel, CountLeading0<"clz", GPR32Opnd>, CLO_FM<0x20>,
+ ISA_MIPS32_NOT_32R6_64R6;
+def CLO : MMRel, CountLeading1<"clo", GPR32Opnd>, CLO_FM<0x21>,
+ ISA_MIPS32_NOT_32R6_64R6;
/// Word Swap Bytes Within Halfwords
def WSBH : MMRel, SubwordSwap<"wsbh", GPR32Opnd>, SEB_FM<2, 0x20>, ISA_MIPS32R2;
@@ -1235,27 +1318,37 @@ def NOP : PseudoSE<(outs), (ins), []>, PseudoInstExpansion<(SLL ZERO, ZERO, 0)>;
def LEA_ADDiu : MMRel, EffectiveAddress<"addiu", GPR32Opnd>, LW_FM<9>;
// MADD*/MSUB*
-def MADD : MMRel, MArithR<"madd", II_MADD, 1>, MULT_FM<0x1c, 0>, ISA_MIPS32;
-def MADDU : MMRel, MArithR<"maddu", II_MADDU, 1>, MULT_FM<0x1c, 1>, ISA_MIPS32;
-def MSUB : MMRel, MArithR<"msub", II_MSUB>, MULT_FM<0x1c, 4>, ISA_MIPS32;
-def MSUBU : MMRel, MArithR<"msubu", II_MSUBU>, MULT_FM<0x1c, 5>, ISA_MIPS32;
+def MADD : MMRel, MArithR<"madd", II_MADD, 1>, MULT_FM<0x1c, 0>,
+ ISA_MIPS32_NOT_32R6_64R6;
+def MADDU : MMRel, MArithR<"maddu", II_MADDU, 1>, MULT_FM<0x1c, 1>,
+ ISA_MIPS32_NOT_32R6_64R6;
+def MSUB : MMRel, MArithR<"msub", II_MSUB>, MULT_FM<0x1c, 4>,
+ ISA_MIPS32_NOT_32R6_64R6;
+def MSUBU : MMRel, MArithR<"msubu", II_MSUBU>, MULT_FM<0x1c, 5>,
+ ISA_MIPS32_NOT_32R6_64R6;
let AdditionalPredicates = [NotDSP] in {
-def PseudoMULT : MultDivPseudo<MULT, ACC64, GPR32Opnd, MipsMult, II_MULT>;
-def PseudoMULTu : MultDivPseudo<MULTu, ACC64, GPR32Opnd, MipsMultu, II_MULTU>;
-def PseudoMFHI : PseudoMFLOHI<GPR32, ACC64, MipsMFHI>;
-def PseudoMFLO : PseudoMFLOHI<GPR32, ACC64, MipsMFLO>;
-def PseudoMTLOHI : PseudoMTLOHI<ACC64, GPR32>;
-def PseudoMADD : MAddSubPseudo<MADD, MipsMAdd, II_MADD>;
-def PseudoMADDU : MAddSubPseudo<MADDU, MipsMAddu, II_MADDU>;
-def PseudoMSUB : MAddSubPseudo<MSUB, MipsMSub, II_MSUB>;
-def PseudoMSUBU : MAddSubPseudo<MSUBU, MipsMSubu, II_MSUBU>;
+def PseudoMULT : MultDivPseudo<MULT, ACC64, GPR32Opnd, MipsMult, II_MULT>,
+ ISA_MIPS1_NOT_32R6_64R6;
+def PseudoMULTu : MultDivPseudo<MULTu, ACC64, GPR32Opnd, MipsMultu, II_MULTU>,
+ ISA_MIPS1_NOT_32R6_64R6;
+def PseudoMFHI : PseudoMFLOHI<GPR32, ACC64, MipsMFHI>, ISA_MIPS1_NOT_32R6_64R6;
+def PseudoMFLO : PseudoMFLOHI<GPR32, ACC64, MipsMFLO>, ISA_MIPS1_NOT_32R6_64R6;
+def PseudoMTLOHI : PseudoMTLOHI<ACC64, GPR32>, ISA_MIPS1_NOT_32R6_64R6;
+def PseudoMADD : MAddSubPseudo<MADD, MipsMAdd, II_MADD>,
+ ISA_MIPS32_NOT_32R6_64R6;
+def PseudoMADDU : MAddSubPseudo<MADDU, MipsMAddu, II_MADDU>,
+ ISA_MIPS32_NOT_32R6_64R6;
+def PseudoMSUB : MAddSubPseudo<MSUB, MipsMSub, II_MSUB>,
+ ISA_MIPS32_NOT_32R6_64R6;
+def PseudoMSUBU : MAddSubPseudo<MSUBU, MipsMSubu, II_MSUBU>,
+ ISA_MIPS32_NOT_32R6_64R6;
}
def PseudoSDIV : MultDivPseudo<SDIV, ACC64, GPR32Opnd, MipsDivRem, II_DIV,
- 0, 1, 1>;
+ 0, 1, 1>, ISA_MIPS1_NOT_32R6_64R6;
def PseudoUDIV : MultDivPseudo<UDIV, ACC64, GPR32Opnd, MipsDivRemU, II_DIVU,
- 0, 1, 1>;
+ 0, 1, 1>, ISA_MIPS1_NOT_32R6_64R6;
def RDHWR : ReadHardware<GPR32Opnd, HWRegsOpnd>, RDHWR_FM;
@@ -1274,6 +1367,46 @@ def SSNOP : Barrier<"ssnop">, BARRIER_FM<1>;
def EHB : Barrier<"ehb">, BARRIER_FM<3>;
def PAUSE : Barrier<"pause">, BARRIER_FM<5>, ISA_MIPS32R2;
+// JR_HB and JALR_HB are defined here using the new-style naming scheme
+// because some of this code is shared with Mips32r6InstrInfo.td and
+// therefore doesn't follow the naming convention of the rest of this file.
+// To avoid mixing old and new styles, the new style was chosen.
+class JR_HB_DESC_BASE<string instr_asm, RegisterOperand GPROpnd> {
+ dag OutOperandList = (outs);
+ dag InOperandList = (ins GPROpnd:$rs);
+ string AsmString = !strconcat(instr_asm, "\t$rs");
+ list<dag> Pattern = [];
+}
+
+class JALR_HB_DESC_BASE<string instr_asm, RegisterOperand GPROpnd> {
+ dag OutOperandList = (outs GPROpnd:$rd);
+ dag InOperandList = (ins GPROpnd:$rs);
+ string AsmString = !strconcat(instr_asm, "\t$rd, $rs");
+ list<dag> Pattern = [];
+}
+
+class JR_HB_DESC : InstSE<(outs), (ins), "", [], NoItinerary, FrmJ>,
+ JR_HB_DESC_BASE<"jr.hb", GPR32Opnd> {
+ let isBranch=1;
+ let isIndirectBranch=1;
+ let hasDelaySlot=1;
+ let isTerminator=1;
+ let isBarrier=1;
+}
+
+class JALR_HB_DESC : InstSE<(outs), (ins), "", [], NoItinerary, FrmJ>,
+ JALR_HB_DESC_BASE<"jalr.hb", GPR32Opnd> {
+ let isIndirectBranch=1;
+ let hasDelaySlot=1;
+}
+
+class JR_HB_ENC : JR_HB_FM<8>;
+class JALR_HB_ENC : JALR_HB_FM<9>;
+
+def JR_HB : JR_HB_DESC, JR_HB_ENC, ISA_MIPS32_NOT_32R6_64R6;
+def JALR_HB : JALR_HB_DESC, JALR_HB_ENC, ISA_MIPS32;
+
class TLB<string asmstr> : InstSE<(outs), (ins), asmstr, [], NoItinerary,
FrmOther>;
def TLBP : TLB<"tlbp">, COP0_TLB_FM<0x08>;
@@ -1281,6 +1414,15 @@ def TLBR : TLB<"tlbr">, COP0_TLB_FM<0x01>;
def TLBWI : TLB<"tlbwi">, COP0_TLB_FM<0x02>;
def TLBWR : TLB<"tlbwr">, COP0_TLB_FM<0x06>;
+class CacheOp<string instr_asm, Operand MemOpnd, RegisterOperand GPROpnd> :
+ InstSE<(outs), (ins MemOpnd:$addr, uimm5:$hint),
+ !strconcat(instr_asm, "\t$hint, $addr"), [], NoItinerary, FrmOther>;
+
+def CACHE : CacheOp<"cache", mem, GPR32Opnd>, CACHEOP_FM<0b101111>,
+ INSN_MIPS3_32_NOT_32R6_64R6;
+def PREF : CacheOp<"pref", mem, GPR32Opnd>, CACHEOP_FM<0b110011>,
+ INSN_MIPS3_32_NOT_32R6_64R6;
+
//===----------------------------------------------------------------------===//
// Instruction aliases
//===----------------------------------------------------------------------===//
@@ -1289,19 +1431,23 @@ def : MipsInstAlias<"move $dst, $src",
GPR_32 {
let AdditionalPredicates = [NotInMicroMips];
}
-def : MipsInstAlias<"bal $offset", (BGEZAL ZERO, brtarget:$offset), 0>;
+def : MipsInstAlias<"bal $offset", (BGEZAL ZERO, brtarget:$offset), 0>,
+ ISA_MIPS1_NOT_32R6_64R6;
def : MipsInstAlias<"addu $rs, $rt, $imm",
(ADDiu GPR32Opnd:$rs, GPR32Opnd:$rt, simm16:$imm), 0>;
def : MipsInstAlias<"add $rs, $rt, $imm",
(ADDi GPR32Opnd:$rs, GPR32Opnd:$rt, simm16:$imm), 0>;
def : MipsInstAlias<"and $rs, $rt, $imm",
(ANDi GPR32Opnd:$rs, GPR32Opnd:$rt, simm16:$imm), 0>;
+def : MipsInstAlias<"and $rs, $imm",
+ (ANDi GPR32Opnd:$rs, GPR32Opnd:$rs, simm16:$imm), 0>;
def : MipsInstAlias<"j $rs", (JR GPR32Opnd:$rs), 0>;
let Predicates = [NotInMicroMips] in {
def : MipsInstAlias<"jalr $rs", (JALR RA, GPR32Opnd:$rs), 0>;
}
def : MipsInstAlias<"jal $rs", (JALR RA, GPR32Opnd:$rs), 0>;
def : MipsInstAlias<"jal $rd,$rs", (JALR GPR32Opnd:$rd, GPR32Opnd:$rs), 0>;
+def : MipsInstAlias<"jalr.hb $rs", (JALR_HB RA, GPR32Opnd:$rs), 1>, ISA_MIPS32;
def : MipsInstAlias<"not $rt, $rs",
(NOR GPR32Opnd:$rt, GPR32Opnd:$rs, ZERO), 0>;
def : MipsInstAlias<"neg $rt, $rs",
@@ -1318,6 +1464,8 @@ def : MipsInstAlias<"xor $rs, $rt, $imm",
(XORi GPR32Opnd:$rs, GPR32Opnd:$rt, uimm16:$imm), 0>;
def : MipsInstAlias<"or $rs, $rt, $imm",
(ORi GPR32Opnd:$rs, GPR32Opnd:$rt, uimm16:$imm), 0>;
+def : MipsInstAlias<"or $rs, $imm",
+ (ORi GPR32Opnd:$rs, GPR32Opnd:$rs, uimm16:$imm), 0>;
def : MipsInstAlias<"nop", (SLL ZERO, ZERO, 0), 1>;
def : MipsInstAlias<"mfc0 $rt, $rd", (MFC0 GPR32Opnd:$rt, GPR32Opnd:$rd, 0), 0>;
def : MipsInstAlias<"mtc0 $rt, $rd", (MTC0 GPR32Opnd:$rt, GPR32Opnd:$rd, 0), 0>;
@@ -1360,6 +1508,9 @@ def : MipsInstAlias<"sra $rd, $rt, $rs",
(SRAV GPR32Opnd:$rd, GPR32Opnd:$rt, GPR32Opnd:$rs), 0>;
def : MipsInstAlias<"srl $rd, $rt, $rs",
(SRLV GPR32Opnd:$rd, GPR32Opnd:$rt, GPR32Opnd:$rs), 0>;
+def : MipsInstAlias<"sdbbp", (SDBBP 0)>, ISA_MIPS32_NOT_32R6_64R6;
+def : MipsInstAlias<"sync",
+ (SYNC 0), 1>, ISA_MIPS2;
//===----------------------------------------------------------------------===//
// Assembler Pseudo Instructions
//===----------------------------------------------------------------------===//
@@ -1412,6 +1563,10 @@ let AdditionalPredicates = [NotDSP] in {
(ADDiu GPR32:$src, imm:$imm)>;
}
+// SYNC
+def : MipsPat<(MipsSync (i32 immz)),
+ (SYNC 0)>, ISA_MIPS2;
+
// Call
def : MipsPat<(MipsJmpLink (i32 tglobaladdr:$dst)),
(JAL tglobaladdr:$dst)>;
diff --git a/lib/Target/Mips/MipsLongBranch.cpp b/lib/Target/Mips/MipsLongBranch.cpp
index acfe76e..c6838a3 100644
--- a/lib/Target/Mips/MipsLongBranch.cpp
+++ b/lib/Target/Mips/MipsLongBranch.cpp
@@ -15,6 +15,7 @@
#include "Mips.h"
#include "MCTargetDesc/MipsBaseInfo.h"
+#include "MCTargetDesc/MipsMCNaCl.h"
#include "MipsTargetMachine.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
@@ -64,7 +65,8 @@ namespace {
: MachineFunctionPass(ID), TM(tm),
IsPIC(TM.getRelocationModel() == Reloc::PIC_),
ABI(TM.getSubtarget<MipsSubtarget>().getTargetABI()),
- LongBranchSeqSize(!IsPIC ? 2 : (ABI == MipsSubtarget::N64 ? 10 : 9)) {}
+ LongBranchSeqSize(!IsPIC ? 2 : (ABI == MipsSubtarget::N64 ? 10 :
+ (!TM.getSubtarget<MipsSubtarget>().isTargetNaCl() ? 9 : 10))) {}
const char *getPassName() const override {
return "Mips Long Branch";
@@ -264,6 +266,13 @@ void MipsLongBranch::expandToLongBranch(MBBInfo &I) {
LongBrMBB->addSuccessor(BalTgtMBB);
BalTgtMBB->addSuccessor(TgtMBB);
+ // We must select between the MIPS32r6/MIPS64r6 BAL (which is a normal
+  // instruction) and the pre-MIPS32r6/MIPS64r6 definition (which is a
+ // pseudo-instruction wrapping BGEZAL).
+
+ const MipsSubtarget &Subtarget = TM.getSubtarget<MipsSubtarget>();
+ unsigned BalOp = Subtarget.hasMips32r6() ? Mips::BAL : Mips::BAL_BR;
+
if (ABI != MipsSubtarget::N64) {
// $longbr:
// addiu $sp, $sp, -8
@@ -305,9 +314,11 @@ void MipsLongBranch::expandToLongBranch(MBBInfo &I) {
BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::LONG_BRANCH_LUi), Mips::AT)
.addMBB(TgtMBB).addMBB(BalTgtMBB);
MIBundleBuilder(*LongBrMBB, Pos)
- .append(BuildMI(*MF, DL, TII->get(Mips::BAL_BR)).addMBB(BalTgtMBB))
- .append(BuildMI(*MF, DL, TII->get(Mips::LONG_BRANCH_ADDiu), Mips::AT)
- .addReg(Mips::AT).addMBB(TgtMBB).addMBB(BalTgtMBB));
+ .append(BuildMI(*MF, DL, TII->get(BalOp)).addMBB(BalTgtMBB))
+ .append(BuildMI(*MF, DL, TII->get(Mips::LONG_BRANCH_ADDiu), Mips::AT)
+ .addReg(Mips::AT)
+ .addMBB(TgtMBB)
+ .addMBB(BalTgtMBB));
Pos = BalTgtMBB->begin();
@@ -316,10 +327,23 @@ void MipsLongBranch::expandToLongBranch(MBBInfo &I) {
BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::LW), Mips::RA)
.addReg(Mips::SP).addImm(0);
- MIBundleBuilder(*BalTgtMBB, Pos)
- .append(BuildMI(*MF, DL, TII->get(Mips::JR)).addReg(Mips::AT))
- .append(BuildMI(*MF, DL, TII->get(Mips::ADDiu), Mips::SP)
- .addReg(Mips::SP).addImm(8));
+ if (!TM.getSubtarget<MipsSubtarget>().isTargetNaCl()) {
+ MIBundleBuilder(*BalTgtMBB, Pos)
+ .append(BuildMI(*MF, DL, TII->get(Mips::JR)).addReg(Mips::AT))
+ .append(BuildMI(*MF, DL, TII->get(Mips::ADDiu), Mips::SP)
+ .addReg(Mips::SP).addImm(8));
+ } else {
+    // In NaCl, modifying $sp is not allowed in a branch delay slot.
+ BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::ADDiu), Mips::SP)
+ .addReg(Mips::SP).addImm(8);
+
+ MIBundleBuilder(*BalTgtMBB, Pos)
+ .append(BuildMI(*MF, DL, TII->get(Mips::JR)).addReg(Mips::AT))
+ .append(BuildMI(*MF, DL, TII->get(Mips::NOP)));
+
+ // Bundle-align the target of indirect branch JR.
+ TgtMBB->setAlignment(MIPS_NACL_BUNDLE_ALIGN);
+ }
} else {
// $longbr:
// daddiu $sp, $sp, -16
@@ -364,11 +388,12 @@ void MipsLongBranch::expandToLongBranch(MBBInfo &I) {
.addReg(Mips::AT_64).addImm(16);
MIBundleBuilder(*LongBrMBB, Pos)
- .append(BuildMI(*MF, DL, TII->get(Mips::BAL_BR)).addMBB(BalTgtMBB))
- .append(BuildMI(*MF, DL, TII->get(Mips::LONG_BRANCH_DADDiu),
- Mips::AT_64).addReg(Mips::AT_64)
- .addMBB(TgtMBB, MipsII::MO_ABS_LO)
- .addMBB(BalTgtMBB));
+ .append(BuildMI(*MF, DL, TII->get(BalOp)).addMBB(BalTgtMBB))
+ .append(
+ BuildMI(*MF, DL, TII->get(Mips::LONG_BRANCH_DADDiu), Mips::AT_64)
+ .addReg(Mips::AT_64)
+ .addMBB(TgtMBB, MipsII::MO_ABS_LO)
+ .addMBB(BalTgtMBB));
Pos = BalTgtMBB->begin();
@@ -450,9 +475,18 @@ bool MipsLongBranch::runOnMachineFunction(MachineFunction &F) {
continue;
int ShVal = TM.getSubtarget<MipsSubtarget>().inMicroMipsMode() ? 2 : 4;
+ int64_t Offset = computeOffset(I->Br) / ShVal;
+
+ if (TM.getSubtarget<MipsSubtarget>().isTargetNaCl()) {
+ // The offset calculation does not include sandboxing instructions
+ // that will be added later in the MC layer. Since at this point we
+ // don't know the exact amount of code that "sandboxing" will add, we
+ // conservatively estimate that code will not grow more than 100%.
+ Offset *= 2;
+ }
// Check if offset fits into 16-bit immediate field of branches.
- if (!ForceLongBranch && isInt<16>(computeOffset(I->Br) / ShVal))
+ if (!ForceLongBranch && isInt<16>(Offset))
continue;
I->HasLongBranch = true;
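To make the conservative NaCl estimate above concrete (illustrative numbers only, and assuming computeOffset() returns a byte distance): a branch whose target lies 20,000 standard-encoding instructions away gives computeOffset(I->Br) = 80,000, so Offset = 80,000 / 4 = 20,000, which fits isInt<16>; under NaCl the doubled value of 40,000 exceeds the signed 16-bit range [-32768, 32767], so the branch is expanded to a long branch even though the un-doubled offset would have fit.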
diff --git a/lib/Target/Mips/MipsMSAInstrFormats.td b/lib/Target/Mips/MipsMSAInstrFormats.td
index 6bd0366..bff2d0f 100644
--- a/lib/Target/Mips/MipsMSAInstrFormats.td
+++ b/lib/Target/Mips/MipsMSAInstrFormats.td
@@ -7,7 +7,7 @@
//
//===----------------------------------------------------------------------===//
-def HasMSA : Predicate<"Subtarget.hasMSA()">,
+def HasMSA : Predicate<"Subtarget->hasMSA()">,
AssemblerPredicate<"FeatureMSA">;
class MSAInst : MipsInst<(outs), (ins), "", [], NoItinerary, FrmOther> {
diff --git a/lib/Target/Mips/MipsMachineFunction.h b/lib/Target/Mips/MipsMachineFunction.h
index e9101cc..8c16f82 100644
--- a/lib/Target/Mips/MipsMachineFunction.h
+++ b/lib/Target/Mips/MipsMachineFunction.h
@@ -15,7 +15,6 @@
#define MIPS_MACHINE_FUNCTION_INFO_H
#include "Mips16HardFloatInfo.h"
-#include "MipsSubtarget.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
diff --git a/lib/Target/Mips/MipsRegisterInfo.cpp b/lib/Target/Mips/MipsRegisterInfo.cpp
index 83d25ab..084449b 100644
--- a/lib/Target/Mips/MipsRegisterInfo.cpp
+++ b/lib/Target/Mips/MipsRegisterInfo.cpp
@@ -93,6 +93,9 @@ MipsRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
if (Subtarget.isFP64bit())
return CSR_O32_FP64_SaveList;
+ if (Subtarget.isFPXX())
+ return CSR_O32_FPXX_SaveList;
+
return CSR_O32_SaveList;
}
@@ -110,6 +113,9 @@ MipsRegisterInfo::getCallPreservedMask(CallingConv::ID) const {
if (Subtarget.isFP64bit())
return CSR_O32_FP64_RegMask;
+ if (Subtarget.isFPXX())
+ return CSR_O32_FPXX_RegMask;
+
return CSR_O32_RegMask;
}
@@ -201,6 +207,11 @@ getReservedRegs(const MachineFunction &MF) const {
Reserved.set(Mips::GP_64);
}
+ if (Subtarget.isABI_O32() && !Subtarget.useOddSPReg()) {
+ for (const auto &Reg : Mips::OddSPRegClass)
+ Reserved.set(Reg);
+ }
+
return Reserved;
}
diff --git a/lib/Target/Mips/MipsRegisterInfo.td b/lib/Target/Mips/MipsRegisterInfo.td
index 875a596..6323da3 100644
--- a/lib/Target/Mips/MipsRegisterInfo.td
+++ b/lib/Target/Mips/MipsRegisterInfo.td
@@ -340,6 +340,12 @@ def AFGR64 : RegisterClass<"Mips", [f64], 64, (add
def FGR64 : RegisterClass<"Mips", [f64], 64, (sequence "D%u_64", 0, 31)>;
+// Used to reserve odd registers when given -mattr=+nooddspreg
+def OddSP : RegisterClass<"Mips", [f32], 32,
+ (add (decimate (sequence "F%u", 1, 31), 2),
+ (decimate (sequence "F_HI%u", 1, 31), 2))>,
+ Unallocatable;
+
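In the OddSP definition above, decimate keeps every second element of the sequence starting from the first, so the class contains exactly the odd-numbered single-precision registers F1, F3, ..., F31 (and their F_HI counterparts); the getReservedRegs() change in MipsRegisterInfo.cpp earlier in this patch walks this class to reserve those registers when -mattr=+nooddspreg is in effect on O32.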
// FP control registers.
def CCR : RegisterClass<"Mips", [i32], 32, (sequence "FCR%u", 0, 31)>,
Unallocatable;
@@ -348,6 +354,10 @@ def CCR : RegisterClass<"Mips", [i32], 32, (sequence "FCR%u", 0, 31)>,
def FCC : RegisterClass<"Mips", [i32], 32, (sequence "FCC%u", 0, 7)>,
Unallocatable;
+// MIPS32r6/MIPS64r6 store FPU condition codes in normal FGR registers.
+// This class allows us to represent this in codegen patterns.
+def FGRCC : RegisterClass<"Mips", [i32], 32, (sequence "F%u", 0, 31)>;
+
def MSA128B: RegisterClass<"Mips", [v16i8], 128,
(sequence "W%u", 0, 31)>;
def MSA128H: RegisterClass<"Mips", [v8i16, v8f16], 128,
@@ -512,6 +522,12 @@ def FGR32Opnd : RegisterOperand<FGR32> {
let ParserMatchClass = FGR32AsmOperand;
}
+def FGRCCOpnd : RegisterOperand<FGRCC> {
+  // The assembler doesn't use register classes, so we can re-use
+ // FGR32AsmOperand.
+ let ParserMatchClass = FGR32AsmOperand;
+}
+
def FGRH32Opnd : RegisterOperand<FGRH32> {
let ParserMatchClass = FGRH32AsmOperand;
}
diff --git a/lib/Target/Mips/MipsSEFrameLowering.cpp b/lib/Target/Mips/MipsSEFrameLowering.cpp
index 6ad5821..6573070 100644
--- a/lib/Target/Mips/MipsSEFrameLowering.cpp
+++ b/lib/Target/Mips/MipsSEFrameLowering.cpp
@@ -16,6 +16,7 @@
#include "MipsAnalyzeImmediate.h"
#include "MipsMachineFunction.h"
#include "MipsSEInstrInfo.h"
+#include "MipsSubtarget.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -257,6 +258,9 @@ bool ExpandPseudo::expandCopyACC(MachineBasicBlock &MBB, Iter I,
return true;
}
+MipsSEFrameLowering::MipsSEFrameLowering(const MipsSubtarget &STI)
+ : MipsFrameLowering(STI, STI.stackAlignment()) {}
+
unsigned MipsSEFrameLowering::ehDataReg(unsigned I) const {
static const unsigned EhDataReg[] = {
Mips::A0, Mips::A1, Mips::A2, Mips::A3
diff --git a/lib/Target/Mips/MipsSEFrameLowering.h b/lib/Target/Mips/MipsSEFrameLowering.h
index 5d2801f..e832848 100644
--- a/lib/Target/Mips/MipsSEFrameLowering.h
+++ b/lib/Target/Mips/MipsSEFrameLowering.h
@@ -20,8 +20,7 @@ namespace llvm {
class MipsSEFrameLowering : public MipsFrameLowering {
public:
- explicit MipsSEFrameLowering(const MipsSubtarget &STI)
- : MipsFrameLowering(STI, STI.stackAlignment()) {}
+ explicit MipsSEFrameLowering(const MipsSubtarget &STI);
/// emitProlog/emitEpilog - These methods insert prolog and epilog code into
/// the function.
diff --git a/lib/Target/Mips/MipsSEISelDAGToDAG.cpp b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
index d5385be..6f35947 100644
--- a/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
+++ b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
@@ -37,7 +37,7 @@ using namespace llvm;
#define DEBUG_TYPE "mips-isel"
bool MipsSEDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
- if (Subtarget.inMips16Mode())
+ if (Subtarget->inMips16Mode())
return false;
return MipsDAGToDAGISel::runOnMachineFunction(MF);
}
@@ -134,7 +134,7 @@ void MipsSEDAGToDAGISel::initGlobalBaseReg(MachineFunction &MF) {
unsigned V0, V1, GlobalBaseReg = MipsFI->getGlobalBaseReg();
const TargetRegisterClass *RC;
- if (Subtarget.isABI_N64())
+ if (Subtarget->isABI_N64())
RC = (const TargetRegisterClass*)&Mips::GPR64RegClass;
else
RC = (const TargetRegisterClass*)&Mips::GPR32RegClass;
@@ -142,7 +142,7 @@ void MipsSEDAGToDAGISel::initGlobalBaseReg(MachineFunction &MF) {
V0 = RegInfo.createVirtualRegister(RC);
V1 = RegInfo.createVirtualRegister(RC);
- if (Subtarget.isABI_N64()) {
+ if (Subtarget->isABI_N64()) {
MF.getRegInfo().addLiveIn(Mips::T9_64);
MBB.addLiveIn(Mips::T9_64);
@@ -174,7 +174,7 @@ void MipsSEDAGToDAGISel::initGlobalBaseReg(MachineFunction &MF) {
MF.getRegInfo().addLiveIn(Mips::T9);
MBB.addLiveIn(Mips::T9);
- if (Subtarget.isABI_N32()) {
+ if (Subtarget->isABI_N32()) {
// lui $v0, %hi(%neg(%gp_rel(fname)))
// addu $v1, $v0, $t9
// addiu $globalbasereg, $v1, %lo(%neg(%gp_rel(fname)))
@@ -187,7 +187,7 @@ void MipsSEDAGToDAGISel::initGlobalBaseReg(MachineFunction &MF) {
return;
}
- assert(Subtarget.isABI_O32());
+ assert(Subtarget->isABI_O32());
// For O32 ABI, the following instruction sequence is emitted to initialize
// the global base register:
@@ -408,7 +408,7 @@ bool MipsSEDAGToDAGISel::selectIntAddrMSA(SDValue Addr, SDValue &Base,
// * MSA is enabled
// * N is a ISD::BUILD_VECTOR representing a constant splat
bool MipsSEDAGToDAGISel::selectVSplat(SDNode *N, APInt &Imm) const {
- if (!Subtarget.hasMSA())
+ if (!Subtarget->hasMSA())
return false;
BuildVectorSDNode *Node = dyn_cast<BuildVectorSDNode>(N);
@@ -422,7 +422,7 @@ bool MipsSEDAGToDAGISel::selectVSplat(SDNode *N, APInt &Imm) const {
if (!Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
HasAnyUndefs, 8,
- !Subtarget.isLittle()))
+ !Subtarget->isLittle()))
return false;
Imm = SplatValue;
@@ -648,7 +648,7 @@ std::pair<bool, SDNode*> MipsSEDAGToDAGISel::selectNode(SDNode *Node) {
}
case ISD::ADDE: {
- if (Subtarget.hasDSP()) // Select DSP instructions, ADDSC and ADDWC.
+ if (Subtarget->hasDSP()) // Select DSP instructions, ADDSC and ADDWC.
break;
SDValue InFlag = Node->getOperand(2);
Result = selectAddESubE(Mips::ADDu, InFlag, InFlag.getValue(0), DL, Node);
@@ -658,11 +658,11 @@ std::pair<bool, SDNode*> MipsSEDAGToDAGISel::selectNode(SDNode *Node) {
case ISD::ConstantFP: {
ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(Node);
if (Node->getValueType(0) == MVT::f64 && CN->isExactlyValue(+0.0)) {
- if (Subtarget.isGP64bit()) {
+ if (Subtarget->isGP64bit()) {
SDValue Zero = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL,
Mips::ZERO_64, MVT::i64);
Result = CurDAG->getMachineNode(Mips::DMTC1, DL, MVT::f64, Zero);
- } else if (Subtarget.isFP64bit()) {
+ } else if (Subtarget->isFP64bit()) {
SDValue Zero = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL,
Mips::ZERO, MVT::i32);
Result = CurDAG->getMachineNode(Mips::BuildPairF64_64, DL, MVT::f64,
@@ -813,12 +813,12 @@ std::pair<bool, SDNode*> MipsSEDAGToDAGISel::selectNode(SDNode *Node) {
EVT ResVecTy = BVN->getValueType(0);
EVT ViaVecTy;
- if (!Subtarget.hasMSA() || !BVN->getValueType(0).is128BitVector())
+ if (!Subtarget->hasMSA() || !BVN->getValueType(0).is128BitVector())
return std::make_pair(false, nullptr);
if (!BVN->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
HasAnyUndefs, 8,
- !Subtarget.isLittle()))
+ !Subtarget->isLittle()))
return std::make_pair(false, nullptr);
switch (SplatBitSize) {
diff --git a/lib/Target/Mips/MipsSEISelLowering.cpp b/lib/Target/Mips/MipsSEISelLowering.cpp
index 969d730..be4ca86 100644
--- a/lib/Target/Mips/MipsSEISelLowering.cpp
+++ b/lib/Target/Mips/MipsSEISelLowering.cpp
@@ -39,7 +39,7 @@ MipsSETargetLowering::MipsSETargetLowering(MipsTargetMachine &TM)
// Set up the register classes
addRegisterClass(MVT::i32, &Mips::GPR32RegClass);
- if (isGP64bit())
+ if (Subtarget->isGP64bit())
addRegisterClass(MVT::i64, &Mips::GPR64RegClass);
if (Subtarget->hasDSP() || Subtarget->hasMSA()) {
@@ -120,10 +120,10 @@ MipsSETargetLowering::MipsSETargetLowering(MipsTargetMachine &TM)
if (Subtarget->hasCnMips())
setOperationAction(ISD::MUL, MVT::i64, Legal);
- else if (isGP64bit())
+ else if (Subtarget->isGP64bit())
setOperationAction(ISD::MUL, MVT::i64, Custom);
- if (isGP64bit()) {
+ if (Subtarget->isGP64bit()) {
setOperationAction(ISD::MULHS, MVT::i64, Custom);
setOperationAction(ISD::MULHU, MVT::i64, Custom);
}
@@ -152,6 +152,76 @@ MipsSETargetLowering::MipsSETargetLowering(MipsTargetMachine &TM)
setOperationAction(ISD::STORE, MVT::f64, Custom);
}
+ if (Subtarget->hasMips32r6()) {
+    // MIPS32r6 replaces the accumulator-based multiplies with a
+    // three-register instruction.
+ setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
+ setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
+ setOperationAction(ISD::MUL, MVT::i32, Legal);
+ setOperationAction(ISD::MULHS, MVT::i32, Legal);
+ setOperationAction(ISD::MULHU, MVT::i32, Legal);
+
+ // MIPS32r6 replaces the accumulator-based division/remainder with separate
+    // three-register division and remainder instructions.
+ setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
+ setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
+ setOperationAction(ISD::SDIV, MVT::i32, Legal);
+ setOperationAction(ISD::UDIV, MVT::i32, Legal);
+ setOperationAction(ISD::SREM, MVT::i32, Legal);
+ setOperationAction(ISD::UREM, MVT::i32, Legal);
+
+ // MIPS32r6 replaces conditional moves with an equivalent that removes the
+ // need for three GPR read ports.
+ setOperationAction(ISD::SETCC, MVT::i32, Legal);
+ setOperationAction(ISD::SELECT, MVT::i32, Legal);
+ setOperationAction(ISD::SELECT_CC, MVT::i32, Expand);
+
+ setOperationAction(ISD::SETCC, MVT::f32, Legal);
+ setOperationAction(ISD::SELECT, MVT::f32, Legal);
+ setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
+
+ assert(Subtarget->isFP64bit() && "FR=1 is required for MIPS32r6");
+ setOperationAction(ISD::SETCC, MVT::f64, Legal);
+ setOperationAction(ISD::SELECT, MVT::f64, Legal);
+ setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
+
+ setOperationAction(ISD::BRCOND, MVT::Other, Legal);
+
+ // Floating point > and >= are supported via < and <=
+ setCondCodeAction(ISD::SETOGE, MVT::f32, Expand);
+ setCondCodeAction(ISD::SETOGT, MVT::f32, Expand);
+ setCondCodeAction(ISD::SETUGE, MVT::f32, Expand);
+ setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
+
+ setCondCodeAction(ISD::SETOGE, MVT::f64, Expand);
+ setCondCodeAction(ISD::SETOGT, MVT::f64, Expand);
+ setCondCodeAction(ISD::SETUGE, MVT::f64, Expand);
+ setCondCodeAction(ISD::SETUGT, MVT::f64, Expand);
+ }
+
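The pattern above leans on standard SelectionDAG legalization behavior: with ISD::SDIVREM/ISD::UDIVREM marked Expand while the individual ISD::SDIV, ISD::UDIV, ISD::SREM, and ISD::UREM are Legal, any combined divide-with-remainder node is split into the two separate three-register r6 instructions; likewise, Expand on ISD::SMUL_LOHI/ISD::UMUL_LOHI makes the legalizer fall back to the now-Legal MUL/MULHS/MULHU nodes.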
+ if (Subtarget->hasMips64r6()) {
+    // MIPS64r6 replaces the accumulator-based multiplies with a
+    // three-register instruction.
+ setOperationAction(ISD::MUL, MVT::i64, Legal);
+ setOperationAction(ISD::MULHS, MVT::i64, Legal);
+ setOperationAction(ISD::MULHU, MVT::i64, Legal);
+
+    // MIPS64r6 replaces the accumulator-based division/remainder with
+    // separate three-register division and remainder instructions.
+ setOperationAction(ISD::SDIVREM, MVT::i64, Expand);
+ setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
+ setOperationAction(ISD::SDIV, MVT::i64, Legal);
+ setOperationAction(ISD::UDIV, MVT::i64, Legal);
+ setOperationAction(ISD::SREM, MVT::i64, Legal);
+ setOperationAction(ISD::UREM, MVT::i64, Legal);
+
+ // MIPS64r6 replaces conditional moves with an equivalent that removes the
+ // need for three GPR read ports.
+ setOperationAction(ISD::SETCC, MVT::i64, Legal);
+ setOperationAction(ISD::SELECT, MVT::i64, Legal);
+ setOperationAction(ISD::SELECT_CC, MVT::i64, Expand);
+ }
+
computeRegisterProperties();
}
@@ -160,6 +230,14 @@ llvm::createMipsSETargetLowering(MipsTargetMachine &TM) {
return new MipsSETargetLowering(TM);
}
+const TargetRegisterClass *
+MipsSETargetLowering::getRepRegClassFor(MVT VT) const {
+ if (VT == MVT::Untyped)
+ return Subtarget->hasDSP() ? &Mips::ACC64DSPRegClass : &Mips::ACC64RegClass;
+
+ return TargetLowering::getRepRegClassFor(VT);
+}
+
// Enable MSA support for the given integer type and Register class.
void MipsSETargetLowering::
addMSAIntType(MVT::SimpleValueType Ty, const TargetRegisterClass *RC) {
@@ -449,8 +527,8 @@ static SDValue performADDECombine(SDNode *N, SelectionDAG &DAG,
if (DCI.isBeforeLegalize())
return SDValue();
- if (Subtarget->hasMips32() && N->getValueType(0) == MVT::i32 &&
- selectMADD(N, &DAG))
+ if (Subtarget->hasMips32() && !Subtarget->hasMips32r6() &&
+ N->getValueType(0) == MVT::i32 && selectMADD(N, &DAG))
return SDValue(N, 0);
return SDValue();
@@ -1178,6 +1256,9 @@ SDValue MipsSETargetLowering::lowerSTORE(SDValue Op, SelectionDAG &DAG) const {
SDValue MipsSETargetLowering::lowerMulDiv(SDValue Op, unsigned NewOpc,
bool HasLo, bool HasHi,
SelectionDAG &DAG) const {
+  // MIPS32r6/MIPS64r6 removed the accumulator-based multiplies.
+ assert(!Subtarget->hasMips32r6());
+
EVT Ty = Op.getOperand(0).getValueType();
SDLoc DL(Op);
SDValue Mult = DAG.getNode(NewOpc, DL, MVT::Untyped,
@@ -1651,7 +1732,7 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
case Intrinsic::mips_copy_s_w:
return lowerMSACopyIntr(Op, DAG, MipsISD::VEXTRACT_SEXT_ELT);
case Intrinsic::mips_copy_s_d:
- if (hasMips64())
+ if (Subtarget->hasMips64())
// Lower directly into VEXTRACT_SEXT_ELT since i64 is legal on Mips64.
return lowerMSACopyIntr(Op, DAG, MipsISD::VEXTRACT_SEXT_ELT);
else {
@@ -1666,7 +1747,7 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
case Intrinsic::mips_copy_u_w:
return lowerMSACopyIntr(Op, DAG, MipsISD::VEXTRACT_ZEXT_ELT);
case Intrinsic::mips_copy_u_d:
- if (hasMips64())
+ if (Subtarget->hasMips64())
// Lower directly into VEXTRACT_ZEXT_ELT since i64 is legal on Mips64.
return lowerMSACopyIntr(Op, DAG, MipsISD::VEXTRACT_ZEXT_ELT);
else {
@@ -2943,8 +3024,8 @@ MipsSETargetLowering::emitINSERT_DF_VIDX(MachineInstr *MI,
unsigned SrcValReg = MI->getOperand(3).getReg();
const TargetRegisterClass *VecRC = nullptr;
- const TargetRegisterClass *GPRRC = isGP64bit() ? &Mips::GPR64RegClass
- : &Mips::GPR32RegClass;
+ const TargetRegisterClass *GPRRC =
+ Subtarget->isGP64bit() ? &Mips::GPR64RegClass : &Mips::GPR32RegClass;
unsigned EltLog2Size;
unsigned InsertOp = 0;
unsigned InsveOp = 0;
diff --git a/lib/Target/Mips/MipsSEISelLowering.h b/lib/Target/Mips/MipsSEISelLowering.h
index 03a20ef..13ef6fc 100644
--- a/lib/Target/Mips/MipsSEISelLowering.h
+++ b/lib/Target/Mips/MipsSEISelLowering.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef MipsSEISELLOWERING_H
-#define MipsSEISELLOWERING_H
+#ifndef MIPSSEISELLOWERING_H
+#define MIPSSEISELLOWERING_H
#include "MipsISelLowering.h"
#include "MipsRegisterInfo.h"
@@ -46,13 +46,7 @@ namespace llvm {
return false;
}
- const TargetRegisterClass *getRepRegClassFor(MVT VT) const override {
- if (VT == MVT::Untyped)
- return Subtarget->hasDSP() ? &Mips::ACC64DSPRegClass :
- &Mips::ACC64RegClass;
-
- return TargetLowering::getRepRegClassFor(VT);
- }
+ const TargetRegisterClass *getRepRegClassFor(MVT VT) const override;
private:
bool isEligibleForTailCallOptimization(const MipsCC &MipsCCInfo,
diff --git a/lib/Target/Mips/MipsSEInstrInfo.cpp b/lib/Target/Mips/MipsSEInstrInfo.cpp
index f6f364f..32da749 100644
--- a/lib/Target/Mips/MipsSEInstrInfo.cpp
+++ b/lib/Target/Mips/MipsSEInstrInfo.cpp
@@ -272,7 +272,7 @@ bool MipsSEInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
default:
return false;
case Mips::RetRA:
- expandRetRA(MBB, MI, Mips::RET);
+ expandRetRA(MBB, MI);
break;
case Mips::PseudoMFHI:
Opc = isMicroMips ? Mips::MFHI16_MM : Mips::MFHI;
@@ -428,9 +428,14 @@ unsigned MipsSEInstrInfo::getAnalyzableBrOpc(unsigned Opc) const {
}
void MipsSEInstrInfo::expandRetRA(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I,
- unsigned Opc) const {
- BuildMI(MBB, I, I->getDebugLoc(), get(Opc)).addReg(Mips::RA);
+ MachineBasicBlock::iterator I) const {
+ const auto &Subtarget = TM.getSubtarget<MipsSubtarget>();
+
+ if (Subtarget.isGP64bit())
+ BuildMI(MBB, I, I->getDebugLoc(), get(Mips::PseudoReturn64))
+ .addReg(Mips::RA_64);
+ else
+ BuildMI(MBB, I, I->getDebugLoc(), get(Mips::PseudoReturn)).addReg(Mips::RA);
}
std::pair<bool, bool>
@@ -542,20 +547,31 @@ void MipsSEInstrInfo::expandBuildPairF64(MachineBasicBlock &MBB,
const MCInstrDesc& Mtc1Tdd = get(Mips::MTC1);
DebugLoc dl = I->getDebugLoc();
const TargetRegisterInfo &TRI = getRegisterInfo();
+ bool HasMTHC1 = TM.getSubtarget<MipsSubtarget>().hasMips32r2() ||
+ TM.getSubtarget<MipsSubtarget>().hasMips32r6();
- // For FP32 mode:
- // mtc1 Lo, $fp
- // mtc1 Hi, $fp + 1
- // For FP64 mode:
+ // When mthc1 is available, use:
// mtc1 Lo, $fp
// mthc1 Hi, $fp
+ //
+ // Otherwise, for FP64:
+ // spill + reload via ldc1
+ // This has not been implemented since FP64 on MIPS32 and earlier is not
+ // supported.
+ //
+ // Otherwise, for FP32:
+ // mtc1 Lo, $fp
+ // mtc1 Hi, $fp + 1
BuildMI(MBB, I, dl, Mtc1Tdd, TRI.getSubReg(DstReg, Mips::sub_lo))
.addReg(LoReg);
- if (FP64) {
- // FIXME: The .addReg(DstReg, RegState::Implicit) is a white lie used to
- // temporarily work around a widespread bug in the -mfp64 support.
+ if (HasMTHC1 || FP64) {
+ assert(TM.getSubtarget<MipsSubtarget>().hasMips32r2() &&
+ "MTHC1 requires MIPS32r2");
+
+ // FIXME: The .addReg(DstReg) is a white lie used to temporarily work
+ // around a widespread bug in the -mfp64 support.
// The problem is that none of the 32-bit fpu ops mention the fact
// that they clobber the upper 32-bits of the 64-bit FPR. Fixing that
// requires a major overhaul of the FPU implementation which can't
@@ -565,9 +581,9 @@ void MipsSEInstrInfo::expandBuildPairF64(MachineBasicBlock &MBB,
// We therefore pretend that it reads the bottom 32-bits to
// artificially create a dependency and prevent the scheduler
// changing the behaviour of the code.
- BuildMI(MBB, I, dl, get(Mips::MTHC1), TRI.getSubReg(DstReg, Mips::sub_hi))
- .addReg(HiReg)
- .addReg(DstReg, RegState::Implicit);
+ BuildMI(MBB, I, dl, get(FP64 ? Mips::MTHC1_D64 : Mips::MTHC1_D32), DstReg)
+ .addReg(DstReg)
+ .addReg(HiReg);
} else
BuildMI(MBB, I, dl, Mtc1Tdd, TRI.getSubReg(DstReg, Mips::sub_hi))
.addReg(HiReg);
@@ -580,17 +596,16 @@ void MipsSEInstrInfo::expandEhReturn(MachineBasicBlock &MBB,
// indirect jump to TargetReg
const MipsSubtarget &STI = TM.getSubtarget<MipsSubtarget>();
unsigned ADDU = STI.isABI_N64() ? Mips::DADDu : Mips::ADDu;
- unsigned JR = STI.isABI_N64() ? Mips::JR64 : Mips::JR;
- unsigned SP = STI.isABI_N64() ? Mips::SP_64 : Mips::SP;
- unsigned RA = STI.isABI_N64() ? Mips::RA_64 : Mips::RA;
- unsigned T9 = STI.isABI_N64() ? Mips::T9_64 : Mips::T9;
- unsigned ZERO = STI.isABI_N64() ? Mips::ZERO_64 : Mips::ZERO;
+ unsigned SP = STI.isGP64bit() ? Mips::SP_64 : Mips::SP;
+ unsigned RA = STI.isGP64bit() ? Mips::RA_64 : Mips::RA;
+ unsigned T9 = STI.isGP64bit() ? Mips::T9_64 : Mips::T9;
+ unsigned ZERO = STI.isGP64bit() ? Mips::ZERO_64 : Mips::ZERO;
unsigned OffsetReg = I->getOperand(0).getReg();
unsigned TargetReg = I->getOperand(1).getReg();
// addu $ra, $v0, $zero
// addu $sp, $sp, $v1
- // jr $ra
+ // jr $ra (via RetRA)
if (TM.getRelocationModel() == Reloc::PIC_)
BuildMI(MBB, I, I->getDebugLoc(), TM.getInstrInfo()->get(ADDU), T9)
.addReg(TargetReg).addReg(ZERO);
@@ -598,7 +613,7 @@ void MipsSEInstrInfo::expandEhReturn(MachineBasicBlock &MBB,
.addReg(TargetReg).addReg(ZERO);
BuildMI(MBB, I, I->getDebugLoc(), TM.getInstrInfo()->get(ADDU), SP)
.addReg(SP).addReg(OffsetReg);
- BuildMI(MBB, I, I->getDebugLoc(), TM.getInstrInfo()->get(JR)).addReg(RA);
+ expandRetRA(MBB, I);
}
const MipsInstrInfo *llvm::createMipsSEInstrInfo(MipsTargetMachine &TM) {
diff --git a/lib/Target/Mips/MipsSEInstrInfo.h b/lib/Target/Mips/MipsSEInstrInfo.h
index aa68552..9ac94ce 100644
--- a/lib/Target/Mips/MipsSEInstrInfo.h
+++ b/lib/Target/Mips/MipsSEInstrInfo.h
@@ -81,8 +81,7 @@ public:
private:
unsigned getAnalyzableBrOpc(unsigned Opc) const override;
- void expandRetRA(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
- unsigned Opc) const;
+ void expandRetRA(MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const;
std::pair<bool, bool> compareOpndSize(unsigned Opc,
const MachineFunction &MF) const;
diff --git a/lib/Target/Mips/MipsSelectionDAGInfo.cpp b/lib/Target/Mips/MipsSelectionDAGInfo.cpp
index 0d4398e..edd8f67 100644
--- a/lib/Target/Mips/MipsSelectionDAGInfo.cpp
+++ b/lib/Target/Mips/MipsSelectionDAGInfo.cpp
@@ -16,9 +16,8 @@ using namespace llvm;
#define DEBUG_TYPE "mips-selectiondag-info"
-MipsSelectionDAGInfo::MipsSelectionDAGInfo(const MipsTargetMachine &TM)
- : TargetSelectionDAGInfo(TM) {
-}
+MipsSelectionDAGInfo::MipsSelectionDAGInfo(const DataLayout &DL)
+ : TargetSelectionDAGInfo(&DL) {}
MipsSelectionDAGInfo::~MipsSelectionDAGInfo() {
}
diff --git a/lib/Target/Mips/MipsSelectionDAGInfo.h b/lib/Target/Mips/MipsSelectionDAGInfo.h
index 6cafb55..2b3d527 100644
--- a/lib/Target/Mips/MipsSelectionDAGInfo.h
+++ b/lib/Target/Mips/MipsSelectionDAGInfo.h
@@ -22,7 +22,7 @@ class MipsTargetMachine;
class MipsSelectionDAGInfo : public TargetSelectionDAGInfo {
public:
- explicit MipsSelectionDAGInfo(const MipsTargetMachine &TM);
+ explicit MipsSelectionDAGInfo(const DataLayout &DL);
~MipsSelectionDAGInfo();
};
diff --git a/lib/Target/Mips/MipsSubtarget.cpp b/lib/Target/Mips/MipsSubtarget.cpp
index 74ec064..693daa3 100644
--- a/lib/Target/Mips/MipsSubtarget.cpp
+++ b/lib/Target/Mips/MipsSubtarget.cpp
@@ -60,11 +60,9 @@ Mips16ConstantIslands(
/// Select the Mips CPU for the given triple and cpu name.
/// FIXME: Merge with the copy in MipsMCTargetDesc.cpp
-static inline StringRef selectMipsCPU(StringRef TT, StringRef CPU) {
+static StringRef selectMipsCPU(Triple TT, StringRef CPU) {
if (CPU.empty() || CPU == "generic") {
- Triple TheTriple(TT);
- if (TheTriple.getArch() == Triple::mips ||
- TheTriple.getArch() == Triple::mipsel)
+ if (TT.getArch() == Triple::mips || TT.getArch() == Triple::mipsel)
CPU = "mips32";
else
CPU = "mips64";
@@ -74,39 +72,56 @@ static inline StringRef selectMipsCPU(StringRef TT, StringRef CPU) {
void MipsSubtarget::anchor() { }
+static std::string computeDataLayout(const MipsSubtarget &ST) {
+ std::string Ret = "";
+
+ // There are both little and big endian mips.
+ if (ST.isLittle())
+ Ret += "e";
+ else
+ Ret += "E";
+
+ Ret += "-m:m";
+
+ // Pointers are 32 bit on some ABIs.
+ if (!ST.isABI_N64())
+ Ret += "-p:32:32";
+
+  // 8 and 16 bit integers only need to have natural alignment, but try to
+ // align them to 32 bits. 64 bit integers have natural alignment.
+ Ret += "-i8:8:32-i16:16:32-i64:64";
+
+ // 32 bit registers are always available and the stack is at least 64 bit
+ // aligned. On N64 64 bit registers are also available and the stack is
+ // 128 bit aligned.
+ if (ST.isABI_N64() || ST.isABI_N32())
+ Ret += "-n32:64-S128";
+ else
+ Ret += "-n32-S64";
+
+ return Ret;
+}
+
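Tracing the string construction above (derived by reading the code, not from a build log): a little-endian O32 subtarget produces "e-m:m-p:32:32-i8:8:32-i16:16:32-i64:64-n32-S64", while a big-endian N64 subtarget produces "E-m:m-i8:8:32-i16:16:32-i64:64-n32:64-S128".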
MipsSubtarget::MipsSubtarget(const std::string &TT, const std::string &CPU,
const std::string &FS, bool little,
Reloc::Model _RM, MipsTargetMachine *_TM)
: MipsGenSubtargetInfo(TT, CPU, FS), MipsArchVersion(Mips32),
MipsABI(UnknownABI), IsLittle(little), IsSingleFloat(false),
- IsFP64bit(false), IsNaN2008bit(false), IsGP64bit(false), HasVFPU(false),
- HasCnMips(false), IsLinux(true), HasMips3_32(false), HasMips3_32r2(false),
- HasMips4_32(false), HasMips4_32r2(false), HasMips5_32r2(false),
- InMips16Mode(false), InMips16HardFloat(Mips16HardFloat),
- InMicroMipsMode(false), HasDSP(false), HasDSPR2(false),
- AllowMixed16_32(Mixed16_32 | Mips_Os16), Os16(Mips_Os16), HasMSA(false),
- RM(_RM), OverrideMode(NoOverride), TM(_TM), TargetTriple(TT) {
- std::string CPUName = CPU;
- CPUName = selectMipsCPU(TT, CPUName);
-
- // Parse features string.
- ParseSubtargetFeatures(CPUName, FS);
-
- if (InMips16Mode && !TM->Options.UseSoftFloat) {
- // Hard float for mips16 means essentially to compile as soft float
- // but to use a runtime library for soft float that is written with
- // native mips32 floating point instructions (those runtime routines
- // run in mips32 hard float mode).
- TM->Options.UseSoftFloat = true;
- TM->Options.FloatABIType = FloatABI::Soft;
- InMips16HardFloat = true;
- }
+ IsFPXX(false), IsFP64bit(false), UseOddSPReg(true), IsNaN2008bit(false),
+ IsGP64bit(false), HasVFPU(false), HasCnMips(false), IsLinux(true),
+ HasMips3_32(false), HasMips3_32r2(false), HasMips4_32(false),
+ HasMips4_32r2(false), HasMips5_32r2(false), InMips16Mode(false),
+ InMips16HardFloat(Mips16HardFloat), InMicroMipsMode(false), HasDSP(false),
+ HasDSPR2(false), AllowMixed16_32(Mixed16_32 | Mips_Os16), Os16(Mips_Os16),
+ HasMSA(false), RM(_RM), OverrideMode(NoOverride), TM(_TM),
+ TargetTriple(TT),
+ DL(computeDataLayout(initializeSubtargetDependencies(CPU, FS, TM))),
+ TSInfo(DL), JITInfo(), InstrInfo(MipsInstrInfo::create(*TM)),
+ FrameLowering(MipsFrameLowering::create(*TM, *this)),
+ TLInfo(MipsTargetLowering::create(*TM)) {
PreviousInMips16Mode = InMips16Mode;
- // Initialize scheduling itinerary for the specified CPU.
- InstrItins = getInstrItineraryForCPU(CPUName);
-
// Don't even attempt to generate code for MIPS-I, MIPS-II, MIPS-III, and
// MIPS-V. They have not been tested and currently exist for the integrated
// assembler only.
@@ -137,6 +152,11 @@ MipsSubtarget::MipsSubtarget(const std::string &TT, const std::string &CPU,
"See -mattr=+fp64.",
false);
+ if (!isABI_O32() && !useOddSPReg())
+    report_fatal_error("-mattr=+nooddspreg requires the O32 ABI.",
+ false);
+
if (hasMips32r6()) {
StringRef ISA = hasMips64r6() ? "MIPS64r6" : "MIPS32r6";
@@ -167,6 +187,29 @@ MipsSubtarget::enablePostRAScheduler(CodeGenOpt::Level OptLevel,
return OptLevel >= CodeGenOpt::Aggressive;
}
+MipsSubtarget &
+MipsSubtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS,
+ const TargetMachine *TM) {
+ std::string CPUName = selectMipsCPU(TargetTriple, CPU);
+
+ // Parse features string.
+ ParseSubtargetFeatures(CPUName, FS);
+ // Initialize scheduling itinerary for the specified CPU.
+ InstrItins = getInstrItineraryForCPU(CPUName);
+
+ if (InMips16Mode && !TM->Options.UseSoftFloat) {
+ // Hard float for mips16 means essentially to compile as soft float
+ // but to use a runtime library for soft float that is written with
+ // native mips32 floating point instructions (those runtime routines
+ // run in mips32 hard float mode).
+ TM->Options.UseSoftFloat = true;
+ TM->Options.FloatABIType = FloatABI::Soft;
+ InMips16HardFloat = true;
+ }
+
+ return *this;
+}
+
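The initializer list in the constructor above, DL(computeDataLayout(initializeSubtargetDependencies(CPU, FS, TM))), works because C++ constructs members in declaration order: the feature flags are declared before DL, so they have already been filled in by ParseSubtargetFeatures() by the time computeDataLayout() reads them. A minimal, self-contained sketch of the idiom (hypothetical names, not the real MipsSubtarget):

#include <string>

struct SubtargetSketch {
  bool Is64Bit = false;   // stand-in for a bit set by feature parsing
  std::string Layout;     // must be computed after the features are parsed

  explicit SubtargetSketch(const std::string &FS)
      : Layout(computeLayout(initDeps(FS))) {} // parse first, then compute

  SubtargetSketch &initDeps(const std::string &FS) {
    Is64Bit = (FS == "+64bit");  // pretend feature parsing
    return *this;                // allows chaining into computeLayout()
  }

  static std::string computeLayout(const SubtargetSketch &ST) {
    return ST.Is64Bit ? "p:64:64" : "p:32:32";
  }
};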
//FIXME: This logic for resetting the subtarget along with
// the helper classes can probably be simplified but there are a lot of
// cases so we will defer rewriting this to later.
@@ -186,14 +229,14 @@ void MipsSubtarget::resetSubtarget(MachineFunction *MF) {
return;
OverrideMode = Mips16Override;
PreviousInMips16Mode = true;
- TM->setHelperClassesMips16();
+ setHelperClassesMips16();
return;
} else if (ChangeToNoMips16) {
if (!PreviousInMips16Mode)
return;
OverrideMode = NoMips16Override;
PreviousInMips16Mode = false;
- TM->setHelperClassesMipsSE();
+ setHelperClassesMipsSE();
return;
} else {
if (OverrideMode == NoOverride)
@@ -201,16 +244,52 @@ void MipsSubtarget::resetSubtarget(MachineFunction *MF) {
OverrideMode = NoOverride;
DEBUG(dbgs() << "back to default" << "\n");
if (inMips16Mode() && !PreviousInMips16Mode) {
- TM->setHelperClassesMips16();
+ setHelperClassesMips16();
PreviousInMips16Mode = true;
} else if (!inMips16Mode() && PreviousInMips16Mode) {
- TM->setHelperClassesMipsSE();
+ setHelperClassesMipsSE();
PreviousInMips16Mode = false;
}
return;
}
}
+void MipsSubtarget::setHelperClassesMips16() {
+ InstrInfoSE.swap(InstrInfo);
+ FrameLoweringSE.swap(FrameLowering);
+ TLInfoSE.swap(TLInfo);
+ if (!InstrInfo16) {
+ InstrInfo.reset(MipsInstrInfo::create(*TM));
+ FrameLowering.reset(MipsFrameLowering::create(*TM, *this));
+ TLInfo.reset(MipsTargetLowering::create(*TM));
+ } else {
+ InstrInfo16.swap(InstrInfo);
+ FrameLowering16.swap(FrameLowering);
+ TLInfo16.swap(TLInfo);
+ }
+ assert(TLInfo && "null target lowering 16");
+ assert(InstrInfo && "null instr info 16");
+ assert(FrameLowering && "null frame lowering 16");
+}
+
+void MipsSubtarget::setHelperClassesMipsSE() {
+ InstrInfo16.swap(InstrInfo);
+ FrameLowering16.swap(FrameLowering);
+ TLInfo16.swap(TLInfo);
+ if (!InstrInfoSE) {
+ InstrInfo.reset(MipsInstrInfo::create(*TM));
+ FrameLowering.reset(MipsFrameLowering::create(*TM, *this));
+ TLInfo.reset(MipsTargetLowering::create(*TM));
+ } else {
+ InstrInfoSE.swap(InstrInfo);
+ FrameLoweringSE.swap(FrameLowering);
+ TLInfoSE.swap(TLInfo);
+ }
+ assert(TLInfo && "null target lowering in SE");
+ assert(InstrInfo && "null instr info SE");
+ assert(FrameLowering && "null frame lowering SE");
+}
+
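Both helpers above implement the same lazy caching scheme: the active InstrInfo/FrameLowering/TLInfo objects are parked in the outgoing mode's slots via unique_ptr swaps, and the incoming mode's objects are either revived from their slots or created on first use. A simplified sketch of the idea (hypothetical types, one cached slot instead of three per mode):

#include <memory>

struct Helper { bool Mips16; };  // stand-in for the helper-class bundle

struct ModeSwitchSketch {
  std::unique_ptr<Helper> Active{new Helper{false}};
  std::unique_ptr<Helper> Parked;  // the other mode's helper, once built

  void switchMode(bool Mips16) {
    Parked.swap(Active);           // park the outgoing helper, revive the other
    if (!Active)                   // the other mode was never instantiated
      Active.reset(new Helper{Mips16});
  }
};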
bool MipsSubtarget::mipsSEUsesSoftFloat() const {
return TM->Options.UseSoftFloat && !InMips16HardFloat;
}
diff --git a/lib/Target/Mips/MipsSubtarget.h b/lib/Target/Mips/MipsSubtarget.h
index 373f481..a3dcf03 100644
--- a/lib/Target/Mips/MipsSubtarget.h
+++ b/lib/Target/Mips/MipsSubtarget.h
@@ -14,6 +14,12 @@
#ifndef MIPSSUBTARGET_H
#define MIPSSUBTARGET_H
+#include "MipsFrameLowering.h"
+#include "MipsISelLowering.h"
+#include "MipsInstrInfo.h"
+#include "MipsJITInfo.h"
+#include "MipsSelectionDAGInfo.h"
+#include "llvm/IR/DataLayout.h"
#include "llvm/MC/MCInstrItineraries.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetSubtargetInfo.h"
@@ -56,9 +62,16 @@ protected:
// floating point registers instead of only using even ones.
bool IsSingleFloat;
+  // IsFPXX - The floating-point ABI is FPXX, the O32 "modeless" variant.
+ bool IsFPXX;
+
// IsFP64bit - The target processor has 64-bit floating point registers.
bool IsFP64bit;
+ /// Are odd single-precision registers permitted?
+ /// This corresponds to -modd-spreg and -mno-odd-spreg
+ bool UseOddSPReg;
+
// IsNan2008 - IEEE 754-2008 NaN encoding.
bool IsNaN2008bit;
@@ -132,6 +145,20 @@ protected:
MipsTargetMachine *TM;
Triple TargetTriple;
+
+ const DataLayout DL; // Calculates type size & alignment
+ const MipsSelectionDAGInfo TSInfo;
+ MipsJITInfo JITInfo;
+ std::unique_ptr<const MipsInstrInfo> InstrInfo;
+ std::unique_ptr<const MipsFrameLowering> FrameLowering;
+ std::unique_ptr<const MipsTargetLowering> TLInfo;
+ std::unique_ptr<const MipsInstrInfo> InstrInfo16;
+ std::unique_ptr<const MipsFrameLowering> FrameLowering16;
+ std::unique_ptr<const MipsTargetLowering> TLInfo16;
+ std::unique_ptr<const MipsInstrInfo> InstrInfoSE;
+ std::unique_ptr<const MipsFrameLowering> FrameLoweringSE;
+ std::unique_ptr<const MipsTargetLowering> TLInfoSE;
+
public:
bool enablePostRAScheduler(CodeGenOpt::Level OptLevel,
AntiDepBreakMode& Mode,
@@ -142,6 +169,7 @@ public:
bool isABI_N64() const { return MipsABI == N64; }
bool isABI_N32() const { return MipsABI == N32; }
bool isABI_O32() const { return MipsABI == O32; }
+ bool isABI_FPXX() const { return false; } // TODO: add check for FPXX
unsigned getTargetABI() const { return MipsABI; }
/// This constructor initializes the data members to match that
@@ -154,23 +182,36 @@ public:
/// subtarget options. Definition of function is auto generated by tblgen.
void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
+ bool hasMips1() const { return MipsArchVersion >= Mips1; }
bool hasMips2() const { return MipsArchVersion >= Mips2; }
bool hasMips3() const { return MipsArchVersion >= Mips3; }
+ bool hasMips4() const { return MipsArchVersion >= Mips4; }
+ bool hasMips5() const { return MipsArchVersion >= Mips5; }
bool hasMips4_32() const { return HasMips4_32; }
bool hasMips4_32r2() const { return HasMips4_32r2; }
- bool hasMips32() const { return MipsArchVersion >= Mips32; }
- bool hasMips32r2() const { return MipsArchVersion == Mips32r2 ||
- MipsArchVersion == Mips64r2; }
- bool hasMips32r6() const { return MipsArchVersion == Mips32r6 ||
- MipsArchVersion == Mips64r6; }
+ bool hasMips32() const {
+ return MipsArchVersion >= Mips32 && MipsArchVersion != Mips3 &&
+ MipsArchVersion != Mips4 && MipsArchVersion != Mips5;
+ }
+ bool hasMips32r2() const {
+ return MipsArchVersion == Mips32r2 || MipsArchVersion == Mips32r6 ||
+ MipsArchVersion == Mips64r2 || MipsArchVersion == Mips64r6;
+ }
+ bool hasMips32r6() const {
+ return MipsArchVersion == Mips32r6 || MipsArchVersion == Mips64r6;
+ }
bool hasMips64() const { return MipsArchVersion >= Mips64; }
- bool hasMips64r2() const { return MipsArchVersion == Mips64r2; }
+ bool hasMips64r2() const {
+ return MipsArchVersion == Mips64r2 || MipsArchVersion == Mips64r6;
+ }
bool hasMips64r6() const { return MipsArchVersion == Mips64r6; }
bool hasCnMips() const { return HasCnMips; }
bool isLittle() const { return IsLittle; }
+ bool isFPXX() const { return IsFPXX; }
bool isFP64bit() const { return IsFP64bit; }
+ bool useOddSPReg() const { return UseOddSPReg; }
bool isNaN2008() const { return IsNaN2008bit; }
bool isNotFP64bit() const { return !IsFP64bit; }
bool isGP64bit() const { return IsGP64bit; }
@@ -234,12 +275,31 @@ public:
/// \brief Reset the subtarget for the Mips target.
void resetSubtarget(MachineFunction *MF);
+ MipsSubtarget &initializeSubtargetDependencies(StringRef CPU, StringRef FS,
+ const TargetMachine *TM);
+
/// Does the system support unaligned memory access.
///
/// MIPS32r6/MIPS64r6 require full unaligned access support but do not
/// specify which component of the system provides it. Hardware, software, and
/// hybrid implementations are all valid.
bool systemSupportsUnalignedAccess() const { return hasMips32r6(); }
+
+ // Set helper classes
+ void setHelperClassesMips16();
+ void setHelperClassesMipsSE();
+
+ MipsJITInfo *getJITInfo() { return &JITInfo; }
+ const MipsSelectionDAGInfo *getSelectionDAGInfo() const { return &TSInfo; }
+ const DataLayout *getDataLayout() const { return &DL; }
+ const MipsInstrInfo *getInstrInfo() const { return InstrInfo.get(); }
+ const TargetFrameLowering *getFrameLowering() const {
+ return FrameLowering.get();
+ }
+ const MipsRegisterInfo *getRegisterInfo() const {
+ return &InstrInfo->getRegisterInfo();
+ }
+ const MipsTargetLowering *getTargetLowering() const { return TLInfo.get(); }
};
} // End llvm namespace
diff --git a/lib/Target/Mips/MipsTargetMachine.cpp b/lib/Target/Mips/MipsTargetMachine.cpp
index 984c58e..425dbf1 100644
--- a/lib/Target/Mips/MipsTargetMachine.cpp
+++ b/lib/Target/Mips/MipsTargetMachine.cpp
@@ -45,93 +45,21 @@ extern "C" void LLVMInitializeMipsTarget() {
RegisterTargetMachine<MipselTargetMachine> B(TheMips64elTarget);
}
-static std::string computeDataLayout(const MipsSubtarget &ST) {
- std::string Ret = "";
-
- // There are both little and big endian mips.
- if (ST.isLittle())
- Ret += "e";
- else
- Ret += "E";
-
- Ret += "-m:m";
-
- // Pointers are 32 bit on some ABIs.
- if (!ST.isABI_N64())
- Ret += "-p:32:32";
-
- // 8 and 16 bit integers only need no have natural alignment, but try to
- // align them to 32 bits. 64 bit integers have natural alignment.
- Ret += "-i8:8:32-i16:16:32-i64:64";
-
- // 32 bit registers are always available and the stack is at least 64 bit
- // aligned. On N64 64 bit registers are also available and the stack is
- // 128 bit aligned.
- if (ST.isABI_N64() || ST.isABI_N32())
- Ret += "-n32:64-S128";
- else
- Ret += "-n32-S64";
-
- return Ret;
-}
-
// On function prologue, the stack is created by decrementing
// its pointer. Once decremented, all references are done with positive
// offset from the stack/frame pointer; using StackGrowsUp enables
// easier handling.
// Using CodeModel::Large enables different CALL behavior.
-MipsTargetMachine::
-MipsTargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS, const TargetOptions &Options,
- Reloc::Model RM, CodeModel::Model CM,
- CodeGenOpt::Level OL,
- bool isLittle)
- : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
- Subtarget(TT, CPU, FS, isLittle, RM, this),
- DL(computeDataLayout(Subtarget)),
- InstrInfo(MipsInstrInfo::create(*this)),
- FrameLowering(MipsFrameLowering::create(*this, Subtarget)),
- TLInfo(MipsTargetLowering::create(*this)), TSInfo(*this),
- InstrItins(Subtarget.getInstrItineraryData()), JITInfo() {
+MipsTargetMachine::MipsTargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL, bool isLittle)
+ : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
+ Subtarget(TT, CPU, FS, isLittle, RM, this) {
initAsmInfo();
}
-
-void MipsTargetMachine::setHelperClassesMips16() {
- InstrInfoSE.swap(InstrInfo);
- FrameLoweringSE.swap(FrameLowering);
- TLInfoSE.swap(TLInfo);
- if (!InstrInfo16) {
- InstrInfo.reset(MipsInstrInfo::create(*this));
- FrameLowering.reset(MipsFrameLowering::create(*this, Subtarget));
- TLInfo.reset(MipsTargetLowering::create(*this));
- } else {
- InstrInfo16.swap(InstrInfo);
- FrameLowering16.swap(FrameLowering);
- TLInfo16.swap(TLInfo);
- }
- assert(TLInfo && "null target lowering 16");
- assert(InstrInfo && "null instr info 16");
- assert(FrameLowering && "null frame lowering 16");
-}
-
-void MipsTargetMachine::setHelperClassesMipsSE() {
- InstrInfo16.swap(InstrInfo);
- FrameLowering16.swap(FrameLowering);
- TLInfo16.swap(TLInfo);
- if (!InstrInfoSE) {
- InstrInfo.reset(MipsInstrInfo::create(*this));
- FrameLowering.reset(MipsFrameLowering::create(*this, Subtarget));
- TLInfo.reset(MipsTargetLowering::create(*this));
- } else {
- InstrInfoSE.swap(InstrInfo);
- FrameLoweringSE.swap(FrameLowering);
- TLInfoSE.swap(TLInfo);
- }
- assert(TLInfo && "null target lowering in SE");
- assert(InstrInfo && "null instr info SE");
- assert(FrameLowering && "null frame lowering SE");
-}
void MipsebTargetMachine::anchor() { }
MipsebTargetMachine::
diff --git a/lib/Target/Mips/MipsTargetMachine.h b/lib/Target/Mips/MipsTargetMachine.h
index a5aa39b..a0e7d43 100644
--- a/lib/Target/Mips/MipsTargetMachine.h
+++ b/lib/Target/Mips/MipsTargetMachine.h
@@ -14,15 +14,9 @@
#ifndef MIPSTARGETMACHINE_H
#define MIPSTARGETMACHINE_H
-#include "MipsFrameLowering.h"
-#include "MipsISelLowering.h"
-#include "MipsInstrInfo.h"
-#include "MipsJITInfo.h"
-#include "MipsSelectionDAGInfo.h"
#include "MipsSubtarget.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
-#include "llvm/IR/DataLayout.h"
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetMachine.h"
@@ -32,68 +26,47 @@ class MipsRegisterInfo;
class MipsTargetMachine : public LLVMTargetMachine {
MipsSubtarget Subtarget;
- const DataLayout DL; // Calculates type size & alignment
- std::unique_ptr<const MipsInstrInfo> InstrInfo;
- std::unique_ptr<const MipsFrameLowering> FrameLowering;
- std::unique_ptr<const MipsTargetLowering> TLInfo;
- std::unique_ptr<const MipsInstrInfo> InstrInfo16;
- std::unique_ptr<const MipsFrameLowering> FrameLowering16;
- std::unique_ptr<const MipsTargetLowering> TLInfo16;
- std::unique_ptr<const MipsInstrInfo> InstrInfoSE;
- std::unique_ptr<const MipsFrameLowering> FrameLoweringSE;
- std::unique_ptr<const MipsTargetLowering> TLInfoSE;
- MipsSelectionDAGInfo TSInfo;
- const InstrItineraryData &InstrItins;
- MipsJITInfo JITInfo;
public:
- MipsTargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS, const TargetOptions &Options,
- Reloc::Model RM, CodeModel::Model CM,
- CodeGenOpt::Level OL,
- bool isLittle);
+ MipsTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS,
+ const TargetOptions &Options, Reloc::Model RM,
+ CodeModel::Model CM, CodeGenOpt::Level OL, bool isLittle);
virtual ~MipsTargetMachine() {}
void addAnalysisPasses(PassManagerBase &PM) override;
- const MipsInstrInfo *getInstrInfo() const override
- { return InstrInfo.get(); }
- const TargetFrameLowering *getFrameLowering() const override
- { return FrameLowering.get(); }
- const MipsSubtarget *getSubtargetImpl() const override
- { return &Subtarget; }
- const DataLayout *getDataLayout() const override
- { return &DL;}
-
+ const MipsInstrInfo *getInstrInfo() const override {
+ return getSubtargetImpl()->getInstrInfo();
+ }
+ const TargetFrameLowering *getFrameLowering() const override {
+ return getSubtargetImpl()->getFrameLowering();
+ }
+ const MipsSubtarget *getSubtargetImpl() const override { return &Subtarget; }
const InstrItineraryData *getInstrItineraryData() const override {
- return Subtarget.inMips16Mode() ? nullptr : &InstrItins;
+ return Subtarget.inMips16Mode()
+ ? nullptr
+ : &getSubtargetImpl()->getInstrItineraryData();
+ }
+ MipsJITInfo *getJITInfo() override {
+ return Subtarget.getJITInfo();
}
-
- MipsJITInfo *getJITInfo() override { return &JITInfo; }
-
const MipsRegisterInfo *getRegisterInfo() const override {
- return &InstrInfo->getRegisterInfo();
+ return getSubtargetImpl()->getRegisterInfo();
}
-
const MipsTargetLowering *getTargetLowering() const override {
- return TLInfo.get();
+ return getSubtargetImpl()->getTargetLowering();
+ }
+ const DataLayout *getDataLayout() const override {
+ return getSubtargetImpl()->getDataLayout();
}
-
const MipsSelectionDAGInfo* getSelectionDAGInfo() const override {
- return &TSInfo;
+ return getSubtargetImpl()->getSelectionDAGInfo();
}
// Pass Pipeline Configuration
TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
bool addCodeEmitter(PassManagerBase &PM, JITCodeEmitter &JCE) override;
-
- // Set helper classes
- void setHelperClassesMips16();
-
- void setHelperClassesMipsSE();
-
-
};
/// MipsebTargetMachine - Mips32/64 big endian target machine.
diff --git a/lib/Target/Mips/MipsTargetStreamer.h b/lib/Target/Mips/MipsTargetStreamer.h
index 4ad37ac..99f7d4c 100644
--- a/lib/Target/Mips/MipsTargetStreamer.h
+++ b/lib/Target/Mips/MipsTargetStreamer.h
@@ -12,46 +12,83 @@
#include "llvm/MC/MCELFStreamer.h"
#include "llvm/MC/MCStreamer.h"
+#include "MCTargetDesc/MipsABIFlagsSection.h"
namespace llvm {
-class MipsTargetStreamer : public MCTargetStreamer {
- virtual void anchor();
+struct MipsABIFlagsSection;
+
+class MipsTargetStreamer : public MCTargetStreamer {
public:
MipsTargetStreamer(MCStreamer &S);
- virtual void emitDirectiveSetMicroMips() = 0;
- virtual void emitDirectiveSetNoMicroMips() = 0;
- virtual void emitDirectiveSetMips16() = 0;
- virtual void emitDirectiveSetNoMips16() = 0;
-
- virtual void emitDirectiveSetReorder() = 0;
- virtual void emitDirectiveSetNoReorder() = 0;
- virtual void emitDirectiveSetMacro() = 0;
- virtual void emitDirectiveSetNoMacro() = 0;
- virtual void emitDirectiveSetAt() = 0;
- virtual void emitDirectiveSetNoAt() = 0;
- virtual void emitDirectiveEnd(StringRef Name) = 0;
-
- virtual void emitDirectiveEnt(const MCSymbol &Symbol) = 0;
- virtual void emitDirectiveAbiCalls() = 0;
- virtual void emitDirectiveNaN2008() = 0;
- virtual void emitDirectiveNaNLegacy() = 0;
- virtual void emitDirectiveOptionPic0() = 0;
- virtual void emitDirectiveOptionPic2() = 0;
+ virtual void emitDirectiveSetMicroMips();
+ virtual void emitDirectiveSetNoMicroMips();
+ virtual void emitDirectiveSetMips16();
+ virtual void emitDirectiveSetNoMips16();
+
+ virtual void emitDirectiveSetReorder();
+ virtual void emitDirectiveSetNoReorder();
+ virtual void emitDirectiveSetMacro();
+ virtual void emitDirectiveSetNoMacro();
+ virtual void emitDirectiveSetAt();
+ virtual void emitDirectiveSetNoAt();
+ virtual void emitDirectiveEnd(StringRef Name);
+
+ virtual void emitDirectiveEnt(const MCSymbol &Symbol);
+ virtual void emitDirectiveAbiCalls();
+ virtual void emitDirectiveNaN2008();
+ virtual void emitDirectiveNaNLegacy();
+ virtual void emitDirectiveOptionPic0();
+ virtual void emitDirectiveOptionPic2();
virtual void emitFrame(unsigned StackReg, unsigned StackSize,
- unsigned ReturnReg) = 0;
- virtual void emitMask(unsigned CPUBitmask, int CPUTopSavedRegOff) = 0;
- virtual void emitFMask(unsigned FPUBitmask, int FPUTopSavedRegOff) = 0;
+ unsigned ReturnReg);
+ virtual void emitMask(unsigned CPUBitmask, int CPUTopSavedRegOff);
+ virtual void emitFMask(unsigned FPUBitmask, int FPUTopSavedRegOff);
- virtual void emitDirectiveSetMips32R2() = 0;
- virtual void emitDirectiveSetMips64() = 0;
- virtual void emitDirectiveSetMips64R2() = 0;
- virtual void emitDirectiveSetDsp() = 0;
+ virtual void emitDirectiveSetMips32R2();
+ virtual void emitDirectiveSetMips64();
+ virtual void emitDirectiveSetMips64R2();
+ virtual void emitDirectiveSetDsp();
// PIC support
- virtual void emitDirectiveCpload(unsigned RegNo) = 0;
+ virtual void emitDirectiveCpload(unsigned RegNo);
virtual void emitDirectiveCpsetup(unsigned RegNo, int RegOrOffset,
- const MCSymbol &Sym, bool IsReg) = 0;
+ const MCSymbol &Sym, bool IsReg);
+
+ /// Emit a '.module fp=value' directive using the given values.
+ /// Updates the .MIPS.abiflags section
+ virtual void emitDirectiveModuleFP(MipsABIFlagsSection::FpABIKind Value,
+ bool Is32BitABI) {
+ ABIFlagsSection.setFpABI(Value, Is32BitABI);
+ }
+
+ /// Emit a '.module fp=value' directive using the current values of the
+ /// .MIPS.abiflags section.
+ void emitDirectiveModuleFP() {
+ emitDirectiveModuleFP(ABIFlagsSection.getFpABI(),
+ ABIFlagsSection.Is32BitABI);
+ }
+
+ virtual void emitDirectiveModuleOddSPReg(bool Enabled, bool IsO32ABI);
+ virtual void emitDirectiveSetFp(MipsABIFlagsSection::FpABIKind Value){};
+ virtual void emitMipsAbiFlags(){};
+ void setCanHaveModuleDir(bool Can) { canHaveModuleDirective = Can; }
+ bool getCanHaveModuleDir() { return canHaveModuleDirective; }
+
+ // This method enables template classes to set the values of the internal
+ // ABI flags structure.
+ template <class PredicateLibrary>
+ void updateABIInfo(const PredicateLibrary &P) {
+ ABIFlagsSection.setAllFromPredicates(P);
+ }
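+
+ // Usage sketch (the call site below is an assumption, for illustration):
+ //   getTargetStreamer().updateABIInfo(*Subtarget);
+ //   ...
+ //   getTargetStreamer().emitMipsAbiFlags();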
+
+ MipsABIFlagsSection &getABIFlagsSection() { return ABIFlagsSection; }
+
+protected:
+ MipsABIFlagsSection ABIFlagsSection;
+
+private:
+ bool canHaveModuleDirective;
};
// This part is for ascii assembly output
@@ -93,6 +130,13 @@ public:
virtual void emitDirectiveCpload(unsigned RegNo);
void emitDirectiveCpsetup(unsigned RegNo, int RegOrOffset,
const MCSymbol &Sym, bool IsReg) override;
+
+ // ABI Flags
+ void emitDirectiveModuleFP(MipsABIFlagsSection::FpABIKind Value,
+ bool Is32BitABI) override;
+ void emitDirectiveModuleOddSPReg(bool Enabled, bool IsO32ABI) override;
+ void emitDirectiveSetFp(MipsABIFlagsSection::FpABIKind Value) override;
+ void emitMipsAbiFlags() override;
};
// This part is for ELF object output
@@ -144,6 +188,10 @@ public:
void emitDirectiveCpsetup(unsigned RegNo, int RegOrOffset,
const MCSymbol &Sym, bool IsReg) override;
+ // ABI Flags
+ void emitDirectiveModuleOddSPReg(bool Enabled, bool IsO32ABI) override;
+ void emitMipsAbiFlags() override;
+
protected:
bool isO32() const { return STI.getFeatureBits() & Mips::FeatureO32; }
bool isN32() const { return STI.getFeatureBits() & Mips::FeatureN32; }
diff --git a/lib/Target/NVPTX/NVPTX.td b/lib/Target/NVPTX/NVPTX.td
index d78b4e8..93fabf6 100644
--- a/lib/Target/NVPTX/NVPTX.td
+++ b/lib/Target/NVPTX/NVPTX.td
@@ -34,12 +34,18 @@ def SM30 : SubtargetFeature<"sm_30", "SmVersion", "30",
"Target SM 3.0">;
def SM35 : SubtargetFeature<"sm_35", "SmVersion", "35",
"Target SM 3.5">;
+def SM50 : SubtargetFeature<"sm_50", "SmVersion", "50",
+ "Target SM 5.0">;
// PTX Versions
def PTX30 : SubtargetFeature<"ptx30", "PTXVersion", "30",
"Use PTX version 3.0">;
def PTX31 : SubtargetFeature<"ptx31", "PTXVersion", "31",
"Use PTX version 3.1">;
+def PTX32 : SubtargetFeature<"ptx32", "PTXVersion", "32",
+ "Use PTX version 3.2">;
+def PTX40 : SubtargetFeature<"ptx40", "PTXVersion", "40",
+ "Use PTX version 4.0">;
//===----------------------------------------------------------------------===//
// NVPTX supported processors.
@@ -52,6 +58,7 @@ def : Proc<"sm_20", [SM20]>;
def : Proc<"sm_21", [SM21]>;
def : Proc<"sm_30", [SM30]>;
def : Proc<"sm_35", [SM35]>;
+def : Proc<"sm_50", [SM50]>;
def NVPTXInstrInfo : InstrInfo {
diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
index 4ec575f..decf02a 100644
--- a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
+++ b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
@@ -734,23 +734,7 @@ void NVPTXAsmPrinter::printReturnValStr(const Function *F, raw_ostream &O) {
<< " func_retval0";
} else {
if ((Ty->getTypeID() == Type::StructTyID) || isa<VectorType>(Ty)) {
- SmallVector<EVT, 16> vtparts;
- ComputeValueVTs(*TLI, Ty, vtparts);
- unsigned totalsz = 0;
- for (unsigned i = 0, e = vtparts.size(); i != e; ++i) {
- unsigned elems = 1;
- EVT elemtype = vtparts[i];
- if (vtparts[i].isVector()) {
- elems = vtparts[i].getVectorNumElements();
- elemtype = vtparts[i].getVectorElementType();
- }
- for (unsigned j = 0, je = elems; j != je; ++j) {
- unsigned sz = elemtype.getSizeInBits();
- if (elemtype.isInteger() && (sz < 8))
- sz = 8;
- totalsz += sz / 8;
- }
- }
+ unsigned totalsz = TD->getTypeAllocSize(Ty);
unsigned retAlignment = 0;
if (!llvm::getAlign(*F, 0, retAlignment))
retAlignment = TD->getABITypeAlignment(Ty);
@@ -1321,6 +1305,10 @@ bool NVPTXAsmPrinter::doFinalization(Module &M) {
// external global variable with init -> .visible
// external without init -> .extern
// appending -> not allowed, assert.
+// any other linkage (not internal, private, linker_private,
+// linker_private_weak, or linker_private_weak_def_auto) -> .weak
void NVPTXAsmPrinter::emitLinkageDirective(const GlobalValue *V,
raw_ostream &O) {
@@ -1346,6 +1334,9 @@ void NVPTXAsmPrinter::emitLinkageDirective(const GlobalValue *V,
msg.append(V->getName().str());
msg.append("has unsupported appending linkage type");
llvm_unreachable(msg.c_str());
+ } else if (!V->hasInternalLinkage() &&
+ !V->hasPrivateLinkage()) {
+ O << ".weak ";
}
}
}
@@ -1356,10 +1347,15 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,
// Skip meta data
if (GVar->hasSection()) {
- if (GVar->getSection() == "llvm.metadata")
+ if (GVar->getSection() == StringRef("llvm.metadata"))
return;
}
+ // Skip LLVM intrinsic global variables
+ if (GVar->getName().startswith("llvm.") ||
+ GVar->getName().startswith("nvvm."))
+ return;
+
const DataLayout *TD = TM.getDataLayout();
// GlobalVariables are always constant pointers themselves.
@@ -1371,6 +1367,10 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,
O << ".visible ";
else
O << ".extern ";
+ } else if (GVar->hasLinkOnceLinkage() || GVar->hasWeakLinkage() ||
+ GVar->hasAvailableExternallyLinkage() ||
+ GVar->hasCommonLinkage()) {
+ O << ".weak ";
}
if (llvm::isTexture(*GVar)) {
@@ -1438,7 +1438,7 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,
O << "linear";
break;
case 2:
- assert(0 && "Anisotropic filtering is not supported");
+ llvm_unreachable("Anisotropic filtering is not supported");
default:
O << "nearest";
break;
@@ -1480,6 +1480,11 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,
O << ".";
emitPTXAddressSpace(PTy->getAddressSpace(), O);
+
+ if (isManaged(*GVar)) {
+ O << " .attribute(.managed)";
+ }
+
if (GVar->getAlignment() == 0)
O << " .align " << (int) TD->getPrefTypeAlignment(ETy);
else
@@ -1497,13 +1502,24 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,
// PTX allows variable initialization only for constant and global state
// spaces.
- if (((PTy->getAddressSpace() == llvm::ADDRESS_SPACE_GLOBAL) ||
- (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST)) &&
- GVar->hasInitializer()) {
- const Constant *Initializer = GVar->getInitializer();
- if (!Initializer->isNullValue()) {
- O << " = ";
- printScalarConstant(Initializer, O);
+ if (GVar->hasInitializer()) {
+ if ((PTy->getAddressSpace() == llvm::ADDRESS_SPACE_GLOBAL) ||
+ (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST)) {
+ const Constant *Initializer = GVar->getInitializer();
+ // 'undef' is treated as though no value was specified.
+ if (!Initializer->isNullValue() && !isa<UndefValue>(Initializer)) {
+ O << " = ";
+ printScalarConstant(Initializer, O);
+ }
+ } else {
+ // The frontend adds a zero-initializer to variables that lack an initial
+ // value, so do not report an error in that case.
+ if (!GVar->getInitializer()->isNullValue()) {
+ std::string warnMsg = "initial value of '" + GVar->getName().str() +
+ "' is not allowed in addrspace(" +
+ llvm::utostr_32(PTy->getAddressSpace()) + ")";
+ report_fatal_error(warnMsg.c_str());
+ }
}
}
} else {
@@ -1562,7 +1578,7 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,
}
break;
default:
- assert(0 && "type not supported yet");
+ llvm_unreachable("type not supported yet");
}
}
@@ -1682,7 +1698,7 @@ void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable *GVar,
O << "]";
break;
default:
- assert(0 && "type not supported yet");
+ llvm_unreachable("type not supported yet");
}
return;
}
diff --git a/lib/Target/NVPTX/NVPTXFrameLowering.cpp b/lib/Target/NVPTX/NVPTXFrameLowering.cpp
index 9030584..8b088412 100644
--- a/lib/Target/NVPTX/NVPTXFrameLowering.cpp
+++ b/lib/Target/NVPTX/NVPTXFrameLowering.cpp
@@ -26,6 +26,10 @@
using namespace llvm;
+NVPTXFrameLowering::NVPTXFrameLowering(NVPTXSubtarget &STI)
+ : TargetFrameLowering(TargetFrameLowering::StackGrowsUp, 8, 0),
+ is64bit(STI.is64Bit()) {}
+
bool NVPTXFrameLowering::hasFP(const MachineFunction &MF) const { return true; }
void NVPTXFrameLowering::emitPrologue(MachineFunction &MF) const {
@@ -43,17 +47,21 @@ void NVPTXFrameLowering::emitPrologue(MachineFunction &MF) const {
// cvta.local %SP, %SPL;
if (is64bit) {
unsigned LocalReg = MRI.createVirtualRegister(&NVPTX::Int64RegsRegClass);
- MachineInstr *MI = BuildMI(
- MBB, MBBI, dl, tm.getInstrInfo()->get(NVPTX::cvta_local_yes_64),
- NVPTX::VRFrame).addReg(LocalReg);
- BuildMI(MBB, MI, dl, tm.getInstrInfo()->get(NVPTX::MOV_DEPOT_ADDR_64),
+ MachineInstr *MI =
+ BuildMI(MBB, MBBI, dl,
+ MF.getTarget().getInstrInfo()->get(NVPTX::cvta_local_yes_64),
+ NVPTX::VRFrame).addReg(LocalReg);
+ BuildMI(MBB, MI, dl,
+ MF.getTarget().getInstrInfo()->get(NVPTX::MOV_DEPOT_ADDR_64),
LocalReg).addImm(MF.getFunctionNumber());
} else {
unsigned LocalReg = MRI.createVirtualRegister(&NVPTX::Int32RegsRegClass);
- MachineInstr *MI = BuildMI(
- MBB, MBBI, dl, tm.getInstrInfo()->get(NVPTX::cvta_local_yes),
- NVPTX::VRFrame).addReg(LocalReg);
- BuildMI(MBB, MI, dl, tm.getInstrInfo()->get(NVPTX::MOV_DEPOT_ADDR),
+ MachineInstr *MI =
+ BuildMI(MBB, MBBI, dl,
+ MF.getTarget().getInstrInfo()->get(NVPTX::cvta_local_yes),
+ NVPTX::VRFrame).addReg(LocalReg);
+ BuildMI(MBB, MI, dl,
+ MF.getTarget().getInstrInfo()->get(NVPTX::MOV_DEPOT_ADDR),
LocalReg).addImm(MF.getFunctionNumber());
}
}
diff --git a/lib/Target/NVPTX/NVPTXFrameLowering.h b/lib/Target/NVPTX/NVPTXFrameLowering.h
index 2ae6d72..56fb673 100644
--- a/lib/Target/NVPTX/NVPTXFrameLowering.h
+++ b/lib/Target/NVPTX/NVPTXFrameLowering.h
@@ -17,16 +17,12 @@
#include "llvm/Target/TargetFrameLowering.h"
namespace llvm {
-class NVPTXTargetMachine;
-
+class NVPTXSubtarget;
class NVPTXFrameLowering : public TargetFrameLowering {
- NVPTXTargetMachine &tm;
bool is64bit;
public:
- explicit NVPTXFrameLowering(NVPTXTargetMachine &_tm, bool _is64bit)
- : TargetFrameLowering(TargetFrameLowering::StackGrowsUp, 8, 0), tm(_tm),
- is64bit(_is64bit) {}
+ explicit NVPTXFrameLowering(NVPTXSubtarget &STI);
bool hasFP(const MachineFunction &MF) const override;
void emitPrologue(MachineFunction &MF) const override;
diff --git a/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp b/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp
index 023dd5e..faa9fdb 100644
--- a/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp
+++ b/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp
@@ -84,7 +84,7 @@ bool GenericToNVVM::runOnModule(Module &M) {
GlobalVariable *GV = I++;
if (GV->getType()->getAddressSpace() == llvm::ADDRESS_SPACE_GENERIC &&
!llvm::isTexture(*GV) && !llvm::isSurface(*GV) &&
- !GV->getName().startswith("llvm.")) {
+ !llvm::isSampler(*GV) && !GV->getName().startswith("llvm.")) {
GlobalVariable *NewGV = new GlobalVariable(
M, GV->getType()->getElementType(), GV->isConstant(),
GV->getLinkage(),
diff --git a/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
index cd30880..0dfbf10 100644
--- a/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
+++ b/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
@@ -24,11 +24,14 @@ using namespace llvm;
#define DEBUG_TYPE "nvptx-isel"
-static cl::opt<int>
-FMAContractLevel("nvptx-fma-level", cl::ZeroOrMore, cl::Hidden,
- cl::desc("NVPTX Specific: FMA contraction (0: don't do it"
- " 1: do it 2: do it aggressively"),
- cl::init(2));
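+// Backing storage for the option below (see cl::location); a plain global
+// with external linkage so other files can read the chosen level directly.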
+unsigned FMAContractLevel = 0;
+
+static cl::opt<unsigned, true>
+FMAContractLevelOpt("nvptx-fma-level", cl::ZeroOrMore, cl::Hidden,
+ cl::desc("NVPTX Specific: FMA contraction (0: don't do it"
+ " 1: do it 2: do it aggressively"),
+ cl::location(FMAContractLevel),
+ cl::init(2));
static cl::opt<int> UsePrecDivF32(
"nvptx-prec-divf32", cl::ZeroOrMore, cl::Hidden,
@@ -138,7 +141,7 @@ SDNode *NVPTXDAGToDAGISel::Select(SDNode *N) {
case NVPTXISD::LDGV4:
case NVPTXISD::LDUV2:
case NVPTXISD::LDUV4:
- ResNode = SelectLDGLDUVector(N);
+ ResNode = SelectLDGLDU(N);
break;
case NVPTXISD::StoreV2:
case NVPTXISD::StoreV4:
@@ -164,6 +167,9 @@ SDNode *NVPTXDAGToDAGISel::Select(SDNode *N) {
case ISD::INTRINSIC_WO_CHAIN:
ResNode = SelectIntrinsicNoChain(N);
break;
+ case ISD::INTRINSIC_W_CHAIN:
+ ResNode = SelectIntrinsicChain(N);
+ break;
case NVPTXISD::Tex1DFloatI32:
case NVPTXISD::Tex1DFloatFloat:
case NVPTXISD::Tex1DFloatFloatLevel:
@@ -253,6 +259,12 @@ SDNode *NVPTXDAGToDAGISel::Select(SDNode *N) {
case NVPTXISD::Suld3DV4I32Trap:
ResNode = SelectSurfaceIntrinsic(N);
break;
+ case ISD::AND:
+ case ISD::SRA:
+ case ISD::SRL:
+ // Try to select BFE
+ ResNode = SelectBFE(N);
+ break;
case ISD::ADDRSPACECAST:
ResNode = SelectAddrSpaceCast(N);
break;
@@ -264,6 +276,21 @@ SDNode *NVPTXDAGToDAGISel::Select(SDNode *N) {
return SelectCode(N);
}
+SDNode *NVPTXDAGToDAGISel::SelectIntrinsicChain(SDNode *N) {
+ unsigned IID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
+ switch (IID) {
+ default:
+ return NULL;
+ case Intrinsic::nvvm_ldg_global_f:
+ case Intrinsic::nvvm_ldg_global_i:
+ case Intrinsic::nvvm_ldg_global_p:
+ case Intrinsic::nvvm_ldu_global_f:
+ case Intrinsic::nvvm_ldu_global_i:
+ case Intrinsic::nvvm_ldu_global_p:
+ return SelectLDGLDU(N);
+ }
+}
+
static unsigned int getCodeAddrSpace(MemSDNode *N,
const NVPTXSubtarget &Subtarget) {
const Value *Src = N->getMemOperand()->getValue();
@@ -981,22 +1008,101 @@ SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) {
return LD;
}
-SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) {
+SDNode *NVPTXDAGToDAGISel::SelectLDGLDU(SDNode *N) {
SDValue Chain = N->getOperand(0);
- SDValue Op1 = N->getOperand(1);
+ SDValue Op1;
+ MemSDNode *Mem;
+ bool IsLDG = true;
+
+ // If this is an LDG intrinsic, the address is the third operand. If it is
+ // an LDG/LDU SD node (from custom vector handling), the address is the
+ // second operand.
+ if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
+ Op1 = N->getOperand(2);
+ Mem = cast<MemIntrinsicSDNode>(N);
+ unsigned IID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
+ switch (IID) {
+ default:
+ return NULL;
+ case Intrinsic::nvvm_ldg_global_f:
+ case Intrinsic::nvvm_ldg_global_i:
+ case Intrinsic::nvvm_ldg_global_p:
+ IsLDG = true;
+ break;
+ case Intrinsic::nvvm_ldu_global_f:
+ case Intrinsic::nvvm_ldu_global_i:
+ case Intrinsic::nvvm_ldu_global_p:
+ IsLDG = false;
+ break;
+ }
+ } else {
+ Op1 = N->getOperand(1);
+ Mem = cast<MemSDNode>(N);
+ }
+
unsigned Opcode;
SDLoc DL(N);
SDNode *LD;
- MemSDNode *Mem = cast<MemSDNode>(N);
SDValue Base, Offset, Addr;
- EVT EltVT = Mem->getMemoryVT().getVectorElementType();
+ EVT EltVT = Mem->getMemoryVT();
+ if (EltVT.isVector()) {
+ EltVT = EltVT.getVectorElementType();
+ }
if (SelectDirectAddr(Op1, Addr)) {
switch (N->getOpcode()) {
default:
return nullptr;
+ case ISD::INTRINSIC_W_CHAIN:
+ if (IsLDG) {
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default:
+ return nullptr;
+ case MVT::i8:
+ Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8avar;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16avar;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32avar;
+ break;
+ case MVT::i64:
+ Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64avar;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32avar;
+ break;
+ case MVT::f64:
+ Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64avar;
+ break;
+ }
+ } else {
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default:
+ return nullptr;
+ case MVT::i8:
+ Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8avar;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16avar;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32avar;
+ break;
+ case MVT::i64:
+ Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64avar;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32avar;
+ break;
+ case MVT::f64:
+ Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64avar;
+ break;
+ }
+ }
+ break;
case NVPTXISD::LDGV2:
switch (EltVT.getSimpleVT().SimpleTy) {
default:
@@ -1092,6 +1198,55 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) {
switch (N->getOpcode()) {
default:
return nullptr;
+ case ISD::INTRINSIC_W_CHAIN:
+ if (IsLDG) {
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default:
+ return nullptr;
+ case MVT::i8:
+ Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8ari64;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16ari64;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32ari64;
+ break;
+ case MVT::i64:
+ Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64ari64;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32ari64;
+ break;
+ case MVT::f64:
+ Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64ari64;
+ break;
+ }
+ } else {
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default:
+ return nullptr;
+ case MVT::i8:
+ Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8ari64;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16ari64;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32ari64;
+ break;
+ case MVT::i64:
+ Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64ari64;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32ari64;
+ break;
+ case MVT::f64:
+ Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64ari64;
+ break;
+ }
+ }
+ break;
case NVPTXISD::LDGV2:
switch (EltVT.getSimpleVT().SimpleTy) {
default:
@@ -1181,6 +1336,55 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) {
switch (N->getOpcode()) {
default:
return nullptr;
+ case ISD::INTRINSIC_W_CHAIN:
+ if (IsLDG) {
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default:
+ return nullptr;
+ case MVT::i8:
+ Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8ari;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16ari;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32ari;
+ break;
+ case MVT::i64:
+ Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64ari;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32ari;
+ break;
+ case MVT::f64:
+ Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64ari;
+ break;
+ }
+ } else {
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default:
+ return nullptr;
+ case MVT::i8:
+ Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8ari;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16ari;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32ari;
+ break;
+ case MVT::i64:
+ Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64ari;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32ari;
+ break;
+ case MVT::f64:
+ Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64ari;
+ break;
+ }
+ }
+ break;
case NVPTXISD::LDGV2:
switch (EltVT.getSimpleVT().SimpleTy) {
default:
@@ -1276,6 +1480,55 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) {
switch (N->getOpcode()) {
default:
return nullptr;
+ case ISD::INTRINSIC_W_CHAIN:
+ if (IsLDG) {
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default:
+ return nullptr;
+ case MVT::i8:
+ Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8areg64;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16areg64;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32areg64;
+ break;
+ case MVT::i64:
+ Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64areg64;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32areg64;
+ break;
+ case MVT::f64:
+ Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64areg64;
+ break;
+ }
+ } else {
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default:
+ return nullptr;
+ case MVT::i8:
+ Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8areg64;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16areg64;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32areg64;
+ break;
+ case MVT::i64:
+ Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64areg64;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32areg64;
+ break;
+ case MVT::f64:
+ Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64areg64;
+ break;
+ }
+ }
+ break;
case NVPTXISD::LDGV2:
switch (EltVT.getSimpleVT().SimpleTy) {
default:
@@ -1365,6 +1618,55 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) {
switch (N->getOpcode()) {
default:
return nullptr;
+ case ISD::INTRINSIC_W_CHAIN:
+ if (IsLDG) {
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default:
+ return nullptr;
+ case MVT::i8:
+ Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8areg;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16areg;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32areg;
+ break;
+ case MVT::i64:
+ Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64areg;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32areg;
+ break;
+ case MVT::f64:
+ Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64areg;
+ break;
+ }
+ } else {
+ switch (EltVT.getSimpleVT().SimpleTy) {
+ default:
+ return nullptr;
+ case MVT::i8:
+ Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8areg;
+ break;
+ case MVT::i16:
+ Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16areg;
+ break;
+ case MVT::i32:
+ Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32areg;
+ break;
+ case MVT::i64:
+ Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64areg;
+ break;
+ case MVT::f32:
+ Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32areg;
+ break;
+ case MVT::f64:
+ Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64areg;
+ break;
+ }
+ }
+ break;
case NVPTXISD::LDGV2:
switch (EltVT.getSimpleVT().SimpleTy) {
default:
@@ -1457,7 +1759,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) {
}
MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
- MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
+ MemRefs0[0] = Mem->getMemOperand();
cast<MachineSDNode>(LD)->setMemRefs(MemRefs0, MemRefs0 + 1);
return LD;
@@ -2959,6 +3261,214 @@ SDNode *NVPTXDAGToDAGISel::SelectSurfaceIntrinsic(SDNode *N) {
return Ret;
}
+/// SelectBFE - Look for instruction sequences that can be made more efficient
+/// by using the 'bfe' (bit-field extract) PTX instruction
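+///
+/// For example (illustrative), the zero-extended field extract
+///   (and (srl %x, 4), 0xff)
+/// reads 8 bits starting at bit 4 and can be emitted as the single
+/// instruction 'bfe.u32 %d, %x, 4, 8'.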
+SDNode *NVPTXDAGToDAGISel::SelectBFE(SDNode *N) {
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+ SDValue Len;
+ SDValue Start;
+ SDValue Val;
+ bool IsSigned = false;
+
+ if (N->getOpcode() == ISD::AND) {
+ // Canonicalize the operands
+ // We want 'and %val, %mask'
+ if (isa<ConstantSDNode>(LHS) && !isa<ConstantSDNode>(RHS)) {
+ std::swap(LHS, RHS);
+ }
+
+ ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(RHS);
+ if (!Mask) {
+ // We need a constant mask on the RHS of the AND
+ return NULL;
+ }
+
+ // Extract the mask bits
+ uint64_t MaskVal = Mask->getZExtValue();
+ if (!isMask_64(MaskVal)) {
+ // We *could* handle shifted masks here, but doing so would require an
+ // 'and' operation to fix up the low-order bits, so we would trade
+ // shr+and for bfe+and, which has the same throughput.
+ return NULL;
+ }
+
+ // How many bits are in our mask?
+ uint64_t NumBits = CountTrailingOnes_64(MaskVal);
+ Len = CurDAG->getTargetConstant(NumBits, MVT::i32);
+
+ if (LHS.getOpcode() == ISD::SRL || LHS.getOpcode() == ISD::SRA) {
+ // We have a 'srl/and' pair, extract the effective start bit and length
+ Val = LHS.getNode()->getOperand(0);
+ Start = LHS.getNode()->getOperand(1);
+ ConstantSDNode *StartConst = dyn_cast<ConstantSDNode>(Start);
+ if (StartConst) {
+ uint64_t StartVal = StartConst->getZExtValue();
+ // How many "good" bits do we have left? "good" is defined here as bits
+ // that exist in the original value, not shifted in.
+ uint64_t GoodBits = Start.getValueType().getSizeInBits() - StartVal;
+ if (NumBits > GoodBits) {
+ // Do not handle the case where bits have been shifted in. In theory
+ // we could handle this, but the cost is likely higher than just
+ // emitting the srl/and pair.
+ return NULL;
+ }
+ Start = CurDAG->getTargetConstant(StartVal, MVT::i32);
+ } else {
+ // Do not handle the case where the shift amount (can be zero if no srl
+ // was found) is not constant. We could handle this case, but it would
+ // require run-time logic that would be more expensive than just
+ // emitting the srl/and pair.
+ return NULL;
+ }
+ } else {
+ // Do not handle the case where the LHS of the and is not a shift. While
+ // it would be trivial to handle this case, it would just transform
+ // 'and' -> 'bfe', but 'and' has higher throughput.
+ return NULL;
+ }
+ } else if (N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) {
+ if (LHS->getOpcode() == ISD::AND) {
+ ConstantSDNode *ShiftCnst = dyn_cast<ConstantSDNode>(RHS);
+ if (!ShiftCnst) {
+ // Shift amount must be constant
+ return NULL;
+ }
+
+ uint64_t ShiftAmt = ShiftCnst->getZExtValue();
+
+ SDValue AndLHS = LHS->getOperand(0);
+ SDValue AndRHS = LHS->getOperand(1);
+
+ // Canonicalize the AND to have the mask on the RHS
+ if (isa<ConstantSDNode>(AndLHS)) {
+ std::swap(AndLHS, AndRHS);
+ }
+
+ ConstantSDNode *MaskCnst = dyn_cast<ConstantSDNode>(AndRHS);
+ if (!MaskCnst) {
+ // Mask must be constant
+ return NULL;
+ }
+
+ uint64_t MaskVal = MaskCnst->getZExtValue();
+ uint64_t NumZeros;
+ uint64_t NumBits;
+ if (isMask_64(MaskVal)) {
+ NumZeros = 0;
+ // The number of bits in the result bitfield will be the number of
+ // trailing ones (the AND) minus the number of bits we shift off
+ NumBits = CountTrailingOnes_64(MaskVal) - ShiftAmt;
+ } else if (isShiftedMask_64(MaskVal)) {
+ NumZeros = countTrailingZeros(MaskVal);
+ unsigned NumOnes = CountTrailingOnes_64(MaskVal >> NumZeros);
+ // The number of bits in the result bitfield will be the number of
+ // trailing zeros plus the number of set bits in the mask minus the
+ // number of bits we shift off
+ NumBits = NumZeros + NumOnes - ShiftAmt;
+ } else {
+ // This is not a mask we can handle
+ return NULL;
+ }
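+
+ // E.g. (sketch): for (srl (and %val, 0x0ff0), 4) we get NumZeros = 4
+ // and NumOnes = 8, so Start = 4 and Len = 4 + 8 - 4 = 8, i.e.
+ // bfe(%val, 4, 8).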
+
+ if (ShiftAmt < NumZeros) {
+ // Handling this case would require extra logic that would make this
+ // transformation non-profitable
+ return NULL;
+ }
+
+ Val = AndLHS;
+ Start = CurDAG->getTargetConstant(ShiftAmt, MVT::i32);
+ Len = CurDAG->getTargetConstant(NumBits, MVT::i32);
+ } else if (LHS->getOpcode() == ISD::SHL) {
+ // Here, we have a pattern like:
+ //
+ // (sra (shl val, NN), MM)
+ // or
+ // (srl (shl val, NN), MM)
+ //
+ // If MM >= NN, we can efficiently optimize this with bfe
+ Val = LHS->getOperand(0);
+
+ SDValue ShlRHS = LHS->getOperand(1);
+ ConstantSDNode *ShlCnst = dyn_cast<ConstantSDNode>(ShlRHS);
+ if (!ShlCnst) {
+ // Shift amount must be constant
+ return NULL;
+ }
+ uint64_t InnerShiftAmt = ShlCnst->getZExtValue();
+
+ SDValue ShrRHS = RHS;
+ ConstantSDNode *ShrCnst = dyn_cast<ConstantSDNode>(ShrRHS);
+ if (!ShrCnst) {
+ // Shift amount must be constant
+ return NULL;
+ }
+ uint64_t OuterShiftAmt = ShrCnst->getZExtValue();
+
+ // To avoid extra codegen and be profitable, we need Outer >= Inner
+ if (OuterShiftAmt < InnerShiftAmt) {
+ return NULL;
+ }
+
+ // If the outer shift is more than the type size, we have no bitfield to
+ // extract (since we also check that the inner shift is <= the outer
+ // shift, this also implies that the inner shift is < the type size).
+ if (OuterShiftAmt >= Val.getValueType().getSizeInBits()) {
+ return NULL;
+ }
+
+ Start =
+ CurDAG->getTargetConstant(OuterShiftAmt - InnerShiftAmt, MVT::i32);
+ Len =
+ CurDAG->getTargetConstant(Val.getValueType().getSizeInBits() -
+ OuterShiftAmt, MVT::i32);
+
+ if (N->getOpcode() == ISD::SRA) {
+ // If we have an arithmetic right shift, we need to use the signed bfe
+ // variant
+ IsSigned = true;
+ }
+ } else {
+ // No can do...
+ return NULL;
+ }
+ } else {
+ // No can do...
+ return NULL;
+ }
+
+
+ unsigned Opc;
+ // For the BFE operations we form here from "and" and "srl", always use the
+ // unsigned variants.
+ if (Val.getValueType() == MVT::i32) {
+ if (IsSigned) {
+ Opc = NVPTX::BFE_S32rii;
+ } else {
+ Opc = NVPTX::BFE_U32rii;
+ }
+ } else if (Val.getValueType() == MVT::i64) {
+ if (IsSigned) {
+ Opc = NVPTX::BFE_S64rii;
+ } else {
+ Opc = NVPTX::BFE_U64rii;
+ }
+ } else {
+ // We cannot handle this type
+ return NULL;
+ }
+
+ SDValue Ops[] = {
+ Val, Start, Len
+ };
+
+ SDNode *Ret =
+ CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
+
+ return Ret;
+}
+
// SelectDirectAddr - Match a direct address for DAG.
// A direct address could be a globaladdress or externalsymbol.
bool NVPTXDAGToDAGISel::SelectDirectAddr(SDValue N, SDValue &Address) {
diff --git a/lib/Target/NVPTX/NVPTXISelDAGToDAG.h b/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
index 11f92e7..c44ccb2 100644
--- a/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
+++ b/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
@@ -59,10 +59,11 @@ private:
SDNode *Select(SDNode *N) override;
SDNode *SelectIntrinsicNoChain(SDNode *N);
+ SDNode *SelectIntrinsicChain(SDNode *N);
SDNode *SelectTexSurfHandle(SDNode *N);
SDNode *SelectLoad(SDNode *N);
SDNode *SelectLoadVector(SDNode *N);
- SDNode *SelectLDGLDUVector(SDNode *N);
+ SDNode *SelectLDGLDU(SDNode *N);
SDNode *SelectStore(SDNode *N);
SDNode *SelectStoreVector(SDNode *N);
SDNode *SelectLoadParam(SDNode *N);
@@ -71,6 +72,7 @@ private:
SDNode *SelectAddrSpaceCast(SDNode *N);
SDNode *SelectTextureIntrinsic(SDNode *N);
SDNode *SelectSurfaceIntrinsic(SDNode *N);
+ SDNode *SelectBFE(SDNode *N);
inline SDValue getI32Imm(unsigned Imm) {
return CurDAG->getTargetConstant(Imm, MVT::i32);
diff --git a/lib/Target/NVPTX/NVPTXISelLowering.cpp b/lib/Target/NVPTX/NVPTXISelLowering.cpp
index b0943be..cb452ff 100644
--- a/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -33,6 +33,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <sstream>
@@ -111,6 +112,7 @@ NVPTXTargetLowering::NVPTXTargetLowering(NVPTXTargetMachine &TM)
MaxStoresPerMemmove = (unsigned) 0xFFFFFFFF;
setBooleanContents(ZeroOrNegativeOneBooleanContent);
+ setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
// Jump is Expensive. Don't create extra control flow for 'and', 'or'
// condition branches.
@@ -130,7 +132,13 @@ NVPTXTargetLowering::NVPTXTargetLowering(NVPTXTargetMachine &TM)
addRegisterClass(MVT::f64, &NVPTX::Float64RegsRegClass);
// Operations not directly supported by NVPTX.
- setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
+ setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
+ setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
+ setOperationAction(ISD::SELECT_CC, MVT::i1, Expand);
+ setOperationAction(ISD::SELECT_CC, MVT::i8, Expand);
+ setOperationAction(ISD::SELECT_CC, MVT::i16, Expand);
+ setOperationAction(ISD::SELECT_CC, MVT::i32, Expand);
+ setOperationAction(ISD::SELECT_CC, MVT::i64, Expand);
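+ // Expanding per concrete type; a single entry keyed on MVT::Other does not
+ // cover these (assumption about how the legalizer queries the action).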
setOperationAction(ISD::BR_CC, MVT::f32, Expand);
setOperationAction(ISD::BR_CC, MVT::f64, Expand);
setOperationAction(ISD::BR_CC, MVT::i1, Expand);
@@ -146,6 +154,13 @@ NVPTXTargetLowering::NVPTXTargetLowering(NVPTXTargetMachine &TM)
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8 , Legal);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
+ setOperationAction(ISD::SHL_PARTS, MVT::i32 , Custom);
+ setOperationAction(ISD::SRA_PARTS, MVT::i32 , Custom);
+ setOperationAction(ISD::SRL_PARTS, MVT::i32 , Custom);
+ setOperationAction(ISD::SHL_PARTS, MVT::i64 , Custom);
+ setOperationAction(ISD::SRA_PARTS, MVT::i64 , Custom);
+ setOperationAction(ISD::SRL_PARTS, MVT::i64 , Custom);
+
if (nvptxSubtarget.hasROT64()) {
setOperationAction(ISD::ROTL, MVT::i64, Legal);
setOperationAction(ISD::ROTR, MVT::i64, Legal);
@@ -237,6 +252,13 @@ NVPTXTargetLowering::NVPTXTargetLowering(NVPTXTargetMachine &TM)
setOperationAction(ISD::CTPOP, MVT::i32, Legal);
setOperationAction(ISD::CTPOP, MVT::i64, Legal);
+ // We have some custom DAG combine patterns for these nodes
+ setTargetDAGCombine(ISD::ADD);
+ setTargetDAGCombine(ISD::AND);
+ setTargetDAGCombine(ISD::FADD);
+ setTargetDAGCombine(ISD::MUL);
+ setTargetDAGCombine(ISD::SHL);
+
// Now deduce the information based on the above mentioned
// actions
computeRegisterProperties();
@@ -328,6 +350,16 @@ const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const {
return "NVPTXISD::StoreV2";
case NVPTXISD::StoreV4:
return "NVPTXISD::StoreV4";
+ case NVPTXISD::FUN_SHFL_CLAMP:
+ return "NVPTXISD::FUN_SHFL_CLAMP";
+ case NVPTXISD::FUN_SHFR_CLAMP:
+ return "NVPTXISD::FUN_SHFR_CLAMP";
+ case NVPTXISD::IMAD:
+ return "NVPTXISD::IMAD";
+ case NVPTXISD::MUL_WIDE_SIGNED:
+ return "NVPTXISD::MUL_WIDE_SIGNED";
+ case NVPTXISD::MUL_WIDE_UNSIGNED:
+ return "NVPTXISD::MUL_WIDE_UNSIGNED";
case NVPTXISD::Tex1DFloatI32: return "NVPTXISD::Tex1DFloatI32";
case NVPTXISD::Tex1DFloatFloat: return "NVPTXISD::Tex1DFloatFloat";
case NVPTXISD::Tex1DFloatFloatLevel:
@@ -441,8 +473,12 @@ const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const {
}
}
-bool NVPTXTargetLowering::shouldSplitVectorType(EVT VT) const {
- return VT.getScalarType() == MVT::i1;
+TargetLoweringBase::LegalizeTypeAction
+NVPTXTargetLowering::getPreferredVectorAction(EVT VT) const {
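+ // Split multi-element i1 vectors (e.g. v2i1, v4i1) into scalars; a
+ // single-element vector falls through to the default action.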
+ if (VT.getVectorNumElements() != 1 && VT.getScalarType() == MVT::i1)
+ return TypeSplitVector;
+
+ return TargetLoweringBase::getPreferredVectorAction(VT);
}
SDValue
@@ -487,26 +523,12 @@ NVPTXTargetLowering::getPrototype(Type *retTy, const ArgListTy &Args,
} else if (isa<PointerType>(retTy)) {
O << ".param .b" << getPointerTy().getSizeInBits() << " _";
} else {
- if ((retTy->getTypeID() == Type::StructTyID) || isa<VectorType>(retTy)) {
- SmallVector<EVT, 16> vtparts;
- ComputeValueVTs(*this, retTy, vtparts);
- unsigned totalsz = 0;
- for (unsigned i = 0, e = vtparts.size(); i != e; ++i) {
- unsigned elems = 1;
- EVT elemtype = vtparts[i];
- if (vtparts[i].isVector()) {
- elems = vtparts[i].getVectorNumElements();
- elemtype = vtparts[i].getVectorElementType();
- }
- // TODO: no need to loop
- for (unsigned j = 0, je = elems; j != je; ++j) {
- unsigned sz = elemtype.getSizeInBits();
- if (elemtype.isInteger() && (sz < 8))
- sz = 8;
- totalsz += sz / 8;
- }
- }
- O << ".param .align " << retAlignment << " .b8 _[" << totalsz << "]";
+ if((retTy->getTypeID() == Type::StructTyID) ||
+ isa<VectorType>(retTy)) {
+ O << ".param .align "
+ << retAlignment
+ << " .b8 _["
+ << getDataLayout()->getTypeAllocSize(retTy) << "]";
} else {
assert(false && "Unknown return type");
}
@@ -675,7 +697,8 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
if (Ty->isAggregateType()) {
// aggregate
SmallVector<EVT, 16> vtparts;
- ComputeValueVTs(*this, Ty, vtparts);
+ SmallVector<uint64_t, 16> Offsets;
+ ComputePTXValueVTs(*this, Ty, vtparts, &Offsets, 0);
unsigned align = getArgumentAlignment(Callee, CS, Ty, paramCount + 1);
// declare .param .align <align> .b8 .param<n>[<size>];
@@ -687,34 +710,26 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
Chain = DAG.getNode(NVPTXISD::DeclareParam, dl, DeclareParamVTs,
DeclareParamOps);
InFlag = Chain.getValue(1);
- unsigned curOffset = 0;
for (unsigned j = 0, je = vtparts.size(); j != je; ++j) {
- unsigned elems = 1;
EVT elemtype = vtparts[j];
- if (vtparts[j].isVector()) {
- elems = vtparts[j].getVectorNumElements();
- elemtype = vtparts[j].getVectorElementType();
- }
- for (unsigned k = 0, ke = elems; k != ke; ++k) {
- unsigned sz = elemtype.getSizeInBits();
- if (elemtype.isInteger() && (sz < 8))
- sz = 8;
- SDValue StVal = OutVals[OIdx];
- if (elemtype.getSizeInBits() < 16) {
- StVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i16, StVal);
- }
- SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
- SDValue CopyParamOps[] = { Chain,
- DAG.getConstant(paramCount, MVT::i32),
- DAG.getConstant(curOffset, MVT::i32),
- StVal, InFlag };
- Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreParam, dl,
- CopyParamVTs, CopyParamOps,
- elemtype, MachinePointerInfo());
- InFlag = Chain.getValue(1);
- curOffset += sz / 8;
- ++OIdx;
+ unsigned ArgAlign = GreatestCommonDivisor64(align, Offsets[j]);
+ if (elemtype.isInteger() && (sz < 8))
+ sz = 8;
+ SDValue StVal = OutVals[OIdx];
+ if (elemtype.getSizeInBits() < 16) {
+ StVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i16, StVal);
}
+ SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
+ SDValue CopyParamOps[] = { Chain,
+ DAG.getConstant(paramCount, MVT::i32),
+ DAG.getConstant(Offsets[j], MVT::i32),
+ StVal, InFlag };
+ Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreParam, dl,
+ CopyParamVTs, CopyParamOps,
+ elemtype, MachinePointerInfo(),
+ ArgAlign);
+ InFlag = Chain.getValue(1);
+ ++OIdx;
}
if (vtparts.size() > 0)
--OIdx;
@@ -899,13 +914,15 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
}
// struct or vector
SmallVector<EVT, 16> vtparts;
+ SmallVector<uint64_t, 16> Offsets;
const PointerType *PTy = dyn_cast<PointerType>(Args[i].Ty);
assert(PTy && "Type of a byval parameter should be pointer");
- ComputeValueVTs(*this, PTy->getElementType(), vtparts);
+ ComputePTXValueVTs(*this, PTy->getElementType(), vtparts, &Offsets, 0);
// declare .param .align <align> .b8 .param<n>[<size>];
unsigned sz = Outs[OIdx].Flags.getByValSize();
SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
+ unsigned ArgAlign = Outs[OIdx].Flags.getByValAlign();
// The ByValAlign in the Outs[OIdx].Flags is alway set at this point,
// so we don't need to worry about natural alignment or not.
// See TargetLowering::LowerCallTo().
@@ -917,38 +934,28 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
Chain = DAG.getNode(NVPTXISD::DeclareParam, dl, DeclareParamVTs,
DeclareParamOps);
InFlag = Chain.getValue(1);
- unsigned curOffset = 0;
for (unsigned j = 0, je = vtparts.size(); j != je; ++j) {
- unsigned elems = 1;
EVT elemtype = vtparts[j];
- if (vtparts[j].isVector()) {
- elems = vtparts[j].getVectorNumElements();
- elemtype = vtparts[j].getVectorElementType();
+ int curOffset = Offsets[j];
+ unsigned PartAlign = GreatestCommonDivisor64(ArgAlign, curOffset);
+ SDValue srcAddr =
+ DAG.getNode(ISD::ADD, dl, getPointerTy(), OutVals[OIdx],
+ DAG.getConstant(curOffset, getPointerTy()));
+ SDValue theVal = DAG.getLoad(elemtype, dl, tempChain, srcAddr,
+ MachinePointerInfo(), false, false, false,
+ PartAlign);
+ if (elemtype.getSizeInBits() < 16) {
+ theVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i16, theVal);
}
- for (unsigned k = 0, ke = elems; k != ke; ++k) {
- unsigned sz = elemtype.getSizeInBits();
- if (elemtype.isInteger() && (sz < 8))
- sz = 8;
- SDValue srcAddr =
- DAG.getNode(ISD::ADD, dl, getPointerTy(), OutVals[OIdx],
- DAG.getConstant(curOffset, getPointerTy()));
- SDValue theVal = DAG.getLoad(elemtype, dl, tempChain, srcAddr,
- MachinePointerInfo(), false, false, false,
- 0);
- if (elemtype.getSizeInBits() < 16) {
- theVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i16, theVal);
- }
- SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
- SDValue CopyParamOps[] = { Chain, DAG.getConstant(paramCount, MVT::i32),
- DAG.getConstant(curOffset, MVT::i32), theVal,
- InFlag };
- Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreParam, dl, CopyParamVTs,
- CopyParamOps, elemtype,
- MachinePointerInfo());
+ SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
+ SDValue CopyParamOps[] = { Chain, DAG.getConstant(paramCount, MVT::i32),
+ DAG.getConstant(curOffset, MVT::i32), theVal,
+ InFlag };
+ Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreParam, dl, CopyParamVTs,
+ CopyParamOps, elemtype,
+ MachinePointerInfo());
- InFlag = Chain.getValue(1);
- curOffset += sz / 8;
- }
+ InFlag = Chain.getValue(1);
}
++paramCount;
}
@@ -1057,7 +1064,6 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// Generate loads from param memory/moves from registers for result
if (Ins.size() > 0) {
- unsigned resoffset = 0;
if (retTy && retTy->isVectorTy()) {
EVT ObjectVT = getValueType(retTy);
unsigned NumElts = ObjectVT.getVectorNumElements();
@@ -1066,14 +1072,15 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
ObjectVT) == NumElts &&
"Vector was not scalarized");
unsigned sz = EltVT.getSizeInBits();
- bool needTruncate = sz < 16 ? true : false;
+ bool needTruncate = sz < 8 ? true : false;
if (NumElts == 1) {
// Just a simple load
SmallVector<EVT, 4> LoadRetVTs;
- if (needTruncate) {
- // If loading i1 result, generate
- // load i16
+ if (EltVT == MVT::i1 || EltVT == MVT::i8) {
+ // If loading i1/i8 result, generate
+ // load.b8 i16
+ // if i1
// trunc i16 to i1
LoadRetVTs.push_back(MVT::i16);
} else
@@ -1097,9 +1104,10 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
} else if (NumElts == 2) {
// LoadV2
SmallVector<EVT, 4> LoadRetVTs;
- if (needTruncate) {
- // If loading i1 result, generate
- // load i16
+ if (EltVT == MVT::i1 || EltVT == MVT::i8) {
+ // If loading i1/i8 result, generate
+ // load.b8 i16
+ // if i1
// trunc i16 to i1
LoadRetVTs.push_back(MVT::i16);
LoadRetVTs.push_back(MVT::i16);
@@ -1142,9 +1150,10 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
EVT VecVT = EVT::getVectorVT(F->getContext(), EltVT, VecSize);
for (unsigned i = 0; i < NumElts; i += VecSize) {
SmallVector<EVT, 8> LoadRetVTs;
- if (needTruncate) {
- // If loading i1 result, generate
- // load i16
+ if (EltVT == MVT::i1 || EltVT == MVT::i8) {
+ // If loading i1/i8 result, generate
+ // load.b8 i16
+ // if i1
// trunc i16 to i1
for (unsigned j = 0; j < VecSize; ++j)
LoadRetVTs.push_back(MVT::i16);
@@ -1183,10 +1192,13 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
}
} else {
SmallVector<EVT, 16> VTs;
- ComputePTXValueVTs(*this, retTy, VTs);
+ SmallVector<uint64_t, 16> Offsets;
+ ComputePTXValueVTs(*this, retTy, VTs, &Offsets, 0);
assert(VTs.size() == Ins.size() && "Bad value decomposition");
+ unsigned RetAlign = getArgumentAlignment(Callee, CS, retTy, 0);
for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
unsigned sz = VTs[i].getSizeInBits();
+ unsigned AlignI = GreatestCommonDivisor64(RetAlign, Offsets[i]);
bool needTruncate = sz < 8 ? true : false;
if (VTs[i].isInteger() && (sz < 8))
sz = 8;
@@ -1212,19 +1224,18 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
SmallVector<SDValue, 4> LoadRetOps;
LoadRetOps.push_back(Chain);
LoadRetOps.push_back(DAG.getConstant(1, MVT::i32));
- LoadRetOps.push_back(DAG.getConstant(resoffset, MVT::i32));
+ LoadRetOps.push_back(DAG.getConstant(Offsets[i], MVT::i32));
LoadRetOps.push_back(InFlag);
SDValue retval = DAG.getMemIntrinsicNode(
NVPTXISD::LoadParam, dl,
DAG.getVTList(LoadRetVTs), LoadRetOps,
- TheLoadType, MachinePointerInfo());
+ TheLoadType, MachinePointerInfo(), AlignI);
Chain = retval.getValue(1);
InFlag = retval.getValue(2);
SDValue Ret0 = retval.getValue(0);
if (needTruncate)
Ret0 = DAG.getNode(ISD::TRUNCATE, dl, Ins[i].VT, Ret0);
InVals.push_back(Ret0);
- resoffset += sz / 8;
}
}
}
@@ -1262,6 +1273,127 @@ NVPTXTargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const {
return DAG.getNode(ISD::BUILD_VECTOR, dl, Node->getValueType(0), Ops);
}
+/// LowerShiftRightParts - Lower SRL_PARTS, SRA_PARTS, which
+/// 1) returns two i32 values and takes a 2 x i32 value to shift plus a shift
+/// amount, or
+/// 2) returns two i64 values and takes a 2 x i64 value to shift plus a shift
+/// amount.
+SDValue NVPTXTargetLowering::LowerShiftRightParts(SDValue Op,
+ SelectionDAG &DAG) const {
+ assert(Op.getNumOperands() == 3 && "Not a double-shift!");
+ assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS);
+
+ EVT VT = Op.getValueType();
+ unsigned VTBits = VT.getSizeInBits();
+ SDLoc dl(Op);
+ SDValue ShOpLo = Op.getOperand(0);
+ SDValue ShOpHi = Op.getOperand(1);
+ SDValue ShAmt = Op.getOperand(2);
+ unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL;
+
+ if (VTBits == 32 && nvptxSubtarget.getSmVersion() >= 35) {
+
+ // For 32bit and sm35, we can use the funnel shift 'shf' instruction.
+ // {dHi, dLo} = {aHi, aLo} >> Amt
+ // dHi = aHi >> Amt
+ // dLo = shf.r.clamp aLo, aHi, Amt
+
+ SDValue Hi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt);
+ SDValue Lo = DAG.getNode(NVPTXISD::FUN_SHFR_CLAMP, dl, VT, ShOpLo, ShOpHi,
+ ShAmt);
+
+ SDValue Ops[2] = { Lo, Hi };
+ return DAG.getMergeValues(Ops, dl);
+ }
+ else {
+
+ // {dHi, dLo} = {aHi, aLo} >> Amt
+ // - if (Amt>=size) then
+ // dLo = aHi >> (Amt-size)
+ // dHi = aHi >> Amt (this is either all 0 or all 1)
+ // else
+ // dLo = (aLo >>logic Amt) | (aHi << (size-Amt))
+ // dHi = aHi >> Amt
+
+ SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
+ DAG.getConstant(VTBits, MVT::i32), ShAmt);
+ SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt);
+ SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
+ DAG.getConstant(VTBits, MVT::i32));
+ SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt);
+ SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
+ SDValue TrueVal = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt);
+
+ SDValue Cmp = DAG.getSetCC(dl, MVT::i1, ShAmt,
+ DAG.getConstant(VTBits, MVT::i32), ISD::SETGE);
+ SDValue Hi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt);
+ SDValue Lo = DAG.getNode(ISD::SELECT, dl, VT, Cmp, TrueVal, FalseVal);
+
+ SDValue Ops[2] = { Lo, Hi };
+ return DAG.getMergeValues(Ops, dl);
+ }
+}
+
+/// LowerShiftLeftParts - Lower SHL_PARTS, which
+/// 1) returns two i32 values and takes a 2 x i32 value to shift plus a shift
+/// amount, or
+/// 2) returns two i64 values and takes a 2 x i64 value to shift plus a shift
+/// amount.
+SDValue NVPTXTargetLowering::LowerShiftLeftParts(SDValue Op,
+ SelectionDAG &DAG) const {
+ assert(Op.getNumOperands() == 3 && "Not a double-shift!");
+ assert(Op.getOpcode() == ISD::SHL_PARTS);
+
+ EVT VT = Op.getValueType();
+ unsigned VTBits = VT.getSizeInBits();
+ SDLoc dl(Op);
+ SDValue ShOpLo = Op.getOperand(0);
+ SDValue ShOpHi = Op.getOperand(1);
+ SDValue ShAmt = Op.getOperand(2);
+
+ if (VTBits == 32 && nvptxSubtarget.getSmVersion() >= 35) {
+
+ // For 32-bit and sm_35 or later, we can use the funnel shift 'shf' instruction.
+ // {dHi, dLo} = {aHi, aLo} << Amt
+ // dHi = shf.l.clamp aLo, aHi, Amt
+ // dLo = aLo << Amt
+
+ SDValue Hi = DAG.getNode(NVPTXISD::FUN_SHFL_CLAMP, dl, VT, ShOpLo, ShOpHi,
+ ShAmt);
+ SDValue Lo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
+
+ SDValue Ops[2] = { Lo, Hi };
+ return DAG.getMergeValues(Ops, dl);
+ }
+ else {
+
+ // {dHi, dLo} = {aHi, aLo} << Amt
+ // - if (Amt>=size) then
+ // dLo = aLo << Amt (all 0)
+ // dHi = aLo << (Amt-size)
+ // else
+ // dLo = aLo << Amt
+ // dHi = (aHi << Amt) | (aLo >> (size-Amt))
+
+ SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
+ DAG.getConstant(VTBits, MVT::i32), ShAmt);
+ SDValue Tmp1 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt);
+ SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
+ DAG.getConstant(VTBits, MVT::i32));
+ SDValue Tmp2 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt);
+ SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
+ SDValue TrueVal = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt);
+
+ SDValue Cmp = DAG.getSetCC(dl, MVT::i1, ShAmt,
+ DAG.getConstant(VTBits, MVT::i32), ISD::SETGE);
+ SDValue Lo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
+ SDValue Hi = DAG.getNode(ISD::SELECT, dl, VT, Cmp, TrueVal, FalseVal);
+
+ SDValue Ops[2] = { Lo, Hi };
+ return DAG.getMergeValues(Ops, dl);
+ }
+}
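Both PARTS lowerings implement the split-shift algebra spelled out in their comments. A minimal scalar model of the generic (non-shf) right-shift path, assuming 32-bit halves and 0 < Amt < 64; the helper name is illustrative, not from LLVM:

#include <cstdint>

// Logical right shift of the pair {Hi,Lo} by Amt, mirroring the
// TrueVal/FalseVal selection above.
static void srl64via32(uint32_t Lo, uint32_t Hi, unsigned Amt,
                       uint32_t &OutLo, uint32_t &OutHi) {
  if (Amt >= 32) {                            // the TrueVal path
    OutLo = Hi >> (Amt - 32);
    OutHi = 0;                                // SRA would fill with the sign
  } else {                                    // the FalseVal path
    OutLo = (Lo >> Amt) | (Hi << (32 - Amt));
    OutHi = Hi >> Amt;
  }
}
// e.g. {Hi,Lo} = {0x1, 0x0} >> 4 yields OutHi = 0x0, OutLo = 0x10000000.

The left-shift lowering is the mirror image, with dHi taking the spilled bits from aLo.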
+
SDValue
NVPTXTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
switch (Op.getOpcode()) {
@@ -1282,6 +1414,11 @@ NVPTXTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
return LowerSTORE(Op, DAG);
case ISD::LOAD:
return LowerLOAD(Op, DAG);
+ case ISD::SHL_PARTS:
+ return LowerShiftLeftParts(Op, DAG);
+ case ISD::SRA_PARTS:
+ case ISD::SRL_PARTS:
+ return LowerShiftRightParts(Op, DAG);
default:
llvm_unreachable("Custom lowering not defined for operation");
}
@@ -1495,7 +1632,7 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
const Function *F = MF.getFunction();
const AttributeSet &PAL = F->getAttributes();
- const TargetLowering *TLI = nvTM->getTargetLowering();
+ const TargetLowering *TLI = DAG.getTarget().getTargetLowering();
SDValue Root = DAG.getRoot();
std::vector<SDValue> OutChains;
@@ -1549,8 +1686,7 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
assert(vtparts.size() > 0 && "empty aggregate type not expected");
for (unsigned parti = 0, parte = vtparts.size(); parti != parte;
++parti) {
- EVT partVT = vtparts[parti];
- InVals.push_back(DAG.getNode(ISD::UNDEF, dl, partVT));
+ InVals.push_back(DAG.getNode(ISD::UNDEF, dl, Ins[InsIdx].VT));
++InsIdx;
}
if (vtparts.size() > 0)
@@ -1866,7 +2002,7 @@ NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
unsigned Offset = 0;
EVT VecVT =
- EVT::getVectorVT(F->getContext(), OutVals[0].getValueType(), VecSize);
+ EVT::getVectorVT(F->getContext(), EltVT, VecSize);
unsigned PerStoreOffset =
TD->getTypeAllocSize(VecVT.getTypeForEVT(F->getContext()));
@@ -1925,12 +2061,10 @@ NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
}
} else {
SmallVector<EVT, 16> ValVTs;
- // const_cast is necessary since we are still using an LLVM version from
- // before the type system re-write.
- ComputePTXValueVTs(*this, RetTy, ValVTs);
+ SmallVector<uint64_t, 16> Offsets;
+ ComputePTXValueVTs(*this, RetTy, ValVTs, &Offsets, 0);
assert(ValVTs.size() == OutVals.size() && "Bad return value decomposition");
- unsigned SizeSoFar = 0;
for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
SDValue theVal = OutVals[i];
EVT TheValType = theVal.getValueType();
@@ -1954,16 +2088,14 @@ NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
else if (TmpVal.getValueType().getSizeInBits() < 16)
TmpVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i16, TmpVal);
- SDValue Ops[] = { Chain, DAG.getConstant(SizeSoFar, MVT::i32), TmpVal };
+ SDValue Ops[] = {
+ Chain,
+ DAG.getConstant(Offsets[i], MVT::i32),
+ TmpVal };
Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreRetval, dl,
DAG.getVTList(MVT::Other), Ops,
TheStoreType,
MachinePointerInfo());
- if(TheValType.isVector())
- SizeSoFar +=
- TheStoreType.getVectorElementType().getStoreSizeInBits() / 8;
- else
- SizeSoFar += TheStoreType.getStoreSizeInBits()/8;
}
}
}
@@ -2220,22 +2352,62 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic(
case Intrinsic::nvvm_ldu_global_i:
case Intrinsic::nvvm_ldu_global_f:
- case Intrinsic::nvvm_ldu_global_p:
+ case Intrinsic::nvvm_ldu_global_p: {
Info.opc = ISD::INTRINSIC_W_CHAIN;
if (Intrinsic == Intrinsic::nvvm_ldu_global_i)
Info.memVT = getValueType(I.getType());
- else if (Intrinsic == Intrinsic::nvvm_ldu_global_p)
+ else if (Intrinsic == Intrinsic::nvvm_ldu_global_p)
+ Info.memVT = getPointerTy();
+ else
Info.memVT = getValueType(I.getType());
+ Info.ptrVal = I.getArgOperand(0);
+ Info.offset = 0;
+ Info.vol = 0;
+ Info.readMem = true;
+ Info.writeMem = false;
+
+ // alignment is available as metadata.
+ // Grab it and set the alignment.
+ assert(I.hasMetadataOtherThanDebugLoc() && "Must have alignment metadata");
+ MDNode *AlignMD = I.getMetadata("align");
+ assert(AlignMD && "Must have a non-null MDNode");
+ assert(AlignMD->getNumOperands() == 1 && "Must have a single operand");
+ Value *Align = AlignMD->getOperand(0);
+ int64_t Alignment = cast<ConstantInt>(Align)->getZExtValue();
+ Info.align = Alignment;
+
+ return true;
+ }
+ case Intrinsic::nvvm_ldg_global_i:
+ case Intrinsic::nvvm_ldg_global_f:
+ case Intrinsic::nvvm_ldg_global_p: {
+
+ Info.opc = ISD::INTRINSIC_W_CHAIN;
+ if (Intrinsic == Intrinsic::nvvm_ldg_global_i)
+ Info.memVT = getValueType(I.getType());
+ else if (Intrinsic == Intrinsic::nvvm_ldg_global_p)
+ Info.memVT = getPointerTy();
else
- Info.memVT = MVT::f32;
+ Info.memVT = getValueType(I.getType());
Info.ptrVal = I.getArgOperand(0);
Info.offset = 0;
Info.vol = 0;
Info.readMem = true;
Info.writeMem = false;
- Info.align = 0;
+
+ // alignment is available as metadata.
+ // Grab it and set the alignment.
+ assert(I.hasMetadataOtherThanDebugLoc() && "Must have alignment metadata");
+ MDNode *AlignMD = I.getMetadata("align");
+ assert(AlignMD && "Must have a non-null MDNode");
+ assert(AlignMD->getNumOperands() == 1 && "Must have a single operand");
+ Value *Align = AlignMD->getOperand(0);
+ int64_t Alignment = cast<ConstantInt>(Align)->getZExtValue();
+ Info.align = Alignment;
+
return true;
+ }
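Both the ldu and ldg cases assert that the call carries "align" metadata. A hedged sketch of how a frontend might attach it, assuming the Value-based metadata API of this LLVM version; Builder, LduFn and Ptr are hypothetical names:

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Metadata.h"
using namespace llvm;

// Hypothetical helper: call an ldu intrinsic declaration LduFn on Ptr and
// record a 4-byte alignment the way getTgtMemIntrinsic above expects.
static CallInst *emitLduWithAlign(IRBuilder<> &B, Function *LduFn, Value *Ptr) {
  CallInst *CI = B.CreateCall(LduFn, Ptr);
  LLVMContext &Ctx = CI->getContext();
  Value *AlignVal = ConstantInt::get(Type::getInt32Ty(Ctx), 4);
  CI->setMetadata("align", MDNode::get(Ctx, AlignVal)); // single-operand node
  return CI;
}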
case Intrinsic::nvvm_tex_1d_v4f32_i32:
case Intrinsic::nvvm_tex_1d_v4f32_f32:
@@ -2427,6 +2599,7 @@ NVPTXTargetLowering::getConstraintType(const std::string &Constraint) const {
switch (Constraint[0]) {
default:
break;
+ case 'b':
case 'r':
case 'h':
case 'c':
@@ -2446,6 +2619,8 @@ NVPTXTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
MVT VT) const {
if (Constraint.size() == 1) {
switch (Constraint[0]) {
+ case 'b':
+ return std::make_pair(0U, &NVPTX::Int1RegsRegClass);
case 'c':
return std::make_pair(0U, &NVPTX::Int16RegsRegClass);
case 'h':
@@ -2469,6 +2644,406 @@ unsigned NVPTXTargetLowering::getFunctionAlignment(const Function *) const {
return 4;
}
+//===----------------------------------------------------------------------===//
+// NVPTX DAG Combining
+//===----------------------------------------------------------------------===//
+
+extern unsigned FMAContractLevel;
+
+/// PerformADDCombineWithOperands - Try DAG combinations for an ADD with
+/// operands N0 and N1. This is a helper for PerformADDCombine that is
+/// called with the default operands, and if that fails, with commuted
+/// operands.
+static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const NVPTXSubtarget &Subtarget,
+ CodeGenOpt::Level OptLevel) {
+ SelectionDAG &DAG = DCI.DAG;
+ // Skip the vector case; only scalar types are handled here.
+ EVT VT = N0.getValueType();
+ if (VT.isVector())
+ return SDValue();
+
+ // fold (add (mul a, b), c) -> (mad a, b, c)
+ //
+ if (N0.getOpcode() == ISD::MUL) {
+ assert(VT.isInteger());
+ // For integer:
+ // Since integer multiply-add costs the same as integer multiply
+ // but is more costly than integer add, do the fusion only when
+ // the mul is only used in the add.
+ if (OptLevel == CodeGenOpt::None || VT != MVT::i32 ||
+ !N0.getNode()->hasOneUse())
+ return SDValue();
+
+ // Do the folding
+ return DAG.getNode(NVPTXISD::IMAD, SDLoc(N), VT,
+ N0.getOperand(0), N0.getOperand(1), N1);
+ }
+ else if (N0.getOpcode() == ISD::FMUL) {
+ if (VT == MVT::f32 || VT == MVT::f64) {
+ if (FMAContractLevel == 0)
+ return SDValue();
+
+ // For floating point:
+ // Do the fusion only when the mul has fewer than 5 uses and all
+ // of them are adds.
+ // The heuristic is that if a use is not an add, then that use
+ // cannot be fused into an fma, so the mul is still needed anyway.
+ // If there are more than 4 uses, even if they are all adds, fusing
+ // them will increase register pressure.
+ //
+ int numUses = 0;
+ int nonAddCount = 0;
+ for (SDNode::use_iterator UI = N0.getNode()->use_begin(),
+ UE = N0.getNode()->use_end();
+ UI != UE; ++UI) {
+ numUses++;
+ SDNode *User = *UI;
+ if (User->getOpcode() != ISD::FADD)
+ ++nonAddCount;
+ }
+ if (numUses >= 5)
+ return SDValue();
+ if (nonAddCount) {
+ int orderNo = N->getIROrder();
+ int orderNo2 = N0.getNode()->getIROrder();
+ // Simple heuristic to gauge potential register pressure: the IR-order
+ // difference measures the distance between the def and this use; the
+ // longer the distance, the more likely fusing increases register
+ // pressure.
+ if (orderNo - orderNo2 < 500)
+ return SDValue();
+
+ // Now, check if at least one of the FMUL's operands is live beyond
+ // node N, which guarantees that the FMA will not increase register
+ // pressure at node N.
+ bool opIsLive = false;
+ const SDNode *left = N0.getOperand(0).getNode();
+ const SDNode *right = N0.getOperand(1).getNode();
+
+ if (isa<ConstantSDNode>(left) || isa<ConstantSDNode>(right))
+ opIsLive = true;
+
+ if (!opIsLive)
+ for (SDNode::use_iterator UI = left->use_begin(), UE = left->use_end();
+ UI != UE; ++UI) {
+ SDNode *User = *UI;
+ int orderNo3 = User->getIROrder();
+ if (orderNo3 > orderNo) {
+ opIsLive = true;
+ break;
+ }
+ }
+
+ if (!opIsLive)
+ for (SDNode::use_iterator UI = right->use_begin(), UE = right->use_end();
+ UI != UE; ++UI) {
+ SDNode *User = *UI;
+ int orderNo3 = User->getIROrder();
+ if (orderNo3 > orderNo) {
+ opIsLive = true;
+ break;
+ }
+ }
+
+ if (!opIsLive)
+ return SDValue();
+ }
+
+ return DAG.getNode(ISD::FMA, SDLoc(N), VT,
+ N0.getOperand(0), N0.getOperand(1), N1);
+ }
+ }
+
+ return SDValue();
+}
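The liveness test above can be read as a standalone predicate: an FMUL operand counts as live past N if any of its users is IR-ordered after N, in which case reusing it in the FMA adds no new live range at N. A distilled sketch with an illustrative name, using the same use_iterator API as the code above:

static bool liveBeyond(const SDNode *Op, int OrderN) {
  for (SDNode::use_iterator UI = Op->use_begin(), UE = Op->use_end();
       UI != UE; ++UI)
    if ((int)(*UI)->getIROrder() > OrderN)
      return true;
  return false;
}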
+
+/// PerformADDCombine - Target-specific dag combine xforms for ISD::ADD.
+///
+static SDValue PerformADDCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const NVPTXSubtarget &Subtarget,
+ CodeGenOpt::Level OptLevel) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+
+ // First try with the default operand order.
+ SDValue Result = PerformADDCombineWithOperands(N, N0, N1, DCI, Subtarget,
+ OptLevel);
+ if (Result.getNode())
+ return Result;
+
+ // If that didn't work, try again with the operands commuted.
+ return PerformADDCombineWithOperands(N, N1, N0, DCI, Subtarget, OptLevel);
+}
+
+static SDValue PerformANDCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ // The type legalizer turns a vector load of i8 values into a zextload to i16
+ // registers, optionally ANY_EXTENDs it (if target type is integer),
+ // and ANDs off the high 8 bits. Since we turn this load into a
+ // target-specific DAG node, the DAG combiner fails to eliminate these AND
+ // nodes. Do that here.
+ SDValue Val = N->getOperand(0);
+ SDValue Mask = N->getOperand(1);
+
+ if (isa<ConstantSDNode>(Val)) {
+ std::swap(Val, Mask);
+ }
+
+ SDValue AExt;
+ // Generally, we will see zextload -> IMOV16rr -> ANY_EXTEND -> and
+ if (Val.getOpcode() == ISD::ANY_EXTEND) {
+ AExt = Val;
+ Val = Val->getOperand(0);
+ }
+
+ if (Val->isMachineOpcode() && Val->getMachineOpcode() == NVPTX::IMOV16rr) {
+ Val = Val->getOperand(0);
+ }
+
+ if (Val->getOpcode() == NVPTXISD::LoadV2 ||
+ Val->getOpcode() == NVPTXISD::LoadV4) {
+ ConstantSDNode *MaskCnst = dyn_cast<ConstantSDNode>(Mask);
+ if (!MaskCnst) {
+ // Not an AND with a constant
+ return SDValue();
+ }
+
+ uint64_t MaskVal = MaskCnst->getZExtValue();
+ if (MaskVal != 0xff) {
+ // Not an AND that chops off top 8 bits
+ return SDValue();
+ }
+
+ MemSDNode *Mem = dyn_cast<MemSDNode>(Val);
+ if (!Mem) {
+ // Not a MemSDNode?!?
+ return SDValue();
+ }
+
+ EVT MemVT = Mem->getMemoryVT();
+ if (MemVT != MVT::v2i8 && MemVT != MVT::v4i8) {
+ // We only handle the i8 case
+ return SDValue();
+ }
+
+ unsigned ExtType =
+ cast<ConstantSDNode>(Val->getOperand(Val->getNumOperands()-1))->
+ getZExtValue();
+ if (ExtType == ISD::SEXTLOAD) {
+ // If for some reason the load is a sextload, the and is needed to zero
+ // out the high 8 bits
+ return SDValue();
+ }
+
+ bool AddTo = false;
+ if (AExt.getNode() != nullptr) {
+ // Re-insert the ext as a zext.
+ Val = DCI.DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N),
+ AExt.getValueType(), Val);
+ AddTo = true;
+ }
+
+ // If we get here, the AND is unnecessary. Just replace it with the load
+ DCI.CombineTo(N, Val, AddTo);
+ }
+
+ return SDValue();
+}
+
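A scalar model of the redundancy argument in PerformANDCombine, assuming an 8-bit load extended to i16: the mask is a no-op exactly when the load zero-extends, which is why the SEXTLOAD case bails out.

#include <cstdint>

static uint16_t maskedExtend(uint8_t Loaded, bool SextLoad) {
  uint16_t V = SextLoad ? (uint16_t)(int16_t)(int8_t)Loaded
                        : (uint16_t)Loaded;
  return V & 0xff; // == V whenever !SextLoad (or Loaded < 0x80)
}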
+enum OperandSignedness {
+ Signed = 0,
+ Unsigned,
+ Unknown
+};
+
+/// IsMulWideOperandDemotable - Checks if the provided DAG node is an operand
+/// that can be demoted to \p OptSize bits without loss of information. The
+/// signedness of the operand, if determinable, is placed in \p S.
+static bool IsMulWideOperandDemotable(SDValue Op,
+ unsigned OptSize,
+ OperandSignedness &S) {
+ S = Unknown;
+
+ if (Op.getOpcode() == ISD::SIGN_EXTEND ||
+ Op.getOpcode() == ISD::SIGN_EXTEND_INREG) {
+ EVT OrigVT = Op.getOperand(0).getValueType();
+ if (OrigVT.getSizeInBits() == OptSize) {
+ S = Signed;
+ return true;
+ }
+ } else if (Op.getOpcode() == ISD::ZERO_EXTEND) {
+ EVT OrigVT = Op.getOperand(0).getValueType();
+ if (OrigVT.getSizeInBits() == OptSize) {
+ S = Unsigned;
+ return true;
+ }
+ }
+
+ return false;
+}
+
+/// AreMulWideOperandsDemotable - Checks if the given LHS and RHS operands can
+/// be demoted to \p OptSize bits without loss of information. If the operands
+/// contain a constant, it should appear as the RHS operand. The signedness of
+/// the operands is placed in \p IsSigned.
+static bool AreMulWideOperandsDemotable(SDValue LHS, SDValue RHS,
+ unsigned OptSize,
+ bool &IsSigned) {
+
+ OperandSignedness LHSSign;
+
+ // The LHS operand must be a demotable op
+ if (!IsMulWideOperandDemotable(LHS, OptSize, LHSSign))
+ return false;
+
+ // We should have been able to determine the signedness from the LHS
+ if (LHSSign == Unknown)
+ return false;
+
+ IsSigned = (LHSSign == Signed);
+
+ // The RHS can be a demotable op or a constant
+ if (ConstantSDNode *CI = dyn_cast<ConstantSDNode>(RHS)) {
+ APInt Val = CI->getAPIntValue();
+ if (LHSSign == Unsigned) {
+ if (Val.isIntN(OptSize)) {
+ return true;
+ }
+ return false;
+ } else {
+ if (Val.isSignedIntN(OptSize)) {
+ return true;
+ }
+ return false;
+ }
+ } else {
+ OperandSignedness RHSSign;
+ if (!IsMulWideOperandDemotable(RHS, OptSize, RHSSign))
+ return false;
+
+ if (LHSSign != RHSSign)
+ return false;
+
+ return true;
+ }
+}
+
+/// TryMULWIDECombine - Attempt to replace a multiply of M bits with a multiply
+/// of M/2 bits that produces an M-bit result (i.e. mul.wide). This transform
+/// works on both multiply DAG nodes and SHL DAG nodes with a constant shift
+/// amount.
+static SDValue TryMULWIDECombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ EVT MulType = N->getValueType(0);
+ if (MulType != MVT::i32 && MulType != MVT::i64) {
+ return SDValue();
+ }
+
+ unsigned OptSize = MulType.getSizeInBits() >> 1;
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+
+ // Canonicalize the multiply so the constant (if any) is on the right
+ if (N->getOpcode() == ISD::MUL) {
+ if (isa<ConstantSDNode>(LHS)) {
+ std::swap(LHS, RHS);
+ }
+ }
+
+ // If we have a SHL, determine the actual multiply amount
+ if (N->getOpcode() == ISD::SHL) {
+ ConstantSDNode *ShlRHS = dyn_cast<ConstantSDNode>(RHS);
+ if (!ShlRHS) {
+ return SDValue();
+ }
+
+ APInt ShiftAmt = ShlRHS->getAPIntValue();
+ unsigned BitWidth = MulType.getSizeInBits();
+ if (ShiftAmt.sge(0) && ShiftAmt.slt(BitWidth)) {
+ APInt MulVal = APInt(BitWidth, 1) << ShiftAmt;
+ RHS = DCI.DAG.getConstant(MulVal, MulType);
+ } else {
+ return SDValue();
+ }
+ }
+
+ bool Signed;
+ // Verify that our operands are demotable
+ if (!AreMulWideOperandsDemotable(LHS, RHS, OptSize, Signed)) {
+ return SDValue();
+ }
+
+ EVT DemotedVT;
+ if (MulType == MVT::i32) {
+ DemotedVT = MVT::i16;
+ } else {
+ DemotedVT = MVT::i32;
+ }
+
+ // Truncate the operands to the correct size. Note that these are just for
+ // type consistency and will (likely) be eliminated in later phases.
+ SDValue TruncLHS =
+ DCI.DAG.getNode(ISD::TRUNCATE, SDLoc(N), DemotedVT, LHS);
+ SDValue TruncRHS =
+ DCI.DAG.getNode(ISD::TRUNCATE, SDLoc(N), DemotedVT, RHS);
+
+ unsigned Opc;
+ if (Signed) {
+ Opc = NVPTXISD::MUL_WIDE_SIGNED;
+ } else {
+ Opc = NVPTXISD::MUL_WIDE_UNSIGNED;
+ }
+
+ return DCI.DAG.getNode(Opc, SDLoc(N), MulType, TruncLHS, TruncRHS);
+}
+
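The RHS-constant rule in AreMulWideOperandsDemotable, shown in isolation with APInt: 0xFFFF fits in 16 bits only as an unsigned value, so a 32-bit (mul (zext i16 x), 0xFFFF) can become MUL_WIDE_UNSIGNED but not MUL_WIDE_SIGNED. The helper names are illustrative:

#include "llvm/ADT/APInt.h"

static bool demotableU16(const llvm::APInt &V) { return V.isIntN(16); }
static bool demotableS16(const llvm::APInt &V) { return V.isSignedIntN(16); }
// demotableU16(APInt(32, 0xFFFF)) == true
// demotableS16(APInt(32, 0xFFFF)) == false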
+/// PerformMULCombine - Runs PTX-specific DAG combine patterns on MUL nodes.
+static SDValue PerformMULCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI,
+ CodeGenOpt::Level OptLevel) {
+ if (OptLevel > 0) {
+ // Try mul.wide combining at OptLevel > 0
+ SDValue Ret = TryMULWIDECombine(N, DCI);
+ if (Ret.getNode())
+ return Ret;
+ }
+
+ return SDValue();
+}
+
+/// PerformSHLCombine - Runs PTX-specific DAG combine patterns on SHL nodes.
+static SDValue PerformSHLCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI,
+ CodeGenOpt::Level OptLevel) {
+ if (OptLevel > 0) {
+ // Try mul.wide combining at OptLevel > 0
+ SDValue Ret = TryMULWIDECombine(N, DCI);
+ if (Ret.getNode())
+ return Ret;
+ }
+
+ return SDValue();
+}
+
+SDValue NVPTXTargetLowering::PerformDAGCombine(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ // FIXME: Get this from the DAG somehow
+ CodeGenOpt::Level OptLevel = CodeGenOpt::Aggressive;
+ switch (N->getOpcode()) {
+ default: break;
+ case ISD::ADD:
+ case ISD::FADD:
+ return PerformADDCombine(N, DCI, nvptxSubtarget, OptLevel);
+ case ISD::MUL:
+ return PerformMULCombine(N, DCI, OptLevel);
+ case ISD::SHL:
+ return PerformSHLCombine(N, DCI, OptLevel);
+ case ISD::AND:
+ return PerformANDCombine(N, DCI);
+ }
+ return SDValue();
+}
+
/// ReplaceLoadVector - Convert vector loads into multi-output scalar loads.
static void ReplaceLoadVector(SDNode *N, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &Results) {
diff --git a/lib/Target/NVPTX/NVPTXISelLowering.h b/lib/Target/NVPTX/NVPTXISelLowering.h
index 7bad8a2..7b4026d 100644
--- a/lib/Target/NVPTX/NVPTXISelLowering.h
+++ b/lib/Target/NVPTX/NVPTXISelLowering.h
@@ -16,7 +16,6 @@
#define NVPTXISELLOWERING_H
#include "NVPTX.h"
-#include "NVPTXSubtarget.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/Target/TargetLowering.h"
@@ -50,6 +49,11 @@ enum NodeType {
CallSeqBegin,
CallSeqEnd,
CallPrototype,
+ FUN_SHFL_CLAMP,
+ FUN_SHFR_CLAMP,
+ MUL_WIDE_SIGNED,
+ MUL_WIDE_UNSIGNED,
+ IMAD,
Dummy,
LoadV2 = ISD::FIRST_TARGET_MEMORY_OPCODE,
@@ -167,6 +171,8 @@ enum NodeType {
};
}
+class NVPTXSubtarget;
+
//===--------------------------------------------------------------------===//
// TargetLowering Implementation
//===--------------------------------------------------------------------===//
@@ -196,9 +202,9 @@ public:
/// getFunctionAlignment - Return the Log2 alignment of this function.
unsigned getFunctionAlignment(const Function *F) const;
- EVT getSetCCResultType(LLVMContext &, EVT VT) const override {
+ EVT getSetCCResultType(LLVMContext &Ctx, EVT VT) const override {
if (VT.isVector())
- return MVT::getVectorVT(MVT::i1, VT.getVectorNumElements());
+ return EVT::getVectorVT(Ctx, MVT::i1, VT.getVectorNumElements());
return MVT::i1;
}
@@ -236,7 +242,8 @@ public:
// PTX always uses 32-bit shift amounts
MVT getScalarShiftAmountTy(EVT LHSTy) const override { return MVT::i32; }
- bool shouldSplitVectorType(EVT VT) const override;
+ TargetLoweringBase::LegalizeTypeAction
+ getPreferredVectorAction(EVT VT) const override;
private:
const NVPTXSubtarget &nvptxSubtarget; // cache the subtarget here
@@ -255,8 +262,12 @@ private:
SDValue LowerSTOREi1(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const;
+
void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG) const override;
+ SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
unsigned getArgumentAlignment(SDValue Callee, const ImmutableCallSite *CS,
Type *Ty, unsigned Idx) const;
diff --git a/lib/Target/NVPTX/NVPTXImageOptimizer.cpp b/lib/Target/NVPTX/NVPTXImageOptimizer.cpp
index 397f4bc..a98fb37 100644
--- a/lib/Target/NVPTX/NVPTXImageOptimizer.cpp
+++ b/lib/Target/NVPTX/NVPTXImageOptimizer.cpp
@@ -146,7 +146,7 @@ bool NVPTXImageOptimizer::replaceIsTypePTexture(Instruction &I) {
void NVPTXImageOptimizer::replaceWith(Instruction *From, ConstantInt *To) {
// We implement "poor man's DCE" here to make sure any code that is no longer
// live is actually unreachable and can be trivially eliminated by the
- // unreachable block elimiation pass.
+ // unreachable block elimination pass.
for (CallInst::use_iterator UI = From->use_begin(), UE = From->use_end();
UI != UE; ++UI) {
if (BranchInst *BI = dyn_cast<BranchInst>(*UI)) {
diff --git a/lib/Target/NVPTX/NVPTXInstrInfo.cpp b/lib/Target/NVPTX/NVPTXInstrInfo.cpp
index cdc8088..b5b4fbe 100644
--- a/lib/Target/NVPTX/NVPTXInstrInfo.cpp
+++ b/lib/Target/NVPTX/NVPTXInstrInfo.cpp
@@ -29,8 +29,8 @@ using namespace llvm;
void NVPTXInstrInfo::anchor() {}
// FIXME: Add the subtarget support on this constructor.
-NVPTXInstrInfo::NVPTXInstrInfo(NVPTXTargetMachine &tm)
- : NVPTXGenInstrInfo(), TM(tm), RegInfo(*TM.getSubtargetImpl()) {}
+NVPTXInstrInfo::NVPTXInstrInfo(NVPTXSubtarget &STI)
+ : NVPTXGenInstrInfo(), RegInfo(STI) {}
void NVPTXInstrInfo::copyPhysReg(
MachineBasicBlock &MBB, MachineBasicBlock::iterator I, DebugLoc DL,
diff --git a/lib/Target/NVPTX/NVPTXInstrInfo.h b/lib/Target/NVPTX/NVPTXInstrInfo.h
index 88a9e45..2ac2974 100644
--- a/lib/Target/NVPTX/NVPTXInstrInfo.h
+++ b/lib/Target/NVPTX/NVPTXInstrInfo.h
@@ -24,11 +24,10 @@
namespace llvm {
class NVPTXInstrInfo : public NVPTXGenInstrInfo {
- NVPTXTargetMachine &TM;
const NVPTXRegisterInfo RegInfo;
virtual void anchor();
public:
- explicit NVPTXInstrInfo(NVPTXTargetMachine &TM);
+ explicit NVPTXInstrInfo(NVPTXSubtarget &STI);
const NVPTXRegisterInfo &getRegisterInfo() const { return RegInfo; }
diff --git a/lib/Target/NVPTX/NVPTXInstrInfo.td b/lib/Target/NVPTX/NVPTXInstrInfo.td
index fbcd0e4..d2c0373 100644
--- a/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ b/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -158,9 +158,12 @@ def do_SQRTF32_APPROX : Predicate<"!usePrecSqrtF32()">;
def do_SQRTF32_RN : Predicate<"usePrecSqrtF32()">;
def hasHWROT32 : Predicate<"Subtarget.hasHWROT32()">;
+def noHWROT32 : Predicate<"!Subtarget.hasHWROT32()">;
def true : Predicate<"1">;
+def hasPTX31 : Predicate<"Subtarget.getPTXVersion() >= 31">;
+
//===----------------------------------------------------------------------===//
// Some Common Instruction Class Templates
@@ -461,33 +464,45 @@ def SHL2MUL16 : SDNodeXForm<imm, [{
return CurDAG->getTargetConstant(temp.shl(v), MVT::i16);
}]>;
-def MULWIDES64 : NVPTXInst<(outs Int64Regs:$dst),
- (ins Int32Regs:$a, Int32Regs:$b),
+def MULWIDES64
+ : NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
+ "mul.wide.s32 \t$dst, $a, $b;", []>;
+def MULWIDES64Imm
+ : NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
"mul.wide.s32 \t$dst, $a, $b;", []>;
-def MULWIDES64Imm : NVPTXInst<(outs Int64Regs:$dst),
- (ins Int32Regs:$a, i64imm:$b),
+def MULWIDES64Imm64
+ : NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$a, i64imm:$b),
"mul.wide.s32 \t$dst, $a, $b;", []>;
-def MULWIDEU64 : NVPTXInst<(outs Int64Regs:$dst),
- (ins Int32Regs:$a, Int32Regs:$b),
+def MULWIDEU64
+ : NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
+ "mul.wide.u32 \t$dst, $a, $b;", []>;
+def MULWIDEU64Imm
+ : NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
"mul.wide.u32 \t$dst, $a, $b;", []>;
-def MULWIDEU64Imm : NVPTXInst<(outs Int64Regs:$dst),
- (ins Int32Regs:$a, i64imm:$b),
+def MULWIDEU64Imm64
+ : NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$a, i64imm:$b),
"mul.wide.u32 \t$dst, $a, $b;", []>;
-def MULWIDES32 : NVPTXInst<(outs Int32Regs:$dst),
- (ins Int16Regs:$a, Int16Regs:$b),
+def MULWIDES32
+ : NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b),
"mul.wide.s16 \t$dst, $a, $b;", []>;
-def MULWIDES32Imm : NVPTXInst<(outs Int32Regs:$dst),
- (ins Int16Regs:$a, i32imm:$b),
+def MULWIDES32Imm
+ : NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, i16imm:$b),
+ "mul.wide.s16 \t$dst, $a, $b;", []>;
+def MULWIDES32Imm32
+ : NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, i32imm:$b),
"mul.wide.s16 \t$dst, $a, $b;", []>;
-def MULWIDEU32 : NVPTXInst<(outs Int32Regs:$dst),
- (ins Int16Regs:$a, Int16Regs:$b),
- "mul.wide.u16 \t$dst, $a, $b;", []>;
-def MULWIDEU32Imm : NVPTXInst<(outs Int32Regs:$dst),
- (ins Int16Regs:$a, i32imm:$b),
+def MULWIDEU32
+ : NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b),
+ "mul.wide.u16 \t$dst, $a, $b;", []>;
+def MULWIDEU32Imm
+ : NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, i16imm:$b),
"mul.wide.u16 \t$dst, $a, $b;", []>;
+def MULWIDEU32Imm32
+ : NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, i32imm:$b),
+ "mul.wide.u16 \t$dst, $a, $b;", []>;
def : Pat<(shl (sext Int32Regs:$a), (i32 Int5Const:$b)),
(MULWIDES64Imm Int32Regs:$a, (SHL2MUL32 node:$b))>,
@@ -507,25 +522,63 @@ def : Pat<(mul (sext Int32Regs:$a), (sext Int32Regs:$b)),
(MULWIDES64 Int32Regs:$a, Int32Regs:$b)>,
Requires<[doMulWide]>;
def : Pat<(mul (sext Int32Regs:$a), (i64 SInt32Const:$b)),
- (MULWIDES64Imm Int32Regs:$a, (i64 SInt32Const:$b))>,
+ (MULWIDES64Imm64 Int32Regs:$a, (i64 SInt32Const:$b))>,
Requires<[doMulWide]>;
def : Pat<(mul (zext Int32Regs:$a), (zext Int32Regs:$b)),
- (MULWIDEU64 Int32Regs:$a, Int32Regs:$b)>, Requires<[doMulWide]>;
+ (MULWIDEU64 Int32Regs:$a, Int32Regs:$b)>,
+ Requires<[doMulWide]>;
def : Pat<(mul (zext Int32Regs:$a), (i64 UInt32Const:$b)),
- (MULWIDEU64Imm Int32Regs:$a, (i64 UInt32Const:$b))>,
+ (MULWIDEU64Imm64 Int32Regs:$a, (i64 UInt32Const:$b))>,
Requires<[doMulWide]>;
def : Pat<(mul (sext Int16Regs:$a), (sext Int16Regs:$b)),
- (MULWIDES32 Int16Regs:$a, Int16Regs:$b)>, Requires<[doMulWide]>;
+ (MULWIDES32 Int16Regs:$a, Int16Regs:$b)>,
+ Requires<[doMulWide]>;
def : Pat<(mul (sext Int16Regs:$a), (i32 SInt16Const:$b)),
- (MULWIDES32Imm Int16Regs:$a, (i32 SInt16Const:$b))>,
+ (MULWIDES32Imm32 Int16Regs:$a, (i32 SInt16Const:$b))>,
Requires<[doMulWide]>;
def : Pat<(mul (zext Int16Regs:$a), (zext Int16Regs:$b)),
- (MULWIDEU32 Int16Regs:$a, Int16Regs:$b)>, Requires<[doMulWide]>;
+ (MULWIDEU32 Int16Regs:$a, Int16Regs:$b)>,
+ Requires<[doMulWide]>;
def : Pat<(mul (zext Int16Regs:$a), (i32 UInt16Const:$b)),
- (MULWIDEU32Imm Int16Regs:$a, (i32 UInt16Const:$b))>,
+ (MULWIDEU32Imm32 Int16Regs:$a, (i32 UInt16Const:$b))>,
+ Requires<[doMulWide]>;
+
+
+def SDTMulWide
+ : SDTypeProfile<1, 2, [SDTCisSameAs<1, 2>]>;
+def mul_wide_signed
+ : SDNode<"NVPTXISD::MUL_WIDE_SIGNED", SDTMulWide>;
+def mul_wide_unsigned
+ : SDNode<"NVPTXISD::MUL_WIDE_UNSIGNED", SDTMulWide>;
+
+def : Pat<(i32 (mul_wide_signed Int16Regs:$a, Int16Regs:$b)),
+ (MULWIDES32 Int16Regs:$a, Int16Regs:$b)>,
+ Requires<[doMulWide]>;
+def : Pat<(i32 (mul_wide_signed Int16Regs:$a, imm:$b)),
+ (MULWIDES32Imm Int16Regs:$a, imm:$b)>,
+ Requires<[doMulWide]>;
+def : Pat<(i32 (mul_wide_unsigned Int16Regs:$a, Int16Regs:$b)),
+ (MULWIDEU32 Int16Regs:$a, Int16Regs:$b)>,
+ Requires<[doMulWide]>;
+def : Pat<(i32 (mul_wide_unsigned Int16Regs:$a, imm:$b)),
+ (MULWIDEU32Imm Int16Regs:$a, imm:$b)>,
+ Requires<[doMulWide]>;
+
+
+def : Pat<(i64 (mul_wide_signed Int32Regs:$a, Int32Regs:$b)),
+ (MULWIDES64 Int32Regs:$a, Int32Regs:$b)>,
+ Requires<[doMulWide]>;
+def : Pat<(i64 (mul_wide_signed Int32Regs:$a, imm:$b)),
+ (MULWIDES64Imm Int32Regs:$a, imm:$b)>,
+ Requires<[doMulWide]>;
+def : Pat<(i64 (mul_wide_unsigned Int32Regs:$a, Int32Regs:$b)),
+ (MULWIDEU64 Int32Regs:$a, Int32Regs:$b)>,
+ Requires<[doMulWide]>;
+def : Pat<(i64 (mul_wide_unsigned Int32Regs:$a, imm:$b)),
+ (MULWIDEU64Imm Int32Regs:$a, imm:$b)>,
Requires<[doMulWide]>;
defm MULT : I3<"mul.lo.s", mul>;
@@ -541,69 +594,75 @@ defm SREM : I3<"rem.s", srem>;
defm UREM : I3<"rem.u", urem>;
// The ri version will not be selected as DAGCombiner::visitUREM will lower it.
+def SDTIMAD
+ : SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisInt<0>,
+ SDTCisInt<2>, SDTCisSameAs<0, 2>,
+ SDTCisSameAs<0, 3>]>;
+def imad
+ : SDNode<"NVPTXISD::IMAD", SDTIMAD>;
+
def MAD16rrr : NVPTXInst<(outs Int16Regs:$dst),
(ins Int16Regs:$a, Int16Regs:$b, Int16Regs:$c),
"mad.lo.s16 \t$dst, $a, $b, $c;",
- [(set Int16Regs:$dst, (add
- (mul Int16Regs:$a, Int16Regs:$b), Int16Regs:$c))]>;
+ [(set Int16Regs:$dst,
+ (imad Int16Regs:$a, Int16Regs:$b, Int16Regs:$c))]>;
def MAD16rri : NVPTXInst<(outs Int16Regs:$dst),
(ins Int16Regs:$a, Int16Regs:$b, i16imm:$c),
"mad.lo.s16 \t$dst, $a, $b, $c;",
- [(set Int16Regs:$dst, (add
- (mul Int16Regs:$a, Int16Regs:$b), imm:$c))]>;
+ [(set Int16Regs:$dst,
+ (imad Int16Regs:$a, Int16Regs:$b, imm:$c))]>;
def MAD16rir : NVPTXInst<(outs Int16Regs:$dst),
(ins Int16Regs:$a, i16imm:$b, Int16Regs:$c),
"mad.lo.s16 \t$dst, $a, $b, $c;",
- [(set Int16Regs:$dst, (add
- (mul Int16Regs:$a, imm:$b), Int16Regs:$c))]>;
+ [(set Int16Regs:$dst,
+ (imad Int16Regs:$a, imm:$b, Int16Regs:$c))]>;
def MAD16rii : NVPTXInst<(outs Int16Regs:$dst),
(ins Int16Regs:$a, i16imm:$b, i16imm:$c),
"mad.lo.s16 \t$dst, $a, $b, $c;",
- [(set Int16Regs:$dst, (add (mul Int16Regs:$a, imm:$b),
- imm:$c))]>;
+ [(set Int16Regs:$dst,
+ (imad Int16Regs:$a, imm:$b, imm:$c))]>;
def MAD32rrr : NVPTXInst<(outs Int32Regs:$dst),
(ins Int32Regs:$a, Int32Regs:$b, Int32Regs:$c),
"mad.lo.s32 \t$dst, $a, $b, $c;",
- [(set Int32Regs:$dst, (add
- (mul Int32Regs:$a, Int32Regs:$b), Int32Regs:$c))]>;
+ [(set Int32Regs:$dst,
+ (imad Int32Regs:$a, Int32Regs:$b, Int32Regs:$c))]>;
def MAD32rri : NVPTXInst<(outs Int32Regs:$dst),
(ins Int32Regs:$a, Int32Regs:$b, i32imm:$c),
"mad.lo.s32 \t$dst, $a, $b, $c;",
- [(set Int32Regs:$dst, (add
- (mul Int32Regs:$a, Int32Regs:$b), imm:$c))]>;
+ [(set Int32Regs:$dst,
+ (imad Int32Regs:$a, Int32Regs:$b, imm:$c))]>;
def MAD32rir : NVPTXInst<(outs Int32Regs:$dst),
(ins Int32Regs:$a, i32imm:$b, Int32Regs:$c),
"mad.lo.s32 \t$dst, $a, $b, $c;",
- [(set Int32Regs:$dst, (add
- (mul Int32Regs:$a, imm:$b), Int32Regs:$c))]>;
+ [(set Int32Regs:$dst,
+ (imad Int32Regs:$a, imm:$b, Int32Regs:$c))]>;
def MAD32rii : NVPTXInst<(outs Int32Regs:$dst),
(ins Int32Regs:$a, i32imm:$b, i32imm:$c),
"mad.lo.s32 \t$dst, $a, $b, $c;",
- [(set Int32Regs:$dst, (add
- (mul Int32Regs:$a, imm:$b), imm:$c))]>;
+ [(set Int32Regs:$dst,
+ (imad Int32Regs:$a, imm:$b, imm:$c))]>;
def MAD64rrr : NVPTXInst<(outs Int64Regs:$dst),
(ins Int64Regs:$a, Int64Regs:$b, Int64Regs:$c),
"mad.lo.s64 \t$dst, $a, $b, $c;",
- [(set Int64Regs:$dst, (add
- (mul Int64Regs:$a, Int64Regs:$b), Int64Regs:$c))]>;
+ [(set Int64Regs:$dst,
+ (imad Int64Regs:$a, Int64Regs:$b, Int64Regs:$c))]>;
def MAD64rri : NVPTXInst<(outs Int64Regs:$dst),
(ins Int64Regs:$a, Int64Regs:$b, i64imm:$c),
"mad.lo.s64 \t$dst, $a, $b, $c;",
- [(set Int64Regs:$dst, (add
- (mul Int64Regs:$a, Int64Regs:$b), imm:$c))]>;
+ [(set Int64Regs:$dst,
+ (imad Int64Regs:$a, Int64Regs:$b, imm:$c))]>;
def MAD64rir : NVPTXInst<(outs Int64Regs:$dst),
(ins Int64Regs:$a, i64imm:$b, Int64Regs:$c),
"mad.lo.s64 \t$dst, $a, $b, $c;",
- [(set Int64Regs:$dst, (add
- (mul Int64Regs:$a, imm:$b), Int64Regs:$c))]>;
+ [(set Int64Regs:$dst,
+ (imad Int64Regs:$a, imm:$b, Int64Regs:$c))]>;
def MAD64rii : NVPTXInst<(outs Int64Regs:$dst),
(ins Int64Regs:$a, i64imm:$b, i64imm:$c),
"mad.lo.s64 \t$dst, $a, $b, $c;",
- [(set Int64Regs:$dst, (add
- (mul Int64Regs:$a, imm:$b), imm:$c))]>;
-
+ [(set Int64Regs:$dst,
+ (imad Int64Regs:$a, imm:$b, imm:$c))]>;
def INEG16 : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$src),
"neg.s16 \t$dst, $src;",
@@ -809,36 +868,26 @@ multiclass FPCONTRACT32<string OpcStr, Predicate Pred> {
def rrr : NVPTXInst<(outs Float32Regs:$dst),
(ins Float32Regs:$a, Float32Regs:$b, Float32Regs:$c),
!strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
- [(set Float32Regs:$dst, (fadd
- (fmul Float32Regs:$a, Float32Regs:$b),
- Float32Regs:$c))]>, Requires<[Pred]>;
- // This is to WAR a weird bug in Tablegen that does not automatically
- // generate the following permutated rule rrr2 from the above rrr.
- // So we explicitly add it here. This happens to FMA32 only.
- // See the comments at FMAD32 and FMA32 for more information.
- def rrr2 : NVPTXInst<(outs Float32Regs:$dst),
- (ins Float32Regs:$a, Float32Regs:$b, Float32Regs:$c),
- !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
- [(set Float32Regs:$dst, (fadd Float32Regs:$c,
- (fmul Float32Regs:$a, Float32Regs:$b)))]>,
+ [(set Float32Regs:$dst,
+ (fma Float32Regs:$a, Float32Regs:$b, Float32Regs:$c))]>,
Requires<[Pred]>;
def rri : NVPTXInst<(outs Float32Regs:$dst),
(ins Float32Regs:$a, Float32Regs:$b, f32imm:$c),
!strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
- [(set Float32Regs:$dst, (fadd
- (fmul Float32Regs:$a, Float32Regs:$b), fpimm:$c))]>,
+ [(set Float32Regs:$dst,
+ (fma Float32Regs:$a, Float32Regs:$b, fpimm:$c))]>,
Requires<[Pred]>;
def rir : NVPTXInst<(outs Float32Regs:$dst),
(ins Float32Regs:$a, f32imm:$b, Float32Regs:$c),
!strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
- [(set Float32Regs:$dst, (fadd
- (fmul Float32Regs:$a, fpimm:$b), Float32Regs:$c))]>,
+ [(set Float32Regs:$dst,
+ (fma Float32Regs:$a, fpimm:$b, Float32Regs:$c))]>,
Requires<[Pred]>;
def rii : NVPTXInst<(outs Float32Regs:$dst),
(ins Float32Regs:$a, f32imm:$b, f32imm:$c),
!strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
- [(set Float32Regs:$dst, (fadd
- (fmul Float32Regs:$a, fpimm:$b), fpimm:$c))]>,
+ [(set Float32Regs:$dst,
+ (fma Float32Regs:$a, fpimm:$b, fpimm:$c))]>,
Requires<[Pred]>;
}
@@ -846,73 +895,32 @@ multiclass FPCONTRACT64<string OpcStr, Predicate Pred> {
def rrr : NVPTXInst<(outs Float64Regs:$dst),
(ins Float64Regs:$a, Float64Regs:$b, Float64Regs:$c),
!strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
- [(set Float64Regs:$dst, (fadd
- (fmul Float64Regs:$a, Float64Regs:$b),
- Float64Regs:$c))]>, Requires<[Pred]>;
+ [(set Float64Regs:$dst,
+ (fma Float64Regs:$a, Float64Regs:$b, Float64Regs:$c))]>,
+ Requires<[Pred]>;
def rri : NVPTXInst<(outs Float64Regs:$dst),
(ins Float64Regs:$a, Float64Regs:$b, f64imm:$c),
!strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
- [(set Float64Regs:$dst, (fadd (fmul Float64Regs:$a,
- Float64Regs:$b), fpimm:$c))]>, Requires<[Pred]>;
+ [(set Float64Regs:$dst,
+ (fma Float64Regs:$a, Float64Regs:$b, fpimm:$c))]>,
+ Requires<[Pred]>;
def rir : NVPTXInst<(outs Float64Regs:$dst),
(ins Float64Regs:$a, f64imm:$b, Float64Regs:$c),
!strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
- [(set Float64Regs:$dst, (fadd
- (fmul Float64Regs:$a, fpimm:$b), Float64Regs:$c))]>,
+ [(set Float64Regs:$dst,
+ (fma Float64Regs:$a, fpimm:$b, Float64Regs:$c))]>,
Requires<[Pred]>;
def rii : NVPTXInst<(outs Float64Regs:$dst),
(ins Float64Regs:$a, f64imm:$b, f64imm:$c),
!strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
- [(set Float64Regs:$dst, (fadd
- (fmul Float64Regs:$a, fpimm:$b), fpimm:$c))]>,
+ [(set Float64Regs:$dst,
+ (fma Float64Regs:$a, fpimm:$b, fpimm:$c))]>,
Requires<[Pred]>;
}
-// Due to a unknown reason (most likely a bug in tablegen), tablegen does not
-// automatically generate the rrr2 rule from
-// the rrr rule (see FPCONTRACT32) for FMA32, though it does for FMAD32.
-// If we reverse the order of the following two lines, then rrr2 rule will be
-// generated for FMA32, but not for rrr.
-// Therefore, we manually write the rrr2 rule in FPCONTRACT32.
-defm FMA32_ftz : FPCONTRACT32<"fma.rn.ftz.f32", doFMAF32_ftz>;
-defm FMA32 : FPCONTRACT32<"fma.rn.f32", doFMAF32>;
-defm FMA64 : FPCONTRACT64<"fma.rn.f64", doFMAF64>;
-
-// b*c-a => fmad(b, c, -a)
-multiclass FPCONTRACT32_SUB_PAT_MAD<NVPTXInst Inst, Predicate Pred> {
- def : Pat<(fsub (fmul Float32Regs:$b, Float32Regs:$c), Float32Regs:$a),
- (Inst Float32Regs:$b, Float32Regs:$c, (FNEGf32 Float32Regs:$a))>,
- Requires<[Pred]>;
-}
-
-// a-b*c => fmad(-b,c, a)
-// - legal because a-b*c <=> a+(-b*c) <=> a+(-b)*c
-// b*c-a => fmad(b, c, -a)
-// - legal because b*c-a <=> b*c+(-a)
-multiclass FPCONTRACT32_SUB_PAT<NVPTXInst Inst, Predicate Pred> {
- def : Pat<(fsub Float32Regs:$a, (fmul Float32Regs:$b, Float32Regs:$c)),
- (Inst (FNEGf32 Float32Regs:$b), Float32Regs:$c, Float32Regs:$a)>,
- Requires<[Pred]>;
- def : Pat<(fsub (fmul Float32Regs:$b, Float32Regs:$c), Float32Regs:$a),
- (Inst Float32Regs:$b, Float32Regs:$c, (FNEGf32 Float32Regs:$a))>,
- Requires<[Pred]>;
-}
-
-// a-b*c => fmad(-b,c, a)
-// b*c-a => fmad(b, c, -a)
-multiclass FPCONTRACT64_SUB_PAT<NVPTXInst Inst, Predicate Pred> {
- def : Pat<(fsub Float64Regs:$a, (fmul Float64Regs:$b, Float64Regs:$c)),
- (Inst (FNEGf64 Float64Regs:$b), Float64Regs:$c, Float64Regs:$a)>,
- Requires<[Pred]>;
-
- def : Pat<(fsub (fmul Float64Regs:$b, Float64Regs:$c), Float64Regs:$a),
- (Inst Float64Regs:$b, Float64Regs:$c, (FNEGf64 Float64Regs:$a))>,
- Requires<[Pred]>;
-}
-
-defm FMAF32ext_ftz : FPCONTRACT32_SUB_PAT<FMA32_ftzrrr, doFMAF32AGG_ftz>;
-defm FMAF32ext : FPCONTRACT32_SUB_PAT<FMA32rrr, doFMAF32AGG>;
-defm FMAF64ext : FPCONTRACT64_SUB_PAT<FMA64rrr, doFMAF64AGG>;
+defm FMA32_ftz : FPCONTRACT32<"fma.rn.ftz.f32", doF32FTZ>;
+defm FMA32 : FPCONTRACT32<"fma.rn.f32", doNoF32FTZ>;
+defm FMA64 : FPCONTRACT64<"fma.rn.f64", doNoF32FTZ>;
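These contraction patterns now match a genuine ISD::FMA node produced by the ADD combine earlier in this patch, rather than a raw (fadd (fmul ...)) tree. Numerically the two differ because fma rounds once; a classic witness, assuming IEEE doubles and default rounding:

#include <cassert>
#include <cmath>

int main() {
  double a = 134217729.0;            // 2^27 + 1
  double r = a * a;                  // rounded product drops the low +1
  assert(std::fma(a, a, -r) == 1.0); // fma recovers the rounding residue
}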
def SINF: NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$src),
"sin.approx.f32 \t$dst, $src;",
@@ -1083,6 +1091,43 @@ multiclass RSHIFT_FORMAT<string OpcStr, SDNode OpNode> {
defm SRA : RSHIFT_FORMAT<"shr.s", sra>;
defm SRL : RSHIFT_FORMAT<"shr.u", srl>;
+//
+// Rotate: use the PTX shf instruction if available.
+//
+
+// 32 bit r2 = rotl r1, n
+// =>
+// r2 = shf.l r1, r1, n
+def ROTL32imm_hw : NVPTXInst<(outs Int32Regs:$dst),
+ (ins Int32Regs:$src, i32imm:$amt),
+ "shf.l.wrap.b32 \t$dst, $src, $src, $amt;",
+ [(set Int32Regs:$dst, (rotl Int32Regs:$src, (i32 imm:$amt)))]>,
+ Requires<[hasHWROT32]> ;
+
+def ROTL32reg_hw : NVPTXInst<(outs Int32Regs:$dst),
+ (ins Int32Regs:$src, Int32Regs:$amt),
+ "shf.l.wrap.b32 \t$dst, $src, $src, $amt;",
+ [(set Int32Regs:$dst, (rotl Int32Regs:$src, Int32Regs:$amt))]>,
+ Requires<[hasHWROT32]>;
+
+// 32 bit r2 = rotr r1, n
+// =>
+// r2 = shf.r r1, r1, n
+def ROTR32imm_hw : NVPTXInst<(outs Int32Regs:$dst),
+ (ins Int32Regs:$src, i32imm:$amt),
+ "shf.r.wrap.b32 \t$dst, $src, $src, $amt;",
+ [(set Int32Regs:$dst, (rotr Int32Regs:$src, (i32 imm:$amt)))]>,
+ Requires<[hasHWROT32]>;
+
+def ROTR32reg_hw : NVPTXInst<(outs Int32Regs:$dst),
+ (ins Int32Regs:$src, Int32Regs:$amt),
+ "shf.r.wrap.b32 \t$dst, $src, $src, $amt;",
+ [(set Int32Regs:$dst, (rotr Int32Regs:$src, Int32Regs:$amt))]>,
+ Requires<[hasHWROT32]>;
+
+//
+// Rotate: if the PTX shf instruction is not available, use shift+add.
+//
// 32bit
def ROT32imm_sw : NVPTXInst<(outs Int32Regs:$dst),
(ins Int32Regs:$src, i32imm:$amt1, i32imm:$amt2),
@@ -1100,9 +1145,11 @@ def SUB_FRM_32 : SDNodeXForm<imm, [{
}]>;
def : Pat<(rotl Int32Regs:$src, (i32 imm:$amt)),
- (ROT32imm_sw Int32Regs:$src, imm:$amt, (SUB_FRM_32 node:$amt))>;
+ (ROT32imm_sw Int32Regs:$src, imm:$amt, (SUB_FRM_32 node:$amt))>,
+ Requires<[noHWROT32]>;
def : Pat<(rotr Int32Regs:$src, (i32 imm:$amt)),
- (ROT32imm_sw Int32Regs:$src, (SUB_FRM_32 node:$amt), imm:$amt)>;
+ (ROT32imm_sw Int32Regs:$src, (SUB_FRM_32 node:$amt), imm:$amt)>,
+ Requires<[noHWROT32]>;
def ROTL32reg_sw : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src,
Int32Regs:$amt),
@@ -1115,7 +1162,8 @@ def ROTL32reg_sw : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src,
!strconcat("shr.b32 \t%rhs, $src, %amt2;\n\t",
!strconcat("add.u32 \t$dst, %lhs, %rhs;\n\t",
!strconcat("}}", ""))))))))),
- [(set Int32Regs:$dst, (rotl Int32Regs:$src, Int32Regs:$amt))]>;
+ [(set Int32Regs:$dst, (rotl Int32Regs:$src, Int32Regs:$amt))]>,
+ Requires<[noHWROT32]>;
def ROTR32reg_sw : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src,
Int32Regs:$amt),
@@ -1128,7 +1176,8 @@ def ROTR32reg_sw : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src,
!strconcat("shl.b32 \t%rhs, $src, %amt2;\n\t",
!strconcat("add.u32 \t$dst, %lhs, %rhs;\n\t",
!strconcat("}}", ""))))))))),
- [(set Int32Regs:$dst, (rotr Int32Regs:$src, Int32Regs:$amt))]>;
+ [(set Int32Regs:$dst, (rotr Int32Regs:$src, Int32Regs:$amt))]>,
+ Requires<[noHWROT32]>;
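A scalar model of the two-shift fallback emitted by the sw rotates above, assuming 0 < Amt < 32 as the expansion requires; since the two shifted halves occupy disjoint bits, the add behaves as an OR:

#include <cstdint>

static uint32_t rotl32_sw(uint32_t X, unsigned Amt) {
  return (X << Amt) + (X >> (32 - Amt)); // halves are disjoint, add == or
}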
// 64bit
def ROT64imm_sw : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src,
@@ -1177,6 +1226,29 @@ def ROTR64reg_sw : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src,
!strconcat("}}", ""))))))))),
[(set Int64Regs:$dst, (rotr Int64Regs:$src, Int32Regs:$amt))]>;
+// BFE - bit-field extract
+
+multiclass BFE<string TyStr, RegisterClass RC> {
+ // BFE supports both 32-bit and 64-bit values, but the start and length
+ // operands are always 32-bit
+ def rrr
+ : NVPTXInst<(outs RC:$d),
+ (ins RC:$a, Int32Regs:$b, Int32Regs:$c),
+ !strconcat("bfe.", TyStr, " \t$d, $a, $b, $c;"), []>;
+ def rri
+ : NVPTXInst<(outs RC:$d),
+ (ins RC:$a, Int32Regs:$b, i32imm:$c),
+ !strconcat("bfe.", TyStr, " \t$d, $a, $b, $c;"), []>;
+ def rii
+ : NVPTXInst<(outs RC:$d),
+ (ins RC:$a, i32imm:$b, i32imm:$c),
+ !strconcat("bfe.", TyStr, " \t$d, $a, $b, $c;"), []>;
+}
+
+defm BFE_S32 : BFE<"s32", Int32Regs>;
+defm BFE_U32 : BFE<"u32", Int32Regs>;
+defm BFE_S64 : BFE<"s64", Int64Regs>;
+defm BFE_U64 : BFE<"u64", Int64Regs>;
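A scalar model of the unsigned form, bfe.u32, assuming in-range operands (0 < Len, Pos + Len <= 32); the signed forms sign-fill from the top extracted bit instead:

#include <cstdint>

static uint32_t bfe_u32(uint32_t A, unsigned Pos, unsigned Len) {
  uint32_t Mask = (Len >= 32) ? 0xFFFFFFFFu : ((1u << Len) - 1u);
  return (A >> Pos) & Mask;
}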
//-----------------------------------
// General Comparison
@@ -1292,6 +1364,32 @@ def : Pat<(i1 (select Int1Regs:$p, Int1Regs:$a, Int1Regs:$b)),
(ORb1rr (ANDb1rr Int1Regs:$p, Int1Regs:$a),
(ANDb1rr (NOT1 Int1Regs:$p), Int1Regs:$b))>;
+//
+// Funnel shift in clamp mode
+//
+// - SDNodes are created so they can be used in the DAG code,
+// e.g. NVPTXISelLowering (LowerShiftLeftParts and LowerShiftRightParts)
+//
+def SDTIntShiftDOp: SDTypeProfile<1, 3,
+ [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>,
+ SDTCisInt<0>, SDTCisInt<3>]>;
+def FUN_SHFL_CLAMP : SDNode<"NVPTXISD::FUN_SHFL_CLAMP", SDTIntShiftDOp, []>;
+def FUN_SHFR_CLAMP : SDNode<"NVPTXISD::FUN_SHFR_CLAMP", SDTIntShiftDOp, []>;
+
+def FUNSHFLCLAMP : NVPTXInst<(outs Int32Regs:$dst),
+ (ins Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt),
+ "shf.l.clamp.b32 \t$dst, $lo, $hi, $amt;",
+ [(set Int32Regs:$dst,
+ (FUN_SHFL_CLAMP Int32Regs:$lo,
+ Int32Regs:$hi, Int32Regs:$amt))]>;
+
+def FUNSHFRCLAMP : NVPTXInst<(outs Int32Regs:$dst),
+ (ins Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt),
+ "shf.r.clamp.b32 \t$dst, $lo, $hi, $amt;",
+ [(set Int32Regs:$dst,
+ (FUN_SHFR_CLAMP Int32Regs:$lo,
+ Int32Regs:$hi, Int32Regs:$amt))]>;
+
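A scalar model of the clamp-mode funnel shifts these patterns select, assuming the PTX ISA semantics (the shift amount is clamped to 32): shf.l yields the upper word of the shifted 64-bit pair, shf.r the lower.

#include <cstdint>

static uint32_t shf_l_clamp(uint32_t Lo, uint32_t Hi, unsigned Amt) {
  unsigned N = Amt < 32 ? Amt : 32;          // clamp mode
  uint64_t V = ((uint64_t)Hi << 32) | Lo;
  return (uint32_t)((V << N) >> 32);         // upper word of the funnel
}

static uint32_t shf_r_clamp(uint32_t Lo, uint32_t Hi, unsigned Amt) {
  unsigned N = Amt < 32 ? Amt : 32;
  uint64_t V = ((uint64_t)Hi << 32) | Lo;
  return (uint32_t)(V >> N);                 // lower word of the funnel
}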
//-----------------------------------
// Data Movement (Load / Store, Move)
//-----------------------------------
diff --git a/lib/Target/NVPTX/NVPTXIntrinsics.td b/lib/Target/NVPTX/NVPTXIntrinsics.td
index 5e228fc..0ad3dfa 100644
--- a/lib/Target/NVPTX/NVPTXIntrinsics.td
+++ b/lib/Target/NVPTX/NVPTXIntrinsics.td
@@ -1057,12 +1057,24 @@ def atomic_load_max_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
(atomic_load_max_32 node:$a, node:$b)>;
def atomic_load_max_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
(atomic_load_max_32 node:$a, node:$b)>;
+def atomic_load_max_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
+ (atomic_load_max_64 node:$a, node:$b)>;
+def atomic_load_max_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
+ (atomic_load_max_64 node:$a, node:$b)>;
+def atomic_load_max_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
+ (atomic_load_max_64 node:$a, node:$b)>;
def atomic_load_umax_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
(atomic_load_umax_32 node:$a, node:$b)>;
def atomic_load_umax_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
(atomic_load_umax_32 node:$a, node:$b)>;
def atomic_load_umax_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
(atomic_load_umax_32 node:$a, node:$b)>;
+def atomic_load_umax_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
+ (atomic_load_umax_64 node:$a, node:$b)>;
+def atomic_load_umax_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
+ (atomic_load_umax_64 node:$a, node:$b)>;
+def atomic_load_umax_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
+ (atomic_load_umax_64 node:$a, node:$b)>;
defm INT_PTX_ATOM_LOAD_MAX_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".s32",
".max", atomic_load_max_32_g, i32imm, imm, hasAtomRedG32>;
@@ -1072,6 +1084,14 @@ defm INT_PTX_ATOM_LOAD_MAX_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".s32", ".max",
atomic_load_max_32_gen, i32imm, imm, hasAtomRedGen32>;
defm INT_PTX_ATOM_LOAD_MAX_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global",
".s32", ".max", atomic_load_max_32_gen, i32imm, imm, useAtomRedG32forGen32>;
+defm INT_PTX_ATOM_LOAD_MAX_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".s64",
+ ".max", atomic_load_max_64_g, i64imm, imm, hasAtomRedG64>;
+defm INT_PTX_ATOM_LOAD_MAX_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".s64",
+ ".max", atomic_load_max_64_s, i64imm, imm, hasAtomRedS64>;
+defm INT_PTX_ATOM_LOAD_MAX_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".s64", ".max",
+ atomic_load_max_64_gen, i64imm, imm, hasAtomRedGen64>;
+defm INT_PTX_ATOM_LOAD_MAX_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global",
+ ".s64", ".max", atomic_load_max_64_gen, i64imm, imm, useAtomRedG64forGen64>;
defm INT_PTX_ATOM_LOAD_UMAX_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32",
".max", atomic_load_umax_32_g, i32imm, imm, hasAtomRedG32>;
defm INT_PTX_ATOM_LOAD_UMAX_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32",
@@ -1080,6 +1100,14 @@ defm INT_PTX_ATOM_LOAD_UMAX_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".max",
atomic_load_umax_32_gen, i32imm, imm, hasAtomRedGen32>;
defm INT_PTX_ATOM_LOAD_UMAX_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global",
".u32", ".max", atomic_load_umax_32_gen, i32imm, imm, useAtomRedG32forGen32>;
+defm INT_PTX_ATOM_LOAD_UMAX_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".u64",
+ ".max", atomic_load_umax_64_g, i64imm, imm, hasAtomRedG64>;
+defm INT_PTX_ATOM_LOAD_UMAX_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".u64",
+ ".max", atomic_load_umax_64_s, i64imm, imm, hasAtomRedS64>;
+defm INT_PTX_ATOM_LOAD_UMAX_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".u64", ".max",
+ atomic_load_umax_64_gen, i64imm, imm, hasAtomRedGen64>;
+defm INT_PTX_ATOM_LOAD_UMAX_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global",
+ ".u64", ".max", atomic_load_umax_64_gen, i64imm, imm, useAtomRedG64forGen64>;
// atom_min
@@ -1089,12 +1117,24 @@ def atomic_load_min_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
(atomic_load_min_32 node:$a, node:$b)>;
def atomic_load_min_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
(atomic_load_min_32 node:$a, node:$b)>;
+def atomic_load_min_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
+ (atomic_load_min_64 node:$a, node:$b)>;
+def atomic_load_min_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
+ (atomic_load_min_64 node:$a, node:$b)>;
+def atomic_load_min_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
+ (atomic_load_min_64 node:$a, node:$b)>;
def atomic_load_umin_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
(atomic_load_umin_32 node:$a, node:$b)>;
def atomic_load_umin_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
(atomic_load_umin_32 node:$a, node:$b)>;
def atomic_load_umin_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
(atomic_load_umin_32 node:$a, node:$b)>;
+def atomic_load_umin_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
+ (atomic_load_umin_64 node:$a, node:$b)>;
+def atomic_load_umin_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
+ (atomic_load_umin_64 node:$a, node:$b)>;
+def atomic_load_umin_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
+ (atomic_load_umin_64 node:$a, node:$b)>;
defm INT_PTX_ATOM_LOAD_MIN_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".s32",
".min", atomic_load_min_32_g, i32imm, imm, hasAtomRedG32>;
@@ -1104,6 +1144,14 @@ defm INT_PTX_ATOM_LOAD_MIN_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".s32", ".min",
atomic_load_min_32_gen, i32imm, imm, hasAtomRedGen32>;
defm INT_PTX_ATOM_LOAD_MIN_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global",
".s32", ".min", atomic_load_min_32_gen, i32imm, imm, useAtomRedG32forGen32>;
+defm INT_PTX_ATOM_LOAD_MIN_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".s64",
+ ".min", atomic_load_min_64_g, i64imm, imm, hasAtomRedG64>;
+defm INT_PTX_ATOM_LOAD_MIN_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".s64",
+ ".min", atomic_load_min_64_s, i64imm, imm, hasAtomRedS64>;
+defm INT_PTX_ATOM_LOAD_MIN_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".s64", ".min",
+ atomic_load_min_64_gen, i64imm, imm, hasAtomRedGen64>;
+defm INT_PTX_ATOM_LOAD_MIN_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global",
+ ".s64", ".min", atomic_load_min_64_gen, i64imm, imm, useAtomRedG64forGen64>;
defm INT_PTX_ATOM_LOAD_UMIN_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32",
".min", atomic_load_umin_32_g, i32imm, imm, hasAtomRedG32>;
defm INT_PTX_ATOM_LOAD_UMIN_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32",
@@ -1112,6 +1160,14 @@ defm INT_PTX_ATOM_LOAD_UMIN_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".min",
atomic_load_umin_32_gen, i32imm, imm, hasAtomRedGen32>;
defm INT_PTX_ATOM_LOAD_UMIN_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global",
".u32", ".min", atomic_load_umin_32_gen, i32imm, imm, useAtomRedG32forGen32>;
+defm INT_PTX_ATOM_LOAD_UMIN_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".u64",
+ ".min", atomic_load_umin_64_g, i64imm, imm, hasAtomRedG64>;
+defm INT_PTX_ATOM_LOAD_UMIN_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".u64",
+ ".min", atomic_load_umin_64_s, i64imm, imm, hasAtomRedS64>;
+defm INT_PTX_ATOM_LOAD_UMIN_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".u64", ".min",
+ atomic_load_umin_64_gen, i64imm, imm, hasAtomRedGen64>;
+defm INT_PTX_ATOM_LOAD_UMIN_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global",
+ ".u64", ".min", atomic_load_umin_64_gen, i64imm, imm, useAtomRedG64forGen64>;
// atom_inc atom_dec
@@ -1153,6 +1209,12 @@ def atomic_load_and_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
(atomic_load_and_32 node:$a, node:$b)>;
def atomic_load_and_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
(atomic_load_and_32 node:$a, node:$b)>;
+def atomic_load_and_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
+ (atomic_load_and_64 node:$a, node:$b)>;
+def atomic_load_and_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
+ (atomic_load_and_64 node:$a, node:$b)>;
+def atomic_load_and_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
+ (atomic_load_and_64 node:$a, node:$b)>;
defm INT_PTX_ATOM_AND_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".and",
atomic_load_and_32_g, i32imm, imm, hasAtomRedG32>;
@@ -1162,6 +1224,14 @@ defm INT_PTX_ATOM_AND_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".b32", ".and",
atomic_load_and_32_gen, i32imm, imm, hasAtomRedGen32>;
defm INT_PTX_ATOM_AND_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".b32",
".and", atomic_load_and_32_gen, i32imm, imm, useAtomRedG32forGen32>;
+defm INT_PTX_ATOM_AND_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".and",
+ atomic_load_and_64_g, i64imm, imm, hasAtomRedG64>;
+defm INT_PTX_ATOM_AND_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".and",
+ atomic_load_and_64_s, i64imm, imm, hasAtomRedS64>;
+defm INT_PTX_ATOM_AND_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".b64", ".and",
+ atomic_load_and_64_gen, i64imm, imm, hasAtomRedGen64>;
+defm INT_PTX_ATOM_AND_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".b64",
+ ".and", atomic_load_and_64_gen, i64imm, imm, useAtomRedG64forGen64>;
// atom_or
@@ -1171,6 +1241,12 @@ def atomic_load_or_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
(atomic_load_or_32 node:$a, node:$b)>;
def atomic_load_or_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
(atomic_load_or_32 node:$a, node:$b)>;
+def atomic_load_or_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
+ (atomic_load_or_64 node:$a, node:$b)>;
+def atomic_load_or_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
+ (atomic_load_or_64 node:$a, node:$b)>;
+def atomic_load_or_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
+ (atomic_load_or_64 node:$a, node:$b)>;
defm INT_PTX_ATOM_OR_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".or",
atomic_load_or_32_g, i32imm, imm, hasAtomRedG32>;
@@ -1180,6 +1256,14 @@ defm INT_PTX_ATOM_OR_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".b32",
".or", atomic_load_or_32_gen, i32imm, imm, useAtomRedG32forGen32>;
defm INT_PTX_ATOM_OR_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".or",
atomic_load_or_32_s, i32imm, imm, hasAtomRedS32>;
+defm INT_PTX_ATOM_OR_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".or",
+ atomic_load_or_64_g, i64imm, imm, hasAtomRedG64>;
+defm INT_PTX_ATOM_OR_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".b64", ".or",
+ atomic_load_or_64_gen, i64imm, imm, hasAtomRedGen64>;
+defm INT_PTX_ATOM_OR_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".b64",
+ ".or", atomic_load_or_64_gen, i64imm, imm, useAtomRedG64forGen64>;
+defm INT_PTX_ATOM_OR_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".or",
+ atomic_load_or_64_s, i64imm, imm, hasAtomRedS64>;
// atom_xor
@@ -1189,6 +1273,12 @@ def atomic_load_xor_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
(atomic_load_xor_32 node:$a, node:$b)>;
def atomic_load_xor_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
(atomic_load_xor_32 node:$a, node:$b)>;
+def atomic_load_xor_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
+ (atomic_load_xor_64 node:$a, node:$b)>;
+def atomic_load_xor_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
+ (atomic_load_xor_64 node:$a, node:$b)>;
+def atomic_load_xor_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
+ (atomic_load_xor_64 node:$a, node:$b)>;
defm INT_PTX_ATOM_XOR_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".xor",
atomic_load_xor_32_g, i32imm, imm, hasAtomRedG32>;
@@ -1198,6 +1288,14 @@ defm INT_PTX_ATOM_XOR_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".b32", ".xor",
atomic_load_xor_32_gen, i32imm, imm, hasAtomRedGen32>;
defm INT_PTX_ATOM_XOR_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".b32",
".xor", atomic_load_xor_32_gen, i32imm, imm, useAtomRedG32forGen32>;
+defm INT_PTX_ATOM_XOR_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".xor",
+ atomic_load_xor_64_g, i64imm, imm, hasAtomRedG64>;
+defm INT_PTX_ATOM_XOR_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".xor",
+ atomic_load_xor_64_s, i64imm, imm, hasAtomRedS64>;
+defm INT_PTX_ATOM_XOR_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".b64", ".xor",
+ atomic_load_xor_64_gen, i64imm, imm, hasAtomRedGen64>;
+defm INT_PTX_ATOM_XOR_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".b64",
+ ".xor", atomic_load_xor_64_gen, i64imm, imm, useAtomRedG64forGen64>;
// atom_cas
@@ -1276,67 +1374,33 @@ def INT_PTX_SREG_WARPSIZE : F_SREG<"mov.u32 \t$dst, WARP_SZ;", Int32Regs,
// Support for ldu on sm_20 or later
//-----------------------------------
-def ldu_i8 : PatFrag<(ops node:$ptr), (int_nvvm_ldu_global_i node:$ptr), [{
- MemIntrinsicSDNode *M = cast<MemIntrinsicSDNode>(N);
- return M->getMemoryVT() == MVT::i8;
-}]>;
-
// Scalar
-// @TODO: Revisit this, Changed imemAny to imem
-multiclass LDU_G<string TyStr, NVPTXRegClass regclass, Intrinsic IntOp> {
+multiclass LDU_G<string TyStr, NVPTXRegClass regclass> {
def areg: NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src),
!strconcat("ldu.global.", TyStr),
- [(set regclass:$result, (IntOp Int32Regs:$src))]>, Requires<[hasLDU]>;
+ []>, Requires<[hasLDU]>;
def areg64: NVPTXInst<(outs regclass:$result), (ins Int64Regs:$src),
!strconcat("ldu.global.", TyStr),
- [(set regclass:$result, (IntOp Int64Regs:$src))]>, Requires<[hasLDU]>;
- def avar: NVPTXInst<(outs regclass:$result), (ins imem:$src),
+ []>, Requires<[hasLDU]>;
+ def avar: NVPTXInst<(outs regclass:$result), (ins imemAny:$src),
!strconcat("ldu.global.", TyStr),
- [(set regclass:$result, (IntOp (Wrapper tglobaladdr:$src)))]>,
- Requires<[hasLDU]>;
+ []>, Requires<[hasLDU]>;
def ari : NVPTXInst<(outs regclass:$result), (ins MEMri:$src),
!strconcat("ldu.global.", TyStr),
- [(set regclass:$result, (IntOp ADDRri:$src))]>, Requires<[hasLDU]>;
+ []>, Requires<[hasLDU]>;
def ari64 : NVPTXInst<(outs regclass:$result), (ins MEMri64:$src),
!strconcat("ldu.global.", TyStr),
- [(set regclass:$result, (IntOp ADDRri64:$src))]>, Requires<[hasLDU]>;
+ []>, Requires<[hasLDU]>;
}
-multiclass LDU_G_NOINTRIN<string TyStr, NVPTXRegClass regclass, PatFrag IntOp> {
- def areg: NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src),
- !strconcat("ldu.global.", TyStr),
- [(set regclass:$result, (IntOp Int32Regs:$src))]>, Requires<[hasLDU]>;
- def areg64: NVPTXInst<(outs regclass:$result), (ins Int64Regs:$src),
- !strconcat("ldu.global.", TyStr),
- [(set regclass:$result, (IntOp Int64Regs:$src))]>, Requires<[hasLDU]>;
- def avar: NVPTXInst<(outs regclass:$result), (ins imem:$src),
- !strconcat("ldu.global.", TyStr),
- [(set regclass:$result, (IntOp (Wrapper tglobaladdr:$src)))]>,
- Requires<[hasLDU]>;
- def ari : NVPTXInst<(outs regclass:$result), (ins MEMri:$src),
- !strconcat("ldu.global.", TyStr),
- [(set regclass:$result, (IntOp ADDRri:$src))]>, Requires<[hasLDU]>;
- def ari64 : NVPTXInst<(outs regclass:$result), (ins MEMri64:$src),
- !strconcat("ldu.global.", TyStr),
- [(set regclass:$result, (IntOp ADDRri64:$src))]>, Requires<[hasLDU]>;
-}
-
-defm INT_PTX_LDU_GLOBAL_i8 : LDU_G_NOINTRIN<"u8 \t$result, [$src];", Int16Regs,
- ldu_i8>;
-defm INT_PTX_LDU_GLOBAL_i16 : LDU_G<"u16 \t$result, [$src];", Int16Regs,
-int_nvvm_ldu_global_i>;
-defm INT_PTX_LDU_GLOBAL_i32 : LDU_G<"u32 \t$result, [$src];", Int32Regs,
-int_nvvm_ldu_global_i>;
-defm INT_PTX_LDU_GLOBAL_i64 : LDU_G<"u64 \t$result, [$src];", Int64Regs,
-int_nvvm_ldu_global_i>;
-defm INT_PTX_LDU_GLOBAL_f32 : LDU_G<"f32 \t$result, [$src];", Float32Regs,
-int_nvvm_ldu_global_f>;
-defm INT_PTX_LDU_GLOBAL_f64 : LDU_G<"f64 \t$result, [$src];", Float64Regs,
-int_nvvm_ldu_global_f>;
-defm INT_PTX_LDU_GLOBAL_p32 : LDU_G<"u32 \t$result, [$src];", Int32Regs,
-int_nvvm_ldu_global_p>;
-defm INT_PTX_LDU_GLOBAL_p64 : LDU_G<"u64 \t$result, [$src];", Int64Regs,
-int_nvvm_ldu_global_p>;
+defm INT_PTX_LDU_GLOBAL_i8 : LDU_G<"u8 \t$result, [$src];", Int16Regs>;
+defm INT_PTX_LDU_GLOBAL_i16 : LDU_G<"u16 \t$result, [$src];", Int16Regs>;
+defm INT_PTX_LDU_GLOBAL_i32 : LDU_G<"u32 \t$result, [$src];", Int32Regs>;
+defm INT_PTX_LDU_GLOBAL_i64 : LDU_G<"u64 \t$result, [$src];", Int64Regs>;
+defm INT_PTX_LDU_GLOBAL_f32 : LDU_G<"f32 \t$result, [$src];", Float32Regs>;
+defm INT_PTX_LDU_GLOBAL_f64 : LDU_G<"f64 \t$result, [$src];", Float64Regs>;
+defm INT_PTX_LDU_GLOBAL_p32 : LDU_G<"u32 \t$result, [$src];", Int32Regs>;
+defm INT_PTX_LDU_GLOBAL_p64 : LDU_G<"u64 \t$result, [$src];", Int64Regs>;
// vector
@@ -1406,65 +1470,40 @@ defm INT_PTX_LDU_G_v4f32_ELE
// Support for ldg on sm_35 or later
//-----------------------------------
-def ldg_i8 : PatFrag<(ops node:$ptr), (int_nvvm_ldg_global_i node:$ptr), [{
- MemIntrinsicSDNode *M = cast<MemIntrinsicSDNode>(N);
- return M->getMemoryVT() == MVT::i8;
-}]>;
-
-multiclass LDG_G<string TyStr, NVPTXRegClass regclass, Intrinsic IntOp> {
+multiclass LDG_G<string TyStr, NVPTXRegClass regclass> {
def areg: NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src),
!strconcat("ld.global.nc.", TyStr),
- [(set regclass:$result, (IntOp Int32Regs:$src))]>, Requires<[hasLDG]>;
+ []>, Requires<[hasLDG]>;
def areg64: NVPTXInst<(outs regclass:$result), (ins Int64Regs:$src),
!strconcat("ld.global.nc.", TyStr),
- [(set regclass:$result, (IntOp Int64Regs:$src))]>, Requires<[hasLDG]>;
- def avar: NVPTXInst<(outs regclass:$result), (ins imem:$src),
+ []>, Requires<[hasLDG]>;
+ def avar: NVPTXInst<(outs regclass:$result), (ins imemAny:$src),
!strconcat("ld.global.nc.", TyStr),
- [(set regclass:$result, (IntOp (Wrapper tglobaladdr:$src)))]>,
- Requires<[hasLDG]>;
+ []>, Requires<[hasLDG]>;
def ari : NVPTXInst<(outs regclass:$result), (ins MEMri:$src),
!strconcat("ld.global.nc.", TyStr),
- [(set regclass:$result, (IntOp ADDRri:$src))]>, Requires<[hasLDG]>;
+ []>, Requires<[hasLDG]>;
def ari64 : NVPTXInst<(outs regclass:$result), (ins MEMri64:$src),
!strconcat("ld.global.nc.", TyStr),
- [(set regclass:$result, (IntOp ADDRri64:$src))]>, Requires<[hasLDG]>;
-}
-
-multiclass LDG_G_NOINTRIN<string TyStr, NVPTXRegClass regclass, PatFrag IntOp> {
- def areg: NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src),
- !strconcat("ld.global.nc.", TyStr),
- [(set regclass:$result, (IntOp Int32Regs:$src))]>, Requires<[hasLDG]>;
- def areg64: NVPTXInst<(outs regclass:$result), (ins Int64Regs:$src),
- !strconcat("ld.global.nc.", TyStr),
- [(set regclass:$result, (IntOp Int64Regs:$src))]>, Requires<[hasLDG]>;
- def avar: NVPTXInst<(outs regclass:$result), (ins imem:$src),
- !strconcat("ld.global.nc.", TyStr),
- [(set regclass:$result, (IntOp (Wrapper tglobaladdr:$src)))]>,
- Requires<[hasLDG]>;
- def ari : NVPTXInst<(outs regclass:$result), (ins MEMri:$src),
- !strconcat("ld.global.nc.", TyStr),
- [(set regclass:$result, (IntOp ADDRri:$src))]>, Requires<[hasLDG]>;
- def ari64 : NVPTXInst<(outs regclass:$result), (ins MEMri64:$src),
- !strconcat("ld.global.nc.", TyStr),
- [(set regclass:$result, (IntOp ADDRri64:$src))]>, Requires<[hasLDG]>;
+ []>, Requires<[hasLDG]>;
}
defm INT_PTX_LDG_GLOBAL_i8
- : LDG_G_NOINTRIN<"u8 \t$result, [$src];", Int16Regs, ldg_i8>;
+ : LDG_G<"u8 \t$result, [$src];", Int16Regs>;
defm INT_PTX_LDG_GLOBAL_i16
- : LDG_G<"u16 \t$result, [$src];", Int16Regs, int_nvvm_ldg_global_i>;
+ : LDG_G<"u16 \t$result, [$src];", Int16Regs>;
defm INT_PTX_LDG_GLOBAL_i32
- : LDG_G<"u32 \t$result, [$src];", Int32Regs, int_nvvm_ldg_global_i>;
+ : LDG_G<"u32 \t$result, [$src];", Int32Regs>;
defm INT_PTX_LDG_GLOBAL_i64
- : LDG_G<"u64 \t$result, [$src];", Int64Regs, int_nvvm_ldg_global_i>;
+ : LDG_G<"u64 \t$result, [$src];", Int64Regs>;
defm INT_PTX_LDG_GLOBAL_f32
- : LDG_G<"f32 \t$result, [$src];", Float32Regs, int_nvvm_ldg_global_f>;
+ : LDG_G<"f32 \t$result, [$src];", Float32Regs>;
defm INT_PTX_LDG_GLOBAL_f64
- : LDG_G<"f64 \t$result, [$src];", Float64Regs, int_nvvm_ldg_global_f>;
+ : LDG_G<"f64 \t$result, [$src];", Float64Regs>;
defm INT_PTX_LDG_GLOBAL_p32
- : LDG_G<"u32 \t$result, [$src];", Int32Regs, int_nvvm_ldg_global_p>;
+ : LDG_G<"u32 \t$result, [$src];", Int32Regs>;
defm INT_PTX_LDG_GLOBAL_p64
- : LDG_G<"u64 \t$result, [$src];", Int64Regs, int_nvvm_ldg_global_p>;
+ : LDG_G<"u64 \t$result, [$src];", Int64Regs>;
// vector
@@ -1689,6 +1728,207 @@ def INT_NVVM_COMPILER_ERROR_64 : NVPTXInst<(outs), (ins Int64Regs:$a),
[(int_nvvm_compiler_error Int64Regs:$a)]>;
+// isspacep
+
+def ISSPACEP_CONST_32
+ : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a),
+ "isspacep.const \t$d, $a;",
+ [(set Int1Regs:$d, (int_nvvm_isspacep_const Int32Regs:$a))]>,
+ Requires<[hasPTX31]>;
+def ISSPACEP_CONST_64
+ : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
+ "isspacep.const \t$d, $a;",
+ [(set Int1Regs:$d, (int_nvvm_isspacep_const Int64Regs:$a))]>,
+ Requires<[hasPTX31]>;
+def ISSPACEP_GLOBAL_32
+ : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a),
+ "isspacep.global \t$d, $a;",
+ [(set Int1Regs:$d, (int_nvvm_isspacep_global Int32Regs:$a))]>;
+def ISSPACEP_GLOBAL_64
+ : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
+ "isspacep.global \t$d, $a;",
+ [(set Int1Regs:$d, (int_nvvm_isspacep_global Int64Regs:$a))]>;
+def ISSPACEP_LOCAL_32
+ : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a),
+ "isspacep.local \t$d, $a;",
+ [(set Int1Regs:$d, (int_nvvm_isspacep_local Int32Regs:$a))]>;
+def ISSPACEP_LOCAL_64
+ : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
+ "isspacep.local \t$d, $a;",
+ [(set Int1Regs:$d, (int_nvvm_isspacep_local Int64Regs:$a))]>;
+def ISSPACEP_SHARED_32
+ : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a),
+ "isspacep.shared \t$d, $a;",
+ [(set Int1Regs:$d, (int_nvvm_isspacep_shared Int32Regs:$a))]>;
+def ISSPACEP_SHARED_64
+ : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
+ "isspacep.shared \t$d, $a;",
+ [(set Int1Regs:$d, (int_nvvm_isspacep_shared Int64Regs:$a))]>;
+
+
+// Special register reads
+def MOV_SPECIAL : NVPTXInst<(outs Int32Regs:$d),
+ (ins SpecialRegs:$r),
+ "mov.b32\t$d, $r;", []>;
+
+def : Pat<(int_nvvm_read_ptx_sreg_envreg0), (MOV_SPECIAL ENVREG0)>;
+def : Pat<(int_nvvm_read_ptx_sreg_envreg1), (MOV_SPECIAL ENVREG1)>;
+def : Pat<(int_nvvm_read_ptx_sreg_envreg2), (MOV_SPECIAL ENVREG2)>;
+def : Pat<(int_nvvm_read_ptx_sreg_envreg3), (MOV_SPECIAL ENVREG3)>;
+def : Pat<(int_nvvm_read_ptx_sreg_envreg4), (MOV_SPECIAL ENVREG4)>;
+def : Pat<(int_nvvm_read_ptx_sreg_envreg5), (MOV_SPECIAL ENVREG5)>;
+def : Pat<(int_nvvm_read_ptx_sreg_envreg6), (MOV_SPECIAL ENVREG6)>;
+def : Pat<(int_nvvm_read_ptx_sreg_envreg7), (MOV_SPECIAL ENVREG7)>;
+def : Pat<(int_nvvm_read_ptx_sreg_envreg8), (MOV_SPECIAL ENVREG8)>;
+def : Pat<(int_nvvm_read_ptx_sreg_envreg9), (MOV_SPECIAL ENVREG9)>;
+def : Pat<(int_nvvm_read_ptx_sreg_envreg10), (MOV_SPECIAL ENVREG10)>;
+def : Pat<(int_nvvm_read_ptx_sreg_envreg11), (MOV_SPECIAL ENVREG11)>;
+def : Pat<(int_nvvm_read_ptx_sreg_envreg12), (MOV_SPECIAL ENVREG12)>;
+def : Pat<(int_nvvm_read_ptx_sreg_envreg13), (MOV_SPECIAL ENVREG13)>;
+def : Pat<(int_nvvm_read_ptx_sreg_envreg14), (MOV_SPECIAL ENVREG14)>;
+def : Pat<(int_nvvm_read_ptx_sreg_envreg15), (MOV_SPECIAL ENVREG15)>;
+def : Pat<(int_nvvm_read_ptx_sreg_envreg16), (MOV_SPECIAL ENVREG16)>;
+def : Pat<(int_nvvm_read_ptx_sreg_envreg17), (MOV_SPECIAL ENVREG17)>;
+def : Pat<(int_nvvm_read_ptx_sreg_envreg18), (MOV_SPECIAL ENVREG18)>;
+def : Pat<(int_nvvm_read_ptx_sreg_envreg19), (MOV_SPECIAL ENVREG19)>;
+def : Pat<(int_nvvm_read_ptx_sreg_envreg20), (MOV_SPECIAL ENVREG20)>;
+def : Pat<(int_nvvm_read_ptx_sreg_envreg21), (MOV_SPECIAL ENVREG21)>;
+def : Pat<(int_nvvm_read_ptx_sreg_envreg22), (MOV_SPECIAL ENVREG22)>;
+def : Pat<(int_nvvm_read_ptx_sreg_envreg23), (MOV_SPECIAL ENVREG23)>;
+def : Pat<(int_nvvm_read_ptx_sreg_envreg24), (MOV_SPECIAL ENVREG24)>;
+def : Pat<(int_nvvm_read_ptx_sreg_envreg25), (MOV_SPECIAL ENVREG25)>;
+def : Pat<(int_nvvm_read_ptx_sreg_envreg26), (MOV_SPECIAL ENVREG26)>;
+def : Pat<(int_nvvm_read_ptx_sreg_envreg27), (MOV_SPECIAL ENVREG27)>;
+def : Pat<(int_nvvm_read_ptx_sreg_envreg28), (MOV_SPECIAL ENVREG28)>;
+def : Pat<(int_nvvm_read_ptx_sreg_envreg29), (MOV_SPECIAL ENVREG29)>;
+def : Pat<(int_nvvm_read_ptx_sreg_envreg30), (MOV_SPECIAL ENVREG30)>;
+def : Pat<(int_nvvm_read_ptx_sreg_envreg31), (MOV_SPECIAL ENVREG31)>;
+
+
+// rotate builtin support
+
+def ROTATE_B32_HW_IMM
+ : NVPTXInst<(outs Int32Regs:$dst),
+ (ins Int32Regs:$src, i32imm:$amt),
+ "shf.l.wrap.b32 \t$dst, $src, $src, $amt;",
+ [(set Int32Regs:$dst,
+ (int_nvvm_rotate_b32 Int32Regs:$src, (i32 imm:$amt)))]>,
+ Requires<[hasHWROT32]> ;
+
+def ROTATE_B32_HW_REG
+ : NVPTXInst<(outs Int32Regs:$dst),
+ (ins Int32Regs:$src, Int32Regs:$amt),
+ "shf.l.wrap.b32 \t$dst, $src, $src, $amt;",
+ [(set Int32Regs:$dst,
+ (int_nvvm_rotate_b32 Int32Regs:$src, Int32Regs:$amt))]>,
+ Requires<[hasHWROT32]> ;
+
+def : Pat<(int_nvvm_rotate_b32 Int32Regs:$src, (i32 imm:$amt)),
+ (ROT32imm_sw Int32Regs:$src, imm:$amt, (SUB_FRM_32 node:$amt))>,
+ Requires<[noHWROT32]> ;
+
+def : Pat<(int_nvvm_rotate_b32 Int32Regs:$src, Int32Regs:$amt),
+ (ROTL32reg_sw Int32Regs:$src, Int32Regs:$amt)>,
+ Requires<[noHWROT32]> ;
+
+def GET_LO_INT64
+ : NVPTXInst<(outs Int32Regs:$dst), (ins Int64Regs:$src),
+ !strconcat("{{\n\t",
+ !strconcat(".reg .b32 %dummy;\n\t",
+ !strconcat("mov.b64 \t{$dst,%dummy}, $src;\n\t",
+ !strconcat("}}", "")))),
+ []> ;
+
+def GET_HI_INT64
+ : NVPTXInst<(outs Int32Regs:$dst), (ins Int64Regs:$src),
+ !strconcat("{{\n\t",
+ !strconcat(".reg .b32 %dummy;\n\t",
+ !strconcat("mov.b64 \t{%dummy,$dst}, $src;\n\t",
+ !strconcat("}}", "")))),
+ []> ;
+
+def PACK_TWO_INT32
+ : NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$lo, Int32Regs:$hi),
+ "mov.b64 \t$dst, {{$lo, $hi}};", []> ;
+
+def : Pat<(int_nvvm_swap_lo_hi_b64 Int64Regs:$src),
+ (PACK_TWO_INT32 (GET_HI_INT64 Int64Regs:$src),
+ (GET_LO_INT64 Int64Regs:$src))> ;
+
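GET_LO_INT64/GET_HI_INT64 unpack a 64-bit register into 32-bit halves and PACK_TWO_INT32 reassembles them, so the int_nvvm_swap_lo_hi_b64 pattern above amounts to the following (illustrative host-side sketch):

    #include <cstdint>

    // Equivalent of the swap_lo_hi_b64 pattern: exchange the 32-bit halves,
    // i.e. rotate the 64-bit value by 32.
    static uint64_t swap_lo_hi_b64(uint64_t X) {
      return (X << 32) | (X >> 32);
    }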
+// funnel shift, requires >= sm_32
+def SHF_L_WRAP_B32_IMM
+ : NVPTXInst<(outs Int32Regs:$dst),
+ (ins Int32Regs:$lo, Int32Regs:$hi, i32imm:$amt),
+ "shf.l.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>,
+ Requires<[hasHWROT32]>;
+
+def SHF_L_WRAP_B32_REG
+ : NVPTXInst<(outs Int32Regs:$dst),
+ (ins Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt),
+ "shf.l.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>,
+ Requires<[hasHWROT32]>;
+
+def SHF_R_WRAP_B32_IMM
+ : NVPTXInst<(outs Int32Regs:$dst),
+ (ins Int32Regs:$lo, Int32Regs:$hi, i32imm:$amt),
+ "shf.r.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>,
+ Requires<[hasHWROT32]>;
+
+def SHF_R_WRAP_B32_REG
+ : NVPTXInst<(outs Int32Regs:$dst),
+ (ins Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt),
+ "shf.r.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>,
+ Requires<[hasHWROT32]>;
+
+// HW version of rotate 64
+def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, (i32 imm:$amt)),
+ (PACK_TWO_INT32
+ (SHF_L_WRAP_B32_IMM (GET_HI_INT64 Int64Regs:$src),
+ (GET_LO_INT64 Int64Regs:$src), imm:$amt),
+ (SHF_L_WRAP_B32_IMM (GET_LO_INT64 Int64Regs:$src),
+ (GET_HI_INT64 Int64Regs:$src), imm:$amt))>,
+ Requires<[hasHWROT32]>;
+
+def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, Int32Regs:$amt),
+ (PACK_TWO_INT32
+ (SHF_L_WRAP_B32_REG (GET_HI_INT64 Int64Regs:$src),
+ (GET_LO_INT64 Int64Regs:$src), Int32Regs:$amt),
+ (SHF_L_WRAP_B32_REG (GET_LO_INT64 Int64Regs:$src),
+ (GET_HI_INT64 Int64Regs:$src), Int32Regs:$amt))>,
+ Requires<[hasHWROT32]>;
+
+
+def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, (i32 imm:$amt)),
+ (PACK_TWO_INT32
+ (SHF_R_WRAP_B32_IMM (GET_LO_INT64 Int64Regs:$src),
+ (GET_HI_INT64 Int64Regs:$src), imm:$amt),
+ (SHF_R_WRAP_B32_IMM (GET_HI_INT64 Int64Regs:$src),
+ (GET_LO_INT64 Int64Regs:$src), imm:$amt))>,
+ Requires<[hasHWROT32]>;
+
+def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, Int32Regs:$amt),
+ (PACK_TWO_INT32
+ (SHF_R_WRAP_B32_REG (GET_LO_INT64 Int64Regs:$src),
+ (GET_HI_INT64 Int64Regs:$src), Int32Regs:$amt),
+ (SHF_R_WRAP_B32_REG (GET_HI_INT64 Int64Regs:$src),
+ (GET_LO_INT64 Int64Regs:$src), Int32Regs:$amt))>,
+ Requires<[hasHWROT32]>;
+
+// SW version of rotate 64
+def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, (i32 imm:$amt)),
+ (ROT64imm_sw Int64Regs:$src, imm:$amt, (SUB_FRM_32 node:$amt))>,
+ Requires<[noHWROT32]>;
+def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, Int32Regs:$amt),
+ (ROTL64reg_sw Int64Regs:$src, Int32Regs:$amt)>,
+ Requires<[noHWROT32]>;
+def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, (i32 imm:$amt)),
+ (ROT64imm_sw Int64Regs:$src, (SUB_FRM_64 node:$amt), imm:$amt)>,
+ Requires<[noHWROT32]>;
+def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, Int32Regs:$amt),
+ (ROTR64reg_sw Int64Regs:$src, Int32Regs:$amt)>,
+ Requires<[noHWROT32]>;
+
+
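The hardware path above builds a 64-bit rotate out of two 32-bit funnel shifts. A host-side model of shf.l.wrap.b32 and of the PACK_TWO_INT32 combination (a sketch for illustration; note that shf wraps the shift amount at 32, so the construction is a full 64-bit rotate only for amounts below 32, which presumably matches the intrinsic's contract here):

    #include <cstdint>

    // shf.l.wrap.b32 d, a, b, c: funnel-shift the 64-bit pair {b:a} left by
    // (c & 31); d receives the high word of the shifted value.
    static uint32_t shf_l_wrap(uint32_t a, uint32_t b, uint32_t c) {
      c &= 31;
      return c ? (b << c) | (a >> (32 - c)) : b;
    }

    // Mirror of the HW rotate-64 pattern above: new_lo = shf(hi, lo, n),
    // new_hi = shf(lo, hi, n). Correct for 0 <= n < 32.
    static uint64_t rotl64(uint64_t X, uint32_t N) {
      uint32_t Lo = uint32_t(X), Hi = uint32_t(X >> 32);
      return (uint64_t(shf_l_wrap(Lo, Hi, N)) << 32) | shf_l_wrap(Hi, Lo, N);
    }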
//-----------------------------------
// Texture Intrinsics
//-----------------------------------
diff --git a/lib/Target/NVPTX/NVPTXMCExpr.h b/lib/Target/NVPTX/NVPTXMCExpr.h
index 0ee018c..5547649 100644
--- a/lib/Target/NVPTX/NVPTXMCExpr.h
+++ b/lib/Target/NVPTX/NVPTXMCExpr.h
@@ -66,7 +66,7 @@ public:
const MCAsmLayout *Layout) const override {
return false;
}
- void AddValueSymbols(MCAssembler *) const override {};
+  void visitUsedExpr(MCStreamer &Streamer) const override {}

const MCSection *FindAssociatedSection() const override {
return nullptr;
}
diff --git a/lib/Target/NVPTX/NVPTXRegisterInfo.td b/lib/Target/NVPTX/NVPTXRegisterInfo.td
index 7a38a66..3482248 100644
--- a/lib/Target/NVPTX/NVPTXRegisterInfo.td
+++ b/lib/Target/NVPTX/NVPTXRegisterInfo.td
@@ -46,6 +46,10 @@ foreach i = 0-4 in {
def da#i : NVPTXReg<"%da"#i>;
}
+foreach i = 0-31 in {
+ def ENVREG#i : NVPTXReg<"%envreg"#i>;
+}
+
//===----------------------------------------------------------------------===//
// Register classes
//===----------------------------------------------------------------------===//
@@ -61,4 +65,5 @@ def Float32ArgRegs : NVPTXRegClass<[f32], 32, (add (sequence "fa%u", 0, 4))>;
def Float64ArgRegs : NVPTXRegClass<[f64], 64, (add (sequence "da%u", 0, 4))>;
// Read NVPTXRegisterInfo.cpp to see how VRFrame and VRDepot are used.
-def SpecialRegs : NVPTXRegClass<[i32], 32, (add VRFrame, VRDepot)>;
+def SpecialRegs : NVPTXRegClass<[i32], 32, (add VRFrame, VRDepot,
+ (sequence "ENVREG%u", 0, 31))>;
diff --git a/lib/Target/NVPTX/NVPTXSubtarget.cpp b/lib/Target/NVPTX/NVPTXSubtarget.cpp
index 8c7df52..d5cded2 100644
--- a/lib/Target/NVPTX/NVPTXSubtarget.cpp
+++ b/lib/Target/NVPTX/NVPTXSubtarget.cpp
@@ -25,10 +25,41 @@ using namespace llvm;
// Pin the vtable to this file.
void NVPTXSubtarget::anchor() {}
+static std::string computeDataLayout(bool is64Bit) {
+ std::string Ret = "e";
+
+ if (!is64Bit)
+ Ret += "-p:32:32";
+
+ Ret += "-i64:64-v16:16-v32:32-n16:32:64";
+
+ return Ret;
+}
+
+NVPTXSubtarget &NVPTXSubtarget::initializeSubtargetDependencies(StringRef CPU,
+ StringRef FS) {
+ // Provide the default CPU if we don't have one.
+ if (CPU.empty() && FS.size())
+ llvm_unreachable("we are not using FeatureStr");
+ TargetName = CPU.empty() ? "sm_20" : CPU;
+
+ ParseSubtargetFeatures(TargetName, FS);
+
+ // Set default to PTX 3.2 (CUDA 5.5)
+ if (PTXVersion == 0) {
+ PTXVersion = 32;
+ }
+
+ return *this;
+}
+
NVPTXSubtarget::NVPTXSubtarget(const std::string &TT, const std::string &CPU,
- const std::string &FS, bool is64Bit)
+ const std::string &FS, const TargetMachine &TM,
+ bool is64Bit)
: NVPTXGenSubtargetInfo(TT, CPU, FS), Is64Bit(is64Bit), PTXVersion(0),
- SmVersion(20) {
+ SmVersion(20), DL(computeDataLayout(is64Bit)),
+ InstrInfo(initializeSubtargetDependencies(CPU, FS)),
+ TLInfo((NVPTXTargetMachine &)TM), TSInfo(&DL), FrameLowering(*this) {
Triple T(TT);
@@ -36,26 +67,4 @@ NVPTXSubtarget::NVPTXSubtarget(const std::string &TT, const std::string &CPU,
drvInterface = NVPTX::NVCL;
else
drvInterface = NVPTX::CUDA;
-
- // Provide the default CPU if none
- std::string defCPU = "sm_20";
-
- ParseSubtargetFeatures((CPU.empty() ? defCPU : CPU), FS);
-
- // Get the TargetName from the FS if available
- if (FS.empty() && CPU.empty())
- TargetName = defCPU;
- else if (!CPU.empty())
- TargetName = CPU;
- else
- llvm_unreachable("we are not using FeatureStr");
-
- // We default to PTX 3.1, but we cannot just default to it in the initializer
- // since the attribute parser checks if the given option is >= the default.
- // So if we set ptx31 as the default, the ptx30 attribute would never match.
- // Instead, we use 0 as the default and manually set 31 if the default is
- // used.
- if (PTXVersion == 0) {
- PTXVersion = 31;
- }
}
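Two things in this file are easy to check by hand: computeDataLayout can only produce one of two strings, and the PTX default is now 3.2 rather than 3.1. A standalone sketch of the string construction (illustrative only, not the LLVM code itself):

    #include <cassert>
    #include <string>

    // Mirrors computeDataLayout above.
    static std::string layout(bool is64Bit) {
      std::string Ret = "e";
      if (!is64Bit)
        Ret += "-p:32:32";
      return Ret + "-i64:64-v16:16-v32:32-n16:32:64";
    }

    int main() {
      assert(layout(false) == "e-p:32:32-i64:64-v16:16-v32:32-n16:32:64");
      assert(layout(true) == "e-i64:64-v16:16-v32:32-n16:32:64");
    }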
diff --git a/lib/Target/NVPTX/NVPTXSubtarget.h b/lib/Target/NVPTX/NVPTXSubtarget.h
index 581e5ed..3ed5747 100644
--- a/lib/Target/NVPTX/NVPTXSubtarget.h
+++ b/lib/Target/NVPTX/NVPTXSubtarget.h
@@ -15,6 +15,12 @@
#define NVPTXSUBTARGET_H
#include "NVPTX.h"
+#include "NVPTXFrameLowering.h"
+#include "NVPTXISelLowering.h"
+#include "NVPTXInstrInfo.h"
+#include "NVPTXRegisterInfo.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/Target/TargetSelectionDAGInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include <string>
@@ -35,12 +41,30 @@ class NVPTXSubtarget : public NVPTXGenSubtargetInfo {
// SM version x.y is represented as 10*x+y, e.g. 3.1 == 31
unsigned int SmVersion;
+ const DataLayout DL; // Calculates type size & alignment
+ NVPTXInstrInfo InstrInfo;
+ NVPTXTargetLowering TLInfo;
+ TargetSelectionDAGInfo TSInfo;
+
+  // NVPTX does not have any call stack frame, but it needs an NVPTX-specific
+ // FrameLowering class because TargetFrameLowering is abstract.
+ NVPTXFrameLowering FrameLowering;
+
public:
/// This constructor initializes the data members to match that
/// of the specified module.
///
NVPTXSubtarget(const std::string &TT, const std::string &CPU,
- const std::string &FS, bool is64Bit);
+ const std::string &FS, const TargetMachine &TM, bool is64Bit);
+
+ const TargetFrameLowering *getFrameLowering() const { return &FrameLowering; }
+ const NVPTXInstrInfo *getInstrInfo() const { return &InstrInfo; }
+ const DataLayout *getDataLayout() const { return &DL; }
+ const NVPTXRegisterInfo *getRegisterInfo() const {
+ return &InstrInfo.getRegisterInfo();
+ }
+ const NVPTXTargetLowering *getTargetLowering() const { return &TLInfo; }
+ const TargetSelectionDAGInfo *getSelectionDAGInfo() const { return &TSInfo; }
bool hasBrkPt() const { return SmVersion >= 11; }
bool hasAtomRedG32() const { return SmVersion >= 11; }
@@ -57,10 +81,12 @@ public:
bool hasFMAF32() const { return SmVersion >= 20; }
bool hasFMAF64() const { return SmVersion >= 13; }
bool hasLDG() const { return SmVersion >= 32; }
- bool hasLDU() const { return SmVersion >= 20; }
+ bool hasLDU() const { return ((SmVersion >= 20) && (SmVersion < 30)); }
bool hasGenericLdSt() const { return SmVersion >= 20; }
- inline bool hasHWROT32() const { return false; }
- inline bool hasSWROT32() const { return true; }
+ inline bool hasHWROT32() const { return SmVersion >= 32; }
+ inline bool hasSWROT32() const {
+ return ((SmVersion >= 20) && (SmVersion < 32));
+ }
inline bool hasROT32() const { return hasHWROT32() || hasSWROT32(); }
inline bool hasROT64() const { return SmVersion >= 20; }
@@ -76,6 +102,7 @@ public:
unsigned getPTXVersion() const { return PTXVersion; }
+ NVPTXSubtarget &initializeSubtargetDependencies(StringRef CPU, StringRef FS);
void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
};
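Since SM version x.y is encoded as 10*x+y, the tightened predicates read naturally as half-open version ranges; a standalone sketch (re-implemented here for illustration only):

    #include <cassert>

    // 10*x+y encoding, e.g. sm_35 -> 35, with the updated predicates above.
    struct SM {
      unsigned V;
      bool hasLDG() const { return V >= 32; }
      bool hasLDU() const { return V >= 20 && V < 30; }
      bool hasHWROT32() const { return V >= 32; }
      bool hasSWROT32() const { return V >= 20 && V < 32; }
    };

    int main() {
      assert(SM{21}.hasLDU() && !SM{35}.hasLDU());          // ldu: sm_2x only now
      assert(!SM{21}.hasHWROT32() && SM{35}.hasHWROT32());  // shf: sm_32 and up
    }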
diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/lib/Target/NVPTX/NVPTXTargetMachine.cpp
index 26a4f84..069a1b9 100644
--- a/lib/Target/NVPTX/NVPTXTargetMachine.cpp
+++ b/lib/Target/NVPTX/NVPTXTargetMachine.cpp
@@ -66,26 +66,13 @@ extern "C" void LLVMInitializeNVPTXTarget() {
*PassRegistry::getPassRegistry());
}
-static std::string computeDataLayout(const NVPTXSubtarget &ST) {
- std::string Ret = "e";
-
- if (!ST.is64Bit())
- Ret += "-p:32:32";
-
- Ret += "-i64:64-v16:16-v32:32-n16:32:64";
-
- return Ret;
-}
-
-NVPTXTargetMachine::NVPTXTargetMachine(
- const Target &T, StringRef TT, StringRef CPU, StringRef FS,
- const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM,
- CodeGenOpt::Level OL, bool is64bit)
+NVPTXTargetMachine::NVPTXTargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL, bool is64bit)
: LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
- Subtarget(TT, CPU, FS, is64bit), DL(computeDataLayout(Subtarget)),
- InstrInfo(*this), TLInfo(*this), TSInfo(*this),
- FrameLowering(
- *this, is64bit) /*FrameInfo(TargetFrameInfo::StackGrowsUp, 8, 0)*/ {
+ Subtarget(TT, CPU, FS, *this, is64bit) {
initAsmInfo();
}
@@ -119,6 +106,7 @@ public:
bool addInstSelector() override;
bool addPreRegAlloc() override;
bool addPostRegAlloc() override;
+ void addMachineSSAOptimization() override;
FunctionPass *createTargetRegisterAllocator(bool) override;
void addFastRegAlloc(FunctionPass *RegAllocPass) override;
@@ -220,3 +208,43 @@ void NVPTXPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) {
printAndVerify("After StackSlotColoring");
}
+
+void NVPTXPassConfig::addMachineSSAOptimization() {
+ // Pre-ra tail duplication.
+ if (addPass(&EarlyTailDuplicateID))
+ printAndVerify("After Pre-RegAlloc TailDuplicate");
+
+ // Optimize PHIs before DCE: removing dead PHI cycles may make more
+ // instructions dead.
+ addPass(&OptimizePHIsID);
+
+ // This pass merges large allocas. StackSlotColoring is a different pass
+ // which merges spill slots.
+ addPass(&StackColoringID);
+
+ // If the target requests it, assign local variables to stack slots relative
+ // to one another and simplify frame index references where possible.
+ addPass(&LocalStackSlotAllocationID);
+
+  // With optimization, dead code should already be eliminated. However,
+ // there is one known exception: lowered code for arguments that are only
+ // used by tail calls, where the tail calls reuse the incoming stack
+ // arguments directly (see t11 in test/CodeGen/X86/sibcall.ll).
+ addPass(&DeadMachineInstructionElimID);
+ printAndVerify("After codegen DCE pass");
+
+ // Allow targets to insert passes that improve instruction level parallelism,
+ // like if-conversion. Such passes will typically need dominator trees and
+ // loop info, just like LICM and CSE below.
+ if (addILPOpts())
+ printAndVerify("After ILP optimizations");
+
+ addPass(&MachineLICMID);
+ addPass(&MachineCSEID);
+
+ addPass(&MachineSinkingID);
+ printAndVerify("After Machine LICM, CSE and Sinking passes");
+
+ addPass(&PeepholeOptimizerID);
+ printAndVerify("After codegen peephole optimization pass");
+}
diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.h b/lib/Target/NVPTX/NVPTXTargetMachine.h
index 2db7c18..a7a1c8f 100644
--- a/lib/Target/NVPTX/NVPTXTargetMachine.h
+++ b/lib/Target/NVPTX/NVPTXTargetMachine.h
@@ -14,13 +14,8 @@
#ifndef NVPTX_TARGETMACHINE_H
#define NVPTX_TARGETMACHINE_H
-#include "ManagedStringPool.h"
-#include "NVPTXFrameLowering.h"
-#include "NVPTXISelLowering.h"
-#include "NVPTXInstrInfo.h"
-#include "NVPTXRegisterInfo.h"
#include "NVPTXSubtarget.h"
-#include "llvm/IR/DataLayout.h"
+#include "ManagedStringPool.h"
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetSelectionDAGInfo.h"
@@ -31,50 +26,37 @@ namespace llvm {
///
class NVPTXTargetMachine : public LLVMTargetMachine {
NVPTXSubtarget Subtarget;
- const DataLayout DL; // Calculates type size & alignment
- NVPTXInstrInfo InstrInfo;
- NVPTXTargetLowering TLInfo;
- TargetSelectionDAGInfo TSInfo;
-
- // NVPTX does not have any call stack frame, but need a NVPTX specific
- // FrameLowering class because TargetFrameLowering is abstract.
- NVPTXFrameLowering FrameLowering;
// Holds strings that can be freed together with NVPTXTargetMachine
ManagedStringPool ManagedStrPool;
- //bool addCommonCodeGenPasses(PassManagerBase &, CodeGenOpt::Level,
- // bool DisableVerify, MCContext *&OutCtx);
-
public:
NVPTXTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS,
const TargetOptions &Options, Reloc::Model RM,
CodeModel::Model CM, CodeGenOpt::Level OP, bool is64bit);
const TargetFrameLowering *getFrameLowering() const override {
- return &FrameLowering;
+ return getSubtargetImpl()->getFrameLowering();
+ }
+ const NVPTXInstrInfo *getInstrInfo() const override {
+ return getSubtargetImpl()->getInstrInfo();
+ }
+ const DataLayout *getDataLayout() const override {
+ return getSubtargetImpl()->getDataLayout();
}
- const NVPTXInstrInfo *getInstrInfo() const override { return &InstrInfo; }
- const DataLayout *getDataLayout() const override { return &DL; }
const NVPTXSubtarget *getSubtargetImpl() const override { return &Subtarget; }
-
const NVPTXRegisterInfo *getRegisterInfo() const override {
- return &(InstrInfo.getRegisterInfo());
+ return getSubtargetImpl()->getRegisterInfo();
}
- NVPTXTargetLowering *getTargetLowering() const override {
- return const_cast<NVPTXTargetLowering *>(&TLInfo);
+ const NVPTXTargetLowering *getTargetLowering() const override {
+ return getSubtargetImpl()->getTargetLowering();
}
const TargetSelectionDAGInfo *getSelectionDAGInfo() const override {
- return &TSInfo;
+ return getSubtargetImpl()->getSelectionDAGInfo();
}
- //virtual bool addInstSelector(PassManagerBase &PM,
- // CodeGenOpt::Level OptLevel);
-
- //virtual bool addPreRegAlloc(PassManagerBase &, CodeGenOpt::Level);
-
ManagedStringPool *getManagedStrPool() const {
return const_cast<ManagedStringPool *>(&ManagedStrPool);
}
diff --git a/lib/Target/NVPTX/NVVMReflect.cpp b/lib/Target/NVPTX/NVVMReflect.cpp
index cb8bd72..a8d6b95 100644
--- a/lib/Target/NVPTX/NVVMReflect.cpp
+++ b/lib/Target/NVPTX/NVVMReflect.cpp
@@ -22,6 +22,7 @@
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/Pass.h"
@@ -47,17 +48,16 @@ class NVVMReflect : public ModulePass {
private:
StringMap<int> VarMap;
typedef DenseMap<std::string, int>::iterator VarMapIter;
- Function *ReflectFunction;
public:
static char ID;
- NVVMReflect() : ModulePass(ID), ReflectFunction(nullptr) {
+ NVVMReflect() : ModulePass(ID) {
initializeNVVMReflectPass(*PassRegistry::getPassRegistry());
VarMap.clear();
}
NVVMReflect(const StringMap<int> &Mapping)
- : ModulePass(ID), ReflectFunction(nullptr) {
+ : ModulePass(ID) {
initializeNVVMReflectPass(*PassRegistry::getPassRegistry());
for (StringMap<int>::const_iterator I = Mapping.begin(), E = Mapping.end();
I != E; ++I) {
@@ -70,6 +70,8 @@ public:
}
bool runOnModule(Module &) override;
+private:
+ bool handleFunction(Function *ReflectFunction);
void setVarMap();
};
}
@@ -120,19 +122,7 @@ void NVVMReflect::setVarMap() {
}
}
-bool NVVMReflect::runOnModule(Module &M) {
- if (!NVVMReflectEnabled)
- return false;
-
- setVarMap();
-
- ReflectFunction = M.getFunction(NVVM_REFLECT_FUNCTION);
-
- // If reflect function is not used, then there will be
- // no entry in the module.
- if (!ReflectFunction)
- return false;
-
+bool NVVMReflect::handleFunction(Function *ReflectFunction) {
// Validate _reflect function
assert(ReflectFunction->isDeclaration() &&
"_reflect function should not have a body");
@@ -155,13 +145,15 @@ bool NVVMReflect::runOnModule(Module &M) {
"Only one operand expect for _reflect function");
// In cuda, we will have an extra constant-to-generic conversion of
// the string.
- const Value *conv = Reflect->getArgOperand(0);
- assert(isa<CallInst>(conv) && "Expected a const-to-gen conversion");
- const CallInst *ConvCall = cast<CallInst>(conv);
- const Value *str = ConvCall->getArgOperand(0);
- assert(isa<ConstantExpr>(str) &&
+ const Value *Str = Reflect->getArgOperand(0);
+ if (isa<CallInst>(Str)) {
+ // CUDA path
+ const CallInst *ConvCall = cast<CallInst>(Str);
+ Str = ConvCall->getArgOperand(0);
+ }
+ assert(isa<ConstantExpr>(Str) &&
"Format of _reflect function not recognized");
- const ConstantExpr *GEP = cast<ConstantExpr>(str);
+ const ConstantExpr *GEP = cast<ConstantExpr>(Str);
const Value *Sym = GEP->getOperand(0);
assert(isa<Constant>(Sym) && "Format of _reflect function not recognized");
@@ -195,3 +187,36 @@ bool NVVMReflect::runOnModule(Module &M) {
ToRemove[i]->eraseFromParent();
return true;
}
+
+bool NVVMReflect::runOnModule(Module &M) {
+ if (!NVVMReflectEnabled)
+ return false;
+
+ setVarMap();
+
+ bool Res = false;
+ std::string Name;
+ Type *Tys[1];
+ Type *I8Ty = Type::getInt8Ty(M.getContext());
+ Function *ReflectFunction;
+
+ // Check for standard overloaded versions of llvm.nvvm.reflect
+
+ for (unsigned i = 0; i != 5; ++i) {
+ Tys[0] = PointerType::get(I8Ty, i);
+ Name = Intrinsic::getName(Intrinsic::nvvm_reflect, Tys);
+ ReflectFunction = M.getFunction(Name);
+    if (ReflectFunction) {
+ Res |= handleFunction(ReflectFunction);
+ }
+ }
+
+ ReflectFunction = M.getFunction(NVVM_REFLECT_FUNCTION);
+ // If reflect function is not used, then there will be
+ // no entry in the module.
+  if (ReflectFunction)
+ Res |= handleFunction(ReflectFunction);
+
+ return Res;
+}
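The loop probes the overloaded llvm.nvvm.reflect names for i8 pointers in address spaces 0 through 4; by the usual pointer-type mangling (an assumption about the exact spelling, shown for orientation) those names are:

    llvm.nvvm.reflect.p0i8
    llvm.nvvm.reflect.p1i8
    llvm.nvvm.reflect.p2i8
    llvm.nvvm.reflect.p3i8
    llvm.nvvm.reflect.p4i8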
diff --git a/lib/Target/NVPTX/cl_common_defines.h b/lib/Target/NVPTX/cl_common_defines.h
index 45cc0b8..02c5a94 100644
--- a/lib/Target/NVPTX/cl_common_defines.h
+++ b/lib/Target/NVPTX/cl_common_defines.h
@@ -1,5 +1,5 @@
-#ifndef __CL_COMMON_DEFINES_H__
-#define __CL_COMMON_DEFINES_H__
+#ifndef CL_COMMON_DEFINES_H
+#define CL_COMMON_DEFINES_H
// This file includes defines that are common to both kernel code and
// the NVPTX back-end.
@@ -119,4 +119,4 @@ typedef enum clk_sampler_type {
#define CLK_LOCAL_MEM_FENCE (1 << 0)
#define CLK_GLOBAL_MEM_FENCE (1 << 1)
-#endif // __CL_COMMON_DEFINES_H__
+#endif // CL_COMMON_DEFINES_H
diff --git a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
index 3ac037d..2f562ca 100644
--- a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
+++ b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
@@ -238,7 +238,7 @@ class PPCAsmParser : public MCTargetAsmParser {
bool ParseExpression(const MCExpr *&EVal);
bool ParseDarwinExpression(const MCExpr *&EVal);
- bool ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+ bool ParseOperand(OperandVector &Operands);
bool ParseDirectiveWord(unsigned Size, SMLoc L);
bool ParseDirectiveTC(unsigned Size, SMLoc L);
@@ -246,12 +246,11 @@ class PPCAsmParser : public MCTargetAsmParser {
bool ParseDarwinDirectiveMachine(SMLoc L);
bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
- SmallVectorImpl<MCParsedAsmOperand*> &Operands,
- MCStreamer &Out, unsigned &ErrorInfo,
+ OperandVector &Operands, MCStreamer &Out,
+ unsigned &ErrorInfo,
bool MatchingInlineAsm) override;
- void ProcessInstruction(MCInst &Inst,
- const SmallVectorImpl<MCParsedAsmOperand*> &Ops);
+ void ProcessInstruction(MCInst &Inst, const OperandVector &Ops);
/// @name Auto-generated Match Functions
/// {
@@ -276,13 +275,12 @@ public:
setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
}
- bool ParseInstruction(ParseInstructionInfo &Info,
- StringRef Name, SMLoc NameLoc,
- SmallVectorImpl<MCParsedAsmOperand*> &Operands) override;
+ bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
+ SMLoc NameLoc, OperandVector &Operands) override;
bool ParseDirective(AsmToken DirectiveID) override;
- unsigned validateTargetOperandClass(MCParsedAsmOperand *Op,
+ unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
unsigned Kind) override;
const MCExpr *applyModifierToExpr(const MCExpr *E,
@@ -548,8 +546,9 @@ public:
void print(raw_ostream &OS) const override;
- static PPCOperand *CreateToken(StringRef Str, SMLoc S, bool IsPPC64) {
- PPCOperand *Op = new PPCOperand(Token);
+ static std::unique_ptr<PPCOperand> CreateToken(StringRef Str, SMLoc S,
+ bool IsPPC64) {
+ auto Op = make_unique<PPCOperand>(Token);
Op->Tok.Data = Str.data();
Op->Tok.Length = Str.size();
Op->StartLoc = S;
@@ -558,22 +557,27 @@ public:
return Op;
}
- static PPCOperand *CreateTokenWithStringCopy(StringRef Str, SMLoc S,
- bool IsPPC64) {
+ static std::unique_ptr<PPCOperand>
+ CreateTokenWithStringCopy(StringRef Str, SMLoc S, bool IsPPC64) {
// Allocate extra memory for the string and copy it.
+    // FIXME: This is incorrect: Operands are owned by a unique_ptr with the
+    // default deleter, which will destroy them by simply using "delete"
+    // rather than correctly calling operator delete on this extra memory
+    // after invoking the dtor explicitly.
void *Mem = ::operator new(sizeof(PPCOperand) + Str.size());
- PPCOperand *Op = new (Mem) PPCOperand(Token);
- Op->Tok.Data = (const char *)(Op + 1);
+ std::unique_ptr<PPCOperand> Op(new (Mem) PPCOperand(Token));
+ Op->Tok.Data = (const char *)(Op.get() + 1);
Op->Tok.Length = Str.size();
- std::memcpy((char *)(Op + 1), Str.data(), Str.size());
+ std::memcpy((void *)Op->Tok.Data, Str.data(), Str.size());
Op->StartLoc = S;
Op->EndLoc = S;
Op->IsPPC64 = IsPPC64;
return Op;
}
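One way to discharge the FIXME above, sketched here as a hypothetical (it is not part of this patch): pair the trailing allocation with a deleter that runs the destructor and then releases the raw storage.

    #include <memory>
    #include <new>

    // Hypothetical deleter for objects placement-new'ed into storage from
    // ::operator new with trailing bytes; runs the dtor, then frees the block.
    template <typename T> struct DestroyThenOperatorDelete {
      void operator()(T *P) const {
        P->~T();
        ::operator delete(P);
      }
    };
    // Usage sketch:
    //   std::unique_ptr<PPCOperand, DestroyThenOperatorDelete<PPCOperand>>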
- static PPCOperand *CreateImm(int64_t Val, SMLoc S, SMLoc E, bool IsPPC64) {
- PPCOperand *Op = new PPCOperand(Immediate);
+ static std::unique_ptr<PPCOperand> CreateImm(int64_t Val, SMLoc S, SMLoc E,
+ bool IsPPC64) {
+ auto Op = make_unique<PPCOperand>(Immediate);
Op->Imm.Val = Val;
Op->StartLoc = S;
Op->EndLoc = E;
@@ -581,9 +585,9 @@ public:
return Op;
}
- static PPCOperand *CreateExpr(const MCExpr *Val,
- SMLoc S, SMLoc E, bool IsPPC64) {
- PPCOperand *Op = new PPCOperand(Expression);
+ static std::unique_ptr<PPCOperand> CreateExpr(const MCExpr *Val, SMLoc S,
+ SMLoc E, bool IsPPC64) {
+ auto Op = make_unique<PPCOperand>(Expression);
Op->Expr.Val = Val;
Op->Expr.CRVal = EvaluateCRExpr(Val);
Op->StartLoc = S;
@@ -592,9 +596,9 @@ public:
return Op;
}
- static PPCOperand *CreateTLSReg(const MCSymbolRefExpr *Sym,
- SMLoc S, SMLoc E, bool IsPPC64) {
- PPCOperand *Op = new PPCOperand(TLSRegister);
+ static std::unique_ptr<PPCOperand>
+ CreateTLSReg(const MCSymbolRefExpr *Sym, SMLoc S, SMLoc E, bool IsPPC64) {
+ auto Op = make_unique<PPCOperand>(TLSRegister);
Op->TLSReg.Sym = Sym;
Op->StartLoc = S;
Op->EndLoc = E;
@@ -602,8 +606,8 @@ public:
return Op;
}
- static PPCOperand *CreateFromMCExpr(const MCExpr *Val,
- SMLoc S, SMLoc E, bool IsPPC64) {
+ static std::unique_ptr<PPCOperand>
+ CreateFromMCExpr(const MCExpr *Val, SMLoc S, SMLoc E, bool IsPPC64) {
if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Val))
return CreateImm(CE->getValue(), S, E, IsPPC64);
@@ -634,10 +638,8 @@ void PPCOperand::print(raw_ostream &OS) const {
}
}
-
-void PPCAsmParser::
-ProcessInstruction(MCInst &Inst,
- const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+void PPCAsmParser::ProcessInstruction(MCInst &Inst,
+ const OperandVector &Operands) {
int Opcode = Inst.getOpcode();
switch (Opcode) {
case PPC::LAx: {
@@ -917,11 +919,10 @@ ProcessInstruction(MCInst &Inst,
}
}
-bool PPCAsmParser::
-MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
- SmallVectorImpl<MCParsedAsmOperand*> &Operands,
- MCStreamer &Out, unsigned &ErrorInfo,
- bool MatchingInlineAsm) {
+bool PPCAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
+ OperandVector &Operands,
+ MCStreamer &Out, unsigned &ErrorInfo,
+ bool MatchingInlineAsm) {
MCInst Inst;
switch (MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm)) {
@@ -942,7 +943,7 @@ MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
if (ErrorInfo >= Operands.size())
return Error(IDLoc, "too few operands for instruction");
- ErrorLoc = ((PPCOperand*)Operands[ErrorInfo])->getStartLoc();
+ ErrorLoc = ((PPCOperand &)*Operands[ErrorInfo]).getStartLoc();
if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc;
}
@@ -1216,12 +1217,10 @@ ParseDarwinExpression(const MCExpr *&EVal) {
/// ParseOperand
/// This handles registers in the form 'NN', '%rNN' for ELF platforms and
/// rNN for MachO.
-bool PPCAsmParser::
-ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+bool PPCAsmParser::ParseOperand(OperandVector &Operands) {
SMLoc S = Parser.getTok().getLoc();
SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
const MCExpr *EVal;
- PPCOperand *Op;
// Attempt to parse the next token as an immediate
switch (getLexer().getKind()) {
@@ -1233,8 +1232,7 @@ ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
int64_t IntVal;
if (!MatchRegisterName(Parser.getTok(), RegNo, IntVal)) {
Parser.Lex(); // Eat the identifier token.
- Op = PPCOperand::CreateImm(IntVal, S, E, isPPC64());
- Operands.push_back(Op);
+ Operands.push_back(PPCOperand::CreateImm(IntVal, S, E, isPPC64()));
return false;
}
return Error(S, "invalid register name");
@@ -1249,8 +1247,7 @@ ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
int64_t IntVal;
if (!MatchRegisterName(Parser.getTok(), RegNo, IntVal)) {
Parser.Lex(); // Eat the identifier token.
- Op = PPCOperand::CreateImm(IntVal, S, E, isPPC64());
- Operands.push_back(Op);
+ Operands.push_back(PPCOperand::CreateImm(IntVal, S, E, isPPC64()));
return false;
}
}
@@ -1272,8 +1269,7 @@ ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
}
// Push the parsed operand into the list of operands
- Op = PPCOperand::CreateFromMCExpr(EVal, S, E, isPPC64());
- Operands.push_back(Op);
+ Operands.push_back(PPCOperand::CreateFromMCExpr(EVal, S, E, isPPC64()));
// Check whether this is a TLS call expression
bool TLSCall = false;
@@ -1292,8 +1288,7 @@ ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
E = Parser.getTok().getLoc();
Parser.Lex(); // Eat the ')'.
- Op = PPCOperand::CreateFromMCExpr(TLSSym, S, E, isPPC64());
- Operands.push_back(Op);
+ Operands.push_back(PPCOperand::CreateFromMCExpr(TLSSym, S, E, isPPC64()));
}
// Otherwise, check for D-form memory operands
@@ -1340,17 +1335,15 @@ ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
E = Parser.getTok().getLoc();
Parser.Lex(); // Eat the ')'.
- Op = PPCOperand::CreateImm(IntVal, S, E, isPPC64());
- Operands.push_back(Op);
+ Operands.push_back(PPCOperand::CreateImm(IntVal, S, E, isPPC64()));
}
return false;
}
/// Parse an instruction mnemonic followed by its operands.
-bool PPCAsmParser::
-ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc,
- SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+bool PPCAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
+ SMLoc NameLoc, OperandVector &Operands) {
// The first operand is the token for the instruction name.
// If the next character is a '+' or '-', we need to add it to the
// instruction name, to match what TableGen is doing.
@@ -1554,7 +1547,7 @@ extern "C" void LLVMInitializePowerPCAsmParser() {
// Define this matcher function after the auto-generated include so we
// have the match class enum definitions.
-unsigned PPCAsmParser::validateTargetOperandClass(MCParsedAsmOperand *AsmOp,
+unsigned PPCAsmParser::validateTargetOperandClass(MCParsedAsmOperand &AsmOp,
unsigned Kind) {
// If the kind is a token for a literal immediate, check if our asm
// operand matches. This is for InstAliases which have a fixed-value
@@ -1568,8 +1561,8 @@ unsigned PPCAsmParser::validateTargetOperandClass(MCParsedAsmOperand *AsmOp,
default: return Match_InvalidOperand;
}
- PPCOperand *Op = static_cast<PPCOperand*>(AsmOp);
- if (Op->isImm() && Op->getImm() == ImmVal)
+ PPCOperand &Op = static_cast<PPCOperand &>(AsmOp);
+ if (Op.isImm() && Op.getImm() == ImmVal)
return Match_Success;
return Match_InvalidOperand;
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
index a4983ad..435a93f 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
@@ -102,17 +102,45 @@ public:
// Output the constant in big/little endian byte order.
unsigned Size = Desc.getSize();
- if (IsLittleEndian) {
- for (unsigned i = 0; i != Size; ++i) {
- OS << (char)Bits;
- Bits >>= 8;
+ switch (Size) {
+ case 4:
+ if (IsLittleEndian) {
+ OS << (char)(Bits);
+ OS << (char)(Bits >> 8);
+ OS << (char)(Bits >> 16);
+ OS << (char)(Bits >> 24);
+ } else {
+ OS << (char)(Bits >> 24);
+ OS << (char)(Bits >> 16);
+ OS << (char)(Bits >> 8);
+ OS << (char)(Bits);
}
- } else {
- int ShiftValue = (Size * 8) - 8;
- for (unsigned i = 0; i != Size; ++i) {
- OS << (char)(Bits >> ShiftValue);
- Bits <<= 8;
+ break;
+ case 8:
+ // If we emit a pair of instructions, the first one is
+ // always in the top 32 bits, even on little-endian.
+ if (IsLittleEndian) {
+ OS << (char)(Bits >> 32);
+ OS << (char)(Bits >> 40);
+ OS << (char)(Bits >> 48);
+ OS << (char)(Bits >> 56);
+ OS << (char)(Bits);
+ OS << (char)(Bits >> 8);
+ OS << (char)(Bits >> 16);
+ OS << (char)(Bits >> 24);
+ } else {
+ OS << (char)(Bits >> 56);
+ OS << (char)(Bits >> 48);
+ OS << (char)(Bits >> 40);
+ OS << (char)(Bits >> 32);
+ OS << (char)(Bits >> 24);
+ OS << (char)(Bits >> 16);
+ OS << (char)(Bits >> 8);
+ OS << (char)(Bits);
}
+ break;
+ default:
+    llvm_unreachable("Invalid instruction size");
}
++MCNumEmitted; // Keep track of the # of mi's emitted.
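The 8-byte case encodes a two-instruction pair, and the comment's invariant (first instruction in the top 32 bits even on little-endian) means the LE byte stream is word-swapped relative to a plain 64-bit LE store. A minimal sketch of that ordering:

    #include <cstdint>
    #include <vector>

    // Sketch of the 8-byte little-endian case above: emit the top 32-bit
    // word first, each word in little-endian byte order.
    static void emitPairLE(uint64_t Bits, std::vector<uint8_t> &Out) {
      const int Shifts[] = {32, 40, 48, 56, 0, 8, 16, 24};
      for (int S : Shifts)
        Out.push_back(uint8_t(Bits >> S));
    }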
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp
index 10d068d..3ac0aca 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp
@@ -11,6 +11,7 @@
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCObjectStreamer.h"
using namespace llvm;
@@ -127,33 +128,6 @@ PPCMCExpr::EvaluateAsRelocatableImpl(MCValue &Res,
return true;
}
-// FIXME: This basically copies MCObjectStreamer::AddValueSymbols. Perhaps
-// that method should be made public?
-static void AddValueSymbols_(const MCExpr *Value, MCAssembler *Asm) {
- switch (Value->getKind()) {
- case MCExpr::Target:
- llvm_unreachable("Can't handle nested target expr!");
-
- case MCExpr::Constant:
- break;
-
- case MCExpr::Binary: {
- const MCBinaryExpr *BE = cast<MCBinaryExpr>(Value);
- AddValueSymbols_(BE->getLHS(), Asm);
- AddValueSymbols_(BE->getRHS(), Asm);
- break;
- }
-
- case MCExpr::SymbolRef:
- Asm->getOrCreateSymbolData(cast<MCSymbolRefExpr>(Value)->getSymbol());
- break;
-
- case MCExpr::Unary:
- AddValueSymbols_(cast<MCUnaryExpr>(Value)->getSubExpr(), Asm);
- break;
- }
-}
-
-void PPCMCExpr::AddValueSymbols(MCAssembler *Asm) const {
- AddValueSymbols_(getSubExpr(), Asm);
+void PPCMCExpr::visitUsedExpr(MCStreamer &Streamer) const {
+ Streamer.visitUsedExpr(*getSubExpr());
}
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h
index 3421b91..bca4085 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h
@@ -79,7 +79,7 @@ public:
void PrintImpl(raw_ostream &OS) const override;
bool EvaluateAsRelocatableImpl(MCValue &Res,
const MCAsmLayout *Layout) const override;
- void AddValueSymbols(MCAssembler *) const override;
+ void visitUsedExpr(MCStreamer &Streamer) const override;
const MCSection *FindAssociatedSection() const override {
return getSubExpr()->FindAssociatedSection();
}
diff --git a/lib/Target/PowerPC/PPC.td b/lib/Target/PowerPC/PPC.td
index bd58539..a9842b2 100644
--- a/lib/Target/PowerPC/PPC.td
+++ b/lib/Target/PowerPC/PPC.td
@@ -46,6 +46,7 @@ def DirectivePwr5x: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR5X", ""
def DirectivePwr6: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR6", "">;
def DirectivePwr6x: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR6X", "">;
def DirectivePwr7: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR7", "">;
+def DirectivePwr8: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR8", "">;
def Feature64Bit : SubtargetFeature<"64bit","Has64BitSupport", "true",
"Enable 64-bit instructions">;
@@ -285,6 +286,15 @@ def : ProcessorModel<"pwr7", P7Model,
FeaturePOPCNTD, FeatureLDBRX,
Feature64Bit /*, Feature64BitRegs */,
DeprecatedMFTB, DeprecatedDST]>;
+def : ProcessorModel<"pwr8", P7Model /* FIXME: Update to P8Model when available */,
+ [DirectivePwr8, FeatureAltivec,
+ FeatureMFOCRF, FeatureFCPSGN, FeatureFSqrt, FeatureFRE,
+ FeatureFRES, FeatureFRSQRTE, FeatureFRSQRTES,
+ FeatureRecipPrec, FeatureSTFIWX, FeatureLFIWAX,
+ FeatureFPRND, FeatureFPCVT, FeatureISEL,
+ FeaturePOPCNTD, FeatureLDBRX,
+ Feature64Bit /*, Feature64BitRegs */,
+ DeprecatedMFTB, DeprecatedDST]>;
def : Processor<"ppc", G3Itineraries, [Directive32]>;
def : ProcessorModel<"ppc64", G5Model,
[Directive64, FeatureAltivec,
diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp
index e89fb2d..fd044d9 100644
--- a/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -365,8 +365,8 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
// Transform %Xd = ADDIStocHA %X2, <ga:@sym>
LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, Subtarget.isDarwin());
- // Change the opcode to ADDIS8. If the global address is external,
- // has common linkage, is a function address, or is a jump table
+ // Change the opcode to ADDIS8. If the global address is external, has
+ // common linkage, is a non-local function address, or is a jump table
// address, then generate a TOC entry and reference that. Otherwise
// reference the symbol directly.
TmpInst.setOpcode(PPC::ADDIS8);
@@ -375,7 +375,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
"Invalid operand for ADDIStocHA!");
MCSymbol *MOSymbol = nullptr;
bool IsExternal = false;
- bool IsFunction = false;
+ bool IsNonLocalFunction = false;
bool IsCommon = false;
bool IsAvailExt = false;
@@ -384,15 +384,16 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
MOSymbol = getSymbol(GV);
IsExternal = GV->isDeclaration();
IsCommon = GV->hasCommonLinkage();
- IsFunction = GV->getType()->getElementType()->isFunctionTy();
+ IsNonLocalFunction = GV->getType()->getElementType()->isFunctionTy() &&
+ (GV->isDeclaration() || GV->isWeakForLinker());
IsAvailExt = GV->hasAvailableExternallyLinkage();
} else if (MO.isCPI())
MOSymbol = GetCPISymbol(MO.getIndex());
else if (MO.isJTI())
MOSymbol = GetJTISymbol(MO.getIndex());
- if (IsExternal || IsFunction || IsCommon || IsAvailExt || MO.isJTI() ||
- TM.getCodeModel() == CodeModel::Large)
+ if (IsExternal || IsNonLocalFunction || IsCommon || IsAvailExt ||
+ MO.isJTI() || TM.getCodeModel() == CodeModel::Large)
MOSymbol = lookUpOrCreateTOCEntry(MOSymbol);
const MCExpr *Exp =
@@ -425,7 +426,8 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
else if (MO.isGlobal()) {
const GlobalValue *GValue = MO.getGlobal();
MOSymbol = getSymbol(GValue);
- if (GValue->isDeclaration() || GValue->hasCommonLinkage() ||
+ if (GValue->getType()->getElementType()->isFunctionTy() ||
+ GValue->isDeclaration() || GValue->hasCommonLinkage() ||
GValue->hasAvailableExternallyLinkage() ||
TM.getCodeModel() == CodeModel::Large)
MOSymbol = lookUpOrCreateTOCEntry(MOSymbol);
@@ -450,17 +452,19 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
assert((MO.isGlobal() || MO.isCPI()) && "Invalid operand for ADDItocL");
MCSymbol *MOSymbol = nullptr;
bool IsExternal = false;
- bool IsFunction = false;
+ bool IsNonLocalFunction = false;
if (MO.isGlobal()) {
const GlobalValue *GV = MO.getGlobal();
MOSymbol = getSymbol(GV);
IsExternal = GV->isDeclaration();
- IsFunction = GV->getType()->getElementType()->isFunctionTy();
+ IsNonLocalFunction = GV->getType()->getElementType()->isFunctionTy() &&
+ (GV->isDeclaration() || GV->isWeakForLinker());
} else if (MO.isCPI())
MOSymbol = GetCPISymbol(MO.getIndex());
- if (IsFunction || IsExternal || TM.getCodeModel() == CodeModel::Large)
+ if (IsNonLocalFunction || IsExternal ||
+ TM.getCodeModel() == CodeModel::Large)
MOSymbol = lookUpOrCreateTOCEntry(MOSymbol);
const MCExpr *Exp =
diff --git a/lib/Target/PowerPC/PPCFastISel.cpp b/lib/Target/PowerPC/PPCFastISel.cpp
index ed3cb4d..92a0ec1 100644
--- a/lib/Target/PowerPC/PPCFastISel.cpp
+++ b/lib/Target/PowerPC/PPCFastISel.cpp
@@ -1030,6 +1030,10 @@ bool PPCFastISel::SelectFPToI(const Instruction *I, bool IsSigned) {
if (DstVT != MVT::i32 && DstVT != MVT::i64)
return false;
+ // If we don't have FCTIDUZ and we need it, punt to SelectionDAG.
+ if (DstVT == MVT::i64 && !IsSigned && !PPCSubTarget->hasFPCVT())
+ return false;
+
Value *Src = I->getOperand(0);
Type *SrcTy = Src->getType();
if (!isTypeLegal(SrcTy, SrcVT))
@@ -1197,6 +1201,11 @@ bool PPCFastISel::processCallArgs(SmallVectorImpl<Value*> &Args,
bool IsVarArg) {
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, TM, ArgLocs, *Context);
+
+ // Reserve space for the linkage area on the stack.
+ unsigned LinkageSize = PPCFrameLowering::getLinkageSize(true, false);
+ CCInfo.AllocateStack(LinkageSize, 8);
+
CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CC_PPC64_ELF_FIS);
// Bail out if we can't handle any of the arguments.
@@ -1218,6 +1227,13 @@ bool PPCFastISel::processCallArgs(SmallVectorImpl<Value*> &Args,
// Get a count of how many bytes are to be pushed onto the stack.
NumBytes = CCInfo.getNextStackOffset();
+ // The prolog code of the callee may store up to 8 GPR argument registers to
+  // the stack, allowing va_start to index over them in memory if it is varargs.
+ // Because we cannot tell if this is needed on the caller side, we have to
+ // conservatively assume that it is needed. As such, make sure we have at
+ // least enough stack space for the caller to store the 8 GPRs.
+ NumBytes = std::max(NumBytes, LinkageSize + 64);
+
// Issue CALLSEQ_START.
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TII.getCallFrameSetupOpcode()))
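Making the reservation arithmetic concrete (assuming getLinkageSize(true, false) yields the 48-byte 64-bit SVR4 linkage area; the 64 is 8 GPRs times 8 bytes of home space):

    #include <algorithm>

    // Minimum outgoing call-frame size: linkage area plus GPR home space.
    // With a 48-byte linkage area, every call reserves at least 112 bytes.
    static unsigned minCallFrame(unsigned NumBytes, unsigned LinkageSize) {
      return std::max(NumBytes, LinkageSize + 64u);
    }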
@@ -1858,16 +1874,9 @@ unsigned PPCFastISel::PPCMaterializeGV(const GlobalValue *GV, MVT VT) {
// FIXME: Jump tables are not yet required because fast-isel doesn't
// handle switches; if that changes, we need them as well. For now,
// what follows assumes everything's a generic (or TLS) global address.
- const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV);
- if (!GVar) {
- // If GV is an alias, use the aliasee for determining thread-locality.
- if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
- GVar = dyn_cast_or_null<GlobalVariable>(GA->getAliasee());
- }
// FIXME: We don't yet handle the complexity of TLS.
- bool IsTLS = GVar && GVar->isThreadLocal();
- if (IsTLS)
+ if (GV->isThreadLocal())
return 0;
// For small code model, generate a simple TOC load.
@@ -1877,8 +1886,8 @@ unsigned PPCFastISel::PPCMaterializeGV(const GlobalValue *GV, MVT VT) {
.addGlobalAddress(GV)
.addReg(PPC::X2);
else {
- // If the address is an externally defined symbol, a symbol with
- // common or externally available linkage, a function address, or a
+ // If the address is an externally defined symbol, a symbol with common
+ // or externally available linkage, a non-local function address, or a
// jump table address (not yet needed), or if we are generating code
// for large code model, we generate:
// LDtocL(GV, ADDIStocHA(%X2, GV))
@@ -1889,12 +1898,13 @@ unsigned PPCFastISel::PPCMaterializeGV(const GlobalValue *GV, MVT VT) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ADDIStocHA),
HighPartReg).addReg(PPC::X2).addGlobalAddress(GV);
- // !GVar implies a function address. An external variable is one
- // without an initializer.
// If/when switches are implemented, jump tables should be handled
// on the "if" path here.
- if (CModel == CodeModel::Large || !GVar || !GVar->hasInitializer() ||
- GVar->hasCommonLinkage() || GVar->hasAvailableExternallyLinkage())
+ if (CModel == CodeModel::Large ||
+ (GV->getType()->getElementType()->isFunctionTy() &&
+ (GV->isDeclaration() || GV->isWeakForLinker())) ||
+ GV->isDeclaration() || GV->hasCommonLinkage() ||
+ GV->hasAvailableExternallyLinkage())
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::LDtocL),
DestReg).addGlobalAddress(GV).addReg(HighPartReg);
else
diff --git a/lib/Target/PowerPC/PPCFrameLowering.cpp b/lib/Target/PowerPC/PPCFrameLowering.cpp
index e294156..65e9cf2 100644
--- a/lib/Target/PowerPC/PPCFrameLowering.cpp
+++ b/lib/Target/PowerPC/PPCFrameLowering.cpp
@@ -15,6 +15,7 @@
#include "PPCInstrBuilder.h"
#include "PPCInstrInfo.h"
#include "PPCMachineFunctionInfo.h"
+#include "PPCSubtarget.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -35,6 +36,167 @@ static const uint16_t VRRegNo[] = {
PPC::V24, PPC::V25, PPC::V26, PPC::V27, PPC::V28, PPC::V29, PPC::V30, PPC::V31
};
+PPCFrameLowering::PPCFrameLowering(const PPCSubtarget &STI)
+ : TargetFrameLowering(TargetFrameLowering::StackGrowsDown,
+ (STI.hasQPX() || STI.isBGQ()) ? 32 : 16, 0),
+ Subtarget(STI) {}
+
+// With the SVR4 ABI, callee-saved registers have fixed offsets on the stack.
+const PPCFrameLowering::SpillSlot *PPCFrameLowering::getCalleeSavedSpillSlots(
+ unsigned &NumEntries) const {
+ if (Subtarget.isDarwinABI()) {
+ NumEntries = 1;
+ if (Subtarget.isPPC64()) {
+ static const SpillSlot darwin64Offsets = {PPC::X31, -8};
+ return &darwin64Offsets;
+ } else {
+ static const SpillSlot darwinOffsets = {PPC::R31, -4};
+ return &darwinOffsets;
+ }
+ }
+
+ // Early exit if not using the SVR4 ABI.
+ if (!Subtarget.isSVR4ABI()) {
+ NumEntries = 0;
+ return nullptr;
+ }
+
+ // Note that the offsets here overlap, but this is fixed up in
+ // processFunctionBeforeFrameFinalized.
+
+ static const SpillSlot Offsets[] = {
+ // Floating-point register save area offsets.
+ {PPC::F31, -8},
+ {PPC::F30, -16},
+ {PPC::F29, -24},
+ {PPC::F28, -32},
+ {PPC::F27, -40},
+ {PPC::F26, -48},
+ {PPC::F25, -56},
+ {PPC::F24, -64},
+ {PPC::F23, -72},
+ {PPC::F22, -80},
+ {PPC::F21, -88},
+ {PPC::F20, -96},
+ {PPC::F19, -104},
+ {PPC::F18, -112},
+ {PPC::F17, -120},
+ {PPC::F16, -128},
+ {PPC::F15, -136},
+ {PPC::F14, -144},
+
+ // General register save area offsets.
+ {PPC::R31, -4},
+ {PPC::R30, -8},
+ {PPC::R29, -12},
+ {PPC::R28, -16},
+ {PPC::R27, -20},
+ {PPC::R26, -24},
+ {PPC::R25, -28},
+ {PPC::R24, -32},
+ {PPC::R23, -36},
+ {PPC::R22, -40},
+ {PPC::R21, -44},
+ {PPC::R20, -48},
+ {PPC::R19, -52},
+ {PPC::R18, -56},
+ {PPC::R17, -60},
+ {PPC::R16, -64},
+ {PPC::R15, -68},
+ {PPC::R14, -72},
+
+ // CR save area offset. We map each of the nonvolatile CR fields
+ // to the slot for CR2, which is the first of the nonvolatile CR
+ // fields to be assigned, so that we only allocate one save slot.
+ // See PPCRegisterInfo::hasReservedSpillSlot() for more information.
+ {PPC::CR2, -4},
+
+ // VRSAVE save area offset.
+ {PPC::VRSAVE, -4},
+
+ // Vector register save area
+ {PPC::V31, -16},
+ {PPC::V30, -32},
+ {PPC::V29, -48},
+ {PPC::V28, -64},
+ {PPC::V27, -80},
+ {PPC::V26, -96},
+ {PPC::V25, -112},
+ {PPC::V24, -128},
+ {PPC::V23, -144},
+ {PPC::V22, -160},
+ {PPC::V21, -176},
+ {PPC::V20, -192}};
+
+ static const SpillSlot Offsets64[] = {
+ // Floating-point register save area offsets.
+ {PPC::F31, -8},
+ {PPC::F30, -16},
+ {PPC::F29, -24},
+ {PPC::F28, -32},
+ {PPC::F27, -40},
+ {PPC::F26, -48},
+ {PPC::F25, -56},
+ {PPC::F24, -64},
+ {PPC::F23, -72},
+ {PPC::F22, -80},
+ {PPC::F21, -88},
+ {PPC::F20, -96},
+ {PPC::F19, -104},
+ {PPC::F18, -112},
+ {PPC::F17, -120},
+ {PPC::F16, -128},
+ {PPC::F15, -136},
+ {PPC::F14, -144},
+
+ // General register save area offsets.
+ {PPC::X31, -8},
+ {PPC::X30, -16},
+ {PPC::X29, -24},
+ {PPC::X28, -32},
+ {PPC::X27, -40},
+ {PPC::X26, -48},
+ {PPC::X25, -56},
+ {PPC::X24, -64},
+ {PPC::X23, -72},
+ {PPC::X22, -80},
+ {PPC::X21, -88},
+ {PPC::X20, -96},
+ {PPC::X19, -104},
+ {PPC::X18, -112},
+ {PPC::X17, -120},
+ {PPC::X16, -128},
+ {PPC::X15, -136},
+ {PPC::X14, -144},
+
+ // VRSAVE save area offset.
+ {PPC::VRSAVE, -4},
+
+ // Vector register save area
+ {PPC::V31, -16},
+ {PPC::V30, -32},
+ {PPC::V29, -48},
+ {PPC::V28, -64},
+ {PPC::V27, -80},
+ {PPC::V26, -96},
+ {PPC::V25, -112},
+ {PPC::V24, -128},
+ {PPC::V23, -144},
+ {PPC::V22, -160},
+ {PPC::V21, -176},
+ {PPC::V20, -192}};
+
+ if (Subtarget.isPPC64()) {
+ NumEntries = array_lengthof(Offsets64);
+
+ return Offsets64;
+ } else {
+ NumEntries = array_lengthof(Offsets);
+
+ return Offsets;
+ }
+}
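
NumEntries is derived from the static tables with array_lengthof, a compile-time element count from llvm/ADT/STLExtras.h. A minimal equivalent, for reference (sketch only):

    #include <cstddef>
    template <class T, std::size_t N>
    constexpr std::size_t array_lengthof(T (&)[N]) {
      return N; // deduced from the array type, no runtime cost
    }

The FPR, GPR, CR, VRSAVE and vector areas intentionally share offsets in these tables; as the comment above notes, the areas actually used are laid out below one another in processFunctionBeforeFrameFinalized.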
+
/// RemoveVRSaveCode - We have found that this function does not need any code
/// to manipulate the VRSAVE register, even though it uses vector registers.
/// This can happen when the only registers used are known to be live in or out
@@ -236,9 +398,9 @@ unsigned PPCFrameLowering::determineFrameLayout(MachineFunction &MF,
// Get the maximum call frame size of all the calls.
unsigned maxCallFrameSize = MFI->getMaxCallFrameSize();
- // Maximum call frame needs to be at least big enough for linkage and 8 args.
- unsigned minCallFrameSize = getMinCallFrameSize(Subtarget.isPPC64(),
- Subtarget.isDarwinABI());
+ // Maximum call frame needs to be at least big enough for the linkage area.
+ unsigned minCallFrameSize = getLinkageSize(Subtarget.isPPC64(),
+ Subtarget.isDarwinABI());
maxCallFrameSize = std::max(maxCallFrameSize, minCallFrameSize);
// If we have dynamic alloca then maxCallFrameSize needs to be aligned so
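
With getMinCallFrameSize removed (see the header diff below), the floor for the call frame is just the linkage area; the 8-GPR parameter save area is now accounted for at each call site instead. The arithmetic, assuming getLinkageSize still returns six pointers for the 64-bit SVR4 ABI:

    // 64-bit SVR4 linkage: [SP][CR][LR][2 x reserved][TOC] = 6 * 8 bytes.
    unsigned LinkageSize = PPCFrameLowering::getLinkageSize(/*isPPC64=*/true,
                                                            /*isDarwinABI=*/false);
    // LinkageSize == 48; the removed getMinCallFrameSize stacked the
    // 8 * 8 == 64-byte GPR save area on top of this, giving 112.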
diff --git a/lib/Target/PowerPC/PPCFrameLowering.h b/lib/Target/PowerPC/PPCFrameLowering.h
index 94e9b67..7a226f7 100644
--- a/lib/Target/PowerPC/PPCFrameLowering.h
+++ b/lib/Target/PowerPC/PPCFrameLowering.h
@@ -14,23 +14,18 @@
#define POWERPC_FRAMEINFO_H
#include "PPC.h"
-#include "PPCSubtarget.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetMachine.h"
namespace llvm {
- class PPCSubtarget;
+class PPCSubtarget;
class PPCFrameLowering: public TargetFrameLowering {
const PPCSubtarget &Subtarget;
public:
- PPCFrameLowering(const PPCSubtarget &sti)
- : TargetFrameLowering(TargetFrameLowering::StackGrowsDown,
- (sti.hasQPX() || sti.isBGQ()) ? 32 : 16, 0),
- Subtarget(sti) {
- }
+ PPCFrameLowering(const PPCSubtarget &STI);
unsigned determineFrameLayout(MachineFunction &MF,
bool UpdateMF = true,
@@ -79,6 +74,12 @@ public:
return isPPC64 ? 16 : 4;
}
+ /// getTOCSaveOffset - Return the previous frame offset to save the
+ /// TOC register -- 64-bit SVR4 ABI only.
+ static unsigned getTOCSaveOffset() {
+ return 40;
+ }
+
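
The constant 40 is the fixed TOC save slot in the 64-bit SVR4 (ELFv1) linkage area. A sketch of that layout (the struct is illustrative only and does not exist in the tree):

    #include <cstddef>
    #include <cstdint>

    struct Linkage64 {        // offsets from the stack pointer
      uint64_t BackChain;     //  0: caller's SP
      uint64_t CRSave;        //  8: condition register save
      uint64_t LRSave;        // 16: link register save
      uint64_t CompilerDword; // 24: reserved for the compiler
      uint64_t LinkerDword;   // 32: reserved for the linker
      uint64_t TOCSave;       // 40: TOC (r2) save slot
    };
    static_assert(offsetof(Linkage64, TOCSave) == 40,
                  "getTOCSaveOffset() must match the ABI slot");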
/// getFramePointerSaveOffset - Return the previous frame offset to save the
/// frame pointer.
static unsigned getFramePointerSaveOffset(bool isPPC64, bool isDarwinABI) {
@@ -114,190 +115,9 @@ public:
return 8;
}
- /// getMinCallArgumentsSize - Return the size of the minium PowerPC ABI
- /// argument area.
- static unsigned getMinCallArgumentsSize(bool isPPC64, bool isDarwinABI) {
- // For the Darwin ABI / 64-bit SVR4 ABI:
- // The prolog code of the callee may store up to 8 GPR argument registers to
- // the stack, allowing va_start to index over them in memory if its varargs.
- // Because we cannot tell if this is needed on the caller side, we have to
- // conservatively assume that it is needed. As such, make sure we have at
- // least enough stack space for the caller to store the 8 GPRs.
- if (isDarwinABI || isPPC64)
- return 8 * (isPPC64 ? 8 : 4);
-
- // 32-bit SVR4 ABI:
- // There is no default stack allocated for the 8 first GPR arguments.
- return 0;
- }
-
- /// getMinCallFrameSize - Return the minimum size a call frame can be using
- /// the PowerPC ABI.
- static unsigned getMinCallFrameSize(bool isPPC64, bool isDarwinABI) {
- // The call frame needs to be at least big enough for linkage and 8 args.
- return getLinkageSize(isPPC64, isDarwinABI) +
- getMinCallArgumentsSize(isPPC64, isDarwinABI);
- }
-
- // With the SVR4 ABI, callee-saved registers have fixed offsets on the stack.
const SpillSlot *
- getCalleeSavedSpillSlots(unsigned &NumEntries) const override {
- if (Subtarget.isDarwinABI()) {
- NumEntries = 1;
- if (Subtarget.isPPC64()) {
- static const SpillSlot darwin64Offsets = {PPC::X31, -8};
- return &darwin64Offsets;
- } else {
- static const SpillSlot darwinOffsets = {PPC::R31, -4};
- return &darwinOffsets;
- }
- }
-
- // Early exit if not using the SVR4 ABI.
- if (!Subtarget.isSVR4ABI()) {
- NumEntries = 0;
- return nullptr;
- }
-
- // Note that the offsets here overlap, but this is fixed up in
- // processFunctionBeforeFrameFinalized.
-
- static const SpillSlot Offsets[] = {
- // Floating-point register save area offsets.
- {PPC::F31, -8},
- {PPC::F30, -16},
- {PPC::F29, -24},
- {PPC::F28, -32},
- {PPC::F27, -40},
- {PPC::F26, -48},
- {PPC::F25, -56},
- {PPC::F24, -64},
- {PPC::F23, -72},
- {PPC::F22, -80},
- {PPC::F21, -88},
- {PPC::F20, -96},
- {PPC::F19, -104},
- {PPC::F18, -112},
- {PPC::F17, -120},
- {PPC::F16, -128},
- {PPC::F15, -136},
- {PPC::F14, -144},
-
- // General register save area offsets.
- {PPC::R31, -4},
- {PPC::R30, -8},
- {PPC::R29, -12},
- {PPC::R28, -16},
- {PPC::R27, -20},
- {PPC::R26, -24},
- {PPC::R25, -28},
- {PPC::R24, -32},
- {PPC::R23, -36},
- {PPC::R22, -40},
- {PPC::R21, -44},
- {PPC::R20, -48},
- {PPC::R19, -52},
- {PPC::R18, -56},
- {PPC::R17, -60},
- {PPC::R16, -64},
- {PPC::R15, -68},
- {PPC::R14, -72},
-
- // CR save area offset. We map each of the nonvolatile CR fields
- // to the slot for CR2, which is the first of the nonvolatile CR
- // fields to be assigned, so that we only allocate one save slot.
- // See PPCRegisterInfo::hasReservedSpillSlot() for more information.
- {PPC::CR2, -4},
-
- // VRSAVE save area offset.
- {PPC::VRSAVE, -4},
-
- // Vector register save area
- {PPC::V31, -16},
- {PPC::V30, -32},
- {PPC::V29, -48},
- {PPC::V28, -64},
- {PPC::V27, -80},
- {PPC::V26, -96},
- {PPC::V25, -112},
- {PPC::V24, -128},
- {PPC::V23, -144},
- {PPC::V22, -160},
- {PPC::V21, -176},
- {PPC::V20, -192}
- };
-
- static const SpillSlot Offsets64[] = {
- // Floating-point register save area offsets.
- {PPC::F31, -8},
- {PPC::F30, -16},
- {PPC::F29, -24},
- {PPC::F28, -32},
- {PPC::F27, -40},
- {PPC::F26, -48},
- {PPC::F25, -56},
- {PPC::F24, -64},
- {PPC::F23, -72},
- {PPC::F22, -80},
- {PPC::F21, -88},
- {PPC::F20, -96},
- {PPC::F19, -104},
- {PPC::F18, -112},
- {PPC::F17, -120},
- {PPC::F16, -128},
- {PPC::F15, -136},
- {PPC::F14, -144},
-
- // General register save area offsets.
- {PPC::X31, -8},
- {PPC::X30, -16},
- {PPC::X29, -24},
- {PPC::X28, -32},
- {PPC::X27, -40},
- {PPC::X26, -48},
- {PPC::X25, -56},
- {PPC::X24, -64},
- {PPC::X23, -72},
- {PPC::X22, -80},
- {PPC::X21, -88},
- {PPC::X20, -96},
- {PPC::X19, -104},
- {PPC::X18, -112},
- {PPC::X17, -120},
- {PPC::X16, -128},
- {PPC::X15, -136},
- {PPC::X14, -144},
-
- // VRSAVE save area offset.
- {PPC::VRSAVE, -4},
-
- // Vector register save area
- {PPC::V31, -16},
- {PPC::V30, -32},
- {PPC::V29, -48},
- {PPC::V28, -64},
- {PPC::V27, -80},
- {PPC::V26, -96},
- {PPC::V25, -112},
- {PPC::V24, -128},
- {PPC::V23, -144},
- {PPC::V22, -160},
- {PPC::V21, -176},
- {PPC::V20, -192}
- };
-
- if (Subtarget.isPPC64()) {
- NumEntries = array_lengthof(Offsets64);
-
- return Offsets64;
- } else {
- NumEntries = array_lengthof(Offsets);
-
- return Offsets;
- }
- }
+ getCalleeSavedSpillSlots(unsigned &NumEntries) const override;
};
-
} // End llvm namespace
#endif
diff --git a/lib/Target/PowerPC/PPCHazardRecognizers.cpp b/lib/Target/PowerPC/PPCHazardRecognizers.cpp
index 7ca706b..d9b242c 100644
--- a/lib/Target/PowerPC/PPCHazardRecognizers.cpp
+++ b/lib/Target/PowerPC/PPCHazardRecognizers.cpp
@@ -162,7 +162,8 @@ unsigned PPCDispatchGroupSBHazardRecognizer::PreEmitNoops(SUnit *SU) {
unsigned Directive =
DAG->TM.getSubtarget<PPCSubtarget>().getDarwinDirective();
// If we're using a special group-terminating nop, then we need only one.
- if (Directive == PPC::DIR_PWR6 || Directive == PPC::DIR_PWR7)
+ if (Directive == PPC::DIR_PWR6 || Directive == PPC::DIR_PWR7 ||
+ Directive == PPC::DIR_PWR8)
return 1;
return 5 - CurSlots;
@@ -223,7 +224,7 @@ void PPCDispatchGroupSBHazardRecognizer::EmitNoop() {
// If the group has now filled all of its slots, or if we're using a special
// group-terminating nop, the group is complete.
if (Directive == PPC::DIR_PWR6 || Directive == PPC::DIR_PWR7 ||
- CurSlots == 6) {
+ Directive == PPC::DIR_PWR8 || CurSlots == 6) {
CurGroup.clear();
CurSlots = CurBranches = 0;
} else {
@@ -258,8 +259,8 @@ void PPCDispatchGroupSBHazardRecognizer::EmitNoop() {
// 3. Handling of the esoteric cases in "Resource-based Instruction Grouping".
//
-PPCHazardRecognizer970::PPCHazardRecognizer970(const TargetMachine &TM)
- : TM(TM) {
+PPCHazardRecognizer970::PPCHazardRecognizer970(const ScheduleDAG &DAG)
+ : DAG(DAG) {
EndDispatchGroup();
}
@@ -278,7 +279,7 @@ PPCHazardRecognizer970::GetInstrType(unsigned Opcode,
bool &isFirst, bool &isSingle,
bool &isCracked,
bool &isLoad, bool &isStore) {
- const MCInstrDesc &MCID = TM.getInstrInfo()->get(Opcode);
+ const MCInstrDesc &MCID = DAG.TII->get(Opcode);
isLoad = MCID.mayLoad();
isStore = MCID.mayStore();
diff --git a/lib/Target/PowerPC/PPCHazardRecognizers.h b/lib/Target/PowerPC/PPCHazardRecognizers.h
index cf4332c..23f76c1 100644
--- a/lib/Target/PowerPC/PPCHazardRecognizers.h
+++ b/lib/Target/PowerPC/PPCHazardRecognizers.h
@@ -54,7 +54,7 @@ public:
/// setting the CTR register then branching through it within a dispatch group),
/// or storing then loading from the same address within a dispatch group.
class PPCHazardRecognizer970 : public ScheduleHazardRecognizer {
- const TargetMachine &TM;
+ const ScheduleDAG &DAG;
unsigned NumIssued; // Number of insts issued, including advanced cycles.
@@ -75,7 +75,7 @@ class PPCHazardRecognizer970 : public ScheduleHazardRecognizer {
unsigned NumStores;
public:
- PPCHazardRecognizer970(const TargetMachine &TM);
+ PPCHazardRecognizer970(const ScheduleDAG &DAG);
virtual HazardType getHazardType(SUnit *SU, int Stalls) override;
virtual void EmitInstruction(SUnit *SU) override;
virtual void AdvanceCycle() override;
diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index 251e8b6..4881b3f 100644
--- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -1454,10 +1454,10 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
if (CModel != CodeModel::Medium && CModel != CodeModel::Large)
break;
- // The first source operand is a TargetGlobalAddress or a
- // TargetJumpTable. If it is an externally defined symbol, a symbol
- // with common linkage, a function address, or a jump table address,
- // or if we are generating code for large code model, we generate:
+ // The first source operand is a TargetGlobalAddress or a TargetJumpTable.
+ // If it is an externally defined symbol, a symbol with common linkage,
+ // a non-local function address, or a jump table address, or if we are
+ // generating code for large code model, we generate:
// LDtocL(<ga:@sym>, ADDIStocHA(%X2, <ga:@sym>))
// Otherwise we generate:
// ADDItocL(ADDIStocHA(%X2, <ga:@sym>), <ga:@sym>)
@@ -1472,8 +1472,10 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(GA)) {
const GlobalValue *GValue = G->getGlobal();
- if (GValue->isDeclaration() || GValue->hasCommonLinkage() ||
- GValue->hasAvailableExternallyLinkage())
+ if ((GValue->getType()->getElementType()->isFunctionTy() &&
+ (GValue->isDeclaration() || GValue->isWeakForLinker())) ||
+ GValue->isDeclaration() || GValue->hasCommonLinkage() ||
+ GValue->hasAvailableExternallyLinkage())
return CurDAG->getMachineNode(PPC::LDtocL, dl, MVT::i64, GA,
SDValue(Tmp, 0));
}
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index cf4c9e6..bc057bf 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -19,6 +19,7 @@
#include "PPCTargetObjectFile.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/Triple.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -50,20 +51,18 @@ cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden);
// FIXME: Remove this once the bug has been fixed!
extern cl::opt<bool> ANDIGlueBug;
-static TargetLoweringObjectFile *CreateTLOF(const PPCTargetMachine &TM) {
- if (TM.getSubtargetImpl()->isDarwin())
+static TargetLoweringObjectFile *createTLOF(const Triple &TT) {
+ // If it isn't a Mach-O file, then it's going to be a Linux ELF
+ // object file.
+ if (TT.isOSDarwin())
return new TargetLoweringObjectFileMachO();
- if (TM.getSubtargetImpl()->isSVR4ABI())
- return new PPC64LinuxTargetObjectFile();
-
- return new TargetLoweringObjectFileELF();
+ return new PPC64LinuxTargetObjectFile();
}
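
A quick sketch of the triple-based selection (hypothetical triples; Triple.h is added to the includes by this patch):

    Triple TDarwin("powerpc-apple-darwin11");
    assert(TDarwin.isOSDarwin());  // -> TargetLoweringObjectFileMachO
    Triple TLinux("powerpc64le-unknown-linux-gnu");
    assert(!TLinux.isOSDarwin());  // -> PPC64LinuxTargetObjectFile

Note that 32-bit ELF targets now also get PPC64LinuxTargetObjectFile rather than the plain ELF lowering; presumably its PPC64-specific handling simply never triggers there.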
PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
- : TargetLowering(TM, CreateTLOF(TM)), PPCSubTarget(*TM.getSubtargetImpl()) {
- const PPCSubtarget *Subtarget = &TM.getSubtarget<PPCSubtarget>();
-
+ : TargetLowering(TM, createTLOF(Triple(TM.getTargetTriple()))),
+ Subtarget(*TM.getSubtargetImpl()) {
setPow2DivIsCheap();
// Use _setjmp/_longjmp instead of setjmp/longjmp.
@@ -72,7 +71,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
// On PPC32/64, arguments smaller than 4/8 bytes are extended, so all
// arguments are at least 4/8 bytes aligned.
- bool isPPC64 = Subtarget->isPPC64();
+ bool isPPC64 = Subtarget.isPPC64();
setMinStackArgumentAlignment(isPPC64 ? 8:4);
// Set up the register classes.
@@ -98,10 +97,10 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setIndexedStoreAction(ISD::PRE_INC, MVT::i32, Legal);
setIndexedStoreAction(ISD::PRE_INC, MVT::i64, Legal);
- if (Subtarget->useCRBits()) {
+ if (Subtarget.useCRBits()) {
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
- if (isPPC64 || Subtarget->hasFPCVT()) {
+ if (isPPC64 || Subtarget.hasFPCVT()) {
setOperationAction(ISD::SINT_TO_FP, MVT::i1, Promote);
AddPromotedToType (ISD::SINT_TO_FP, MVT::i1,
isPPC64 ? MVT::i64 : MVT::i32);
@@ -176,17 +175,17 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
// If we're enabling GP optimizations, use hardware square root
- if (!Subtarget->hasFSQRT() &&
+ if (!Subtarget.hasFSQRT() &&
!(TM.Options.UnsafeFPMath &&
- Subtarget->hasFRSQRTE() && Subtarget->hasFRE()))
+ Subtarget.hasFRSQRTE() && Subtarget.hasFRE()))
setOperationAction(ISD::FSQRT, MVT::f64, Expand);
- if (!Subtarget->hasFSQRT() &&
+ if (!Subtarget.hasFSQRT() &&
!(TM.Options.UnsafeFPMath &&
- Subtarget->hasFRSQRTES() && Subtarget->hasFRES()))
+ Subtarget.hasFRSQRTES() && Subtarget.hasFRES()))
setOperationAction(ISD::FSQRT, MVT::f32, Expand);
- if (Subtarget->hasFCPSGN()) {
+ if (Subtarget.hasFCPSGN()) {
setOperationAction(ISD::FCOPYSIGN, MVT::f64, Legal);
setOperationAction(ISD::FCOPYSIGN, MVT::f32, Legal);
} else {
@@ -194,7 +193,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
}
- if (Subtarget->hasFPRND()) {
+ if (Subtarget.hasFPRND()) {
setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
setOperationAction(ISD::FCEIL, MVT::f64, Legal);
setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
@@ -216,7 +215,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand);
setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Expand);
- if (Subtarget->hasPOPCNTD()) {
+ if (Subtarget.hasPOPCNTD()) {
setOperationAction(ISD::CTPOP, MVT::i32 , Legal);
setOperationAction(ISD::CTPOP, MVT::i64 , Legal);
} else {
@@ -228,7 +227,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setOperationAction(ISD::ROTR, MVT::i32 , Expand);
setOperationAction(ISD::ROTR, MVT::i64 , Expand);
- if (!Subtarget->useCRBits()) {
+ if (!Subtarget.useCRBits()) {
// PowerPC does not have Select
setOperationAction(ISD::SELECT, MVT::i32, Expand);
setOperationAction(ISD::SELECT, MVT::i64, Expand);
@@ -241,11 +240,11 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
// PowerPC wants to optimize integer setcc a bit
- if (!Subtarget->useCRBits())
+ if (!Subtarget.useCRBits())
setOperationAction(ISD::SETCC, MVT::i32, Custom);
// PowerPC does not have BRCOND which requires SetCC
- if (!Subtarget->useCRBits())
+ if (!Subtarget.useCRBits())
setOperationAction(ISD::BRCOND, MVT::Other, Expand);
setOperationAction(ISD::BR_JT, MVT::Other, Expand);
@@ -297,7 +296,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
// VASTART needs to be custom lowered to use the VarArgsFrameIndex
setOperationAction(ISD::VASTART , MVT::Other, Custom);
- if (Subtarget->isSVR4ABI()) {
+ if (Subtarget.isSVR4ABI()) {
if (isPPC64) {
// VAARG always uses double-word chunks, so promote anything smaller.
setOperationAction(ISD::VAARG, MVT::i1, Promote);
@@ -317,7 +316,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
} else
setOperationAction(ISD::VAARG, MVT::Other, Expand);
- if (Subtarget->isSVR4ABI() && !isPPC64)
+ if (Subtarget.isSVR4ABI() && !isPPC64)
// VACOPY is custom lowered with the 32-bit SVR4 ABI.
setOperationAction(ISD::VACOPY , MVT::Other, Custom);
else
@@ -350,7 +349,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
setCondCodeAction(ISD::SETONE, MVT::f64, Expand);
- if (Subtarget->has64BitSupport()) {
+ if (Subtarget.has64BitSupport()) {
// They also have instructions for converting between i64 and fp.
setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
@@ -360,7 +359,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
// We cannot do this with Promote because i64 is not a legal type.
setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
- if (PPCSubTarget.hasLFIWAX() || Subtarget->isPPC64())
+ if (Subtarget.hasLFIWAX() || Subtarget.isPPC64())
setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
} else {
// PowerPC does not have FP_TO_UINT on 32-bit implementations.
@@ -368,8 +367,8 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
}
// With the instructions enabled under FPCVT, we can do everything.
- if (PPCSubTarget.hasFPCVT()) {
- if (Subtarget->has64BitSupport()) {
+ if (Subtarget.hasFPCVT()) {
+ if (Subtarget.has64BitSupport()) {
setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
@@ -382,7 +381,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
}
- if (Subtarget->use64BitRegs()) {
+ if (Subtarget.use64BitRegs()) {
// 64-bit PowerPC implementations can support i64 types directly
addRegisterClass(MVT::i64, &PPC::G8RCRegClass);
// BUILD_PAIR can't be handled natively, and should be expanded to shl/or
@@ -398,7 +397,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
}
- if (Subtarget->hasAltivec()) {
+ if (Subtarget.hasAltivec()) {
// First set operation action for all vector types to expand. Then we
// will selectively turn on ones that can be effectively codegen'd.
for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
@@ -488,7 +487,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setOperationAction(ISD::XOR , MVT::v4i32, Legal);
setOperationAction(ISD::LOAD , MVT::v4i32, Legal);
setOperationAction(ISD::SELECT, MVT::v4i32,
- Subtarget->useCRBits() ? Legal : Expand);
+ Subtarget.useCRBits() ? Legal : Expand);
setOperationAction(ISD::STORE , MVT::v4i32, Legal);
setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
@@ -507,7 +506,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setOperationAction(ISD::MUL, MVT::v4f32, Legal);
setOperationAction(ISD::FMA, MVT::v4f32, Legal);
- if (TM.Options.UnsafeFPMath || Subtarget->hasVSX()) {
+ if (TM.Options.UnsafeFPMath || Subtarget.hasVSX()) {
setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
}
@@ -535,7 +534,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setCondCodeAction(ISD::SETO, MVT::v4f32, Expand);
setCondCodeAction(ISD::SETONE, MVT::v4f32, Expand);
- if (Subtarget->hasVSX()) {
+ if (Subtarget.hasVSX()) {
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal);
@@ -613,7 +612,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
}
}
- if (Subtarget->has64BitSupport()) {
+ if (Subtarget.has64BitSupport()) {
setOperationAction(ISD::PREFETCH, MVT::Other, Legal);
setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Legal);
}
@@ -642,7 +641,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setTargetDAGCombine(ISD::LOAD);
setTargetDAGCombine(ISD::STORE);
setTargetDAGCombine(ISD::BR_CC);
- if (Subtarget->useCRBits())
+ if (Subtarget.useCRBits())
setTargetDAGCombine(ISD::BRCOND);
setTargetDAGCombine(ISD::BSWAP);
setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
@@ -651,7 +650,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setTargetDAGCombine(ISD::ZERO_EXTEND);
setTargetDAGCombine(ISD::ANY_EXTEND);
- if (Subtarget->useCRBits()) {
+ if (Subtarget.useCRBits()) {
setTargetDAGCombine(ISD::TRUNCATE);
setTargetDAGCombine(ISD::SETCC);
setTargetDAGCombine(ISD::SELECT_CC);
@@ -664,7 +663,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
}
// Darwin long double math library functions have $LDBL128 appended.
- if (Subtarget->isDarwin()) {
+ if (Subtarget.isDarwin()) {
setLibcallName(RTLIB::COS_PPCF128, "cosl$LDBL128");
setLibcallName(RTLIB::POW_PPCF128, "powl$LDBL128");
setLibcallName(RTLIB::REM_PPCF128, "fmodl$LDBL128");
@@ -679,21 +678,21 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
// With 32 condition bits, we don't need to sink (and duplicate) compares
// aggressively in CodeGenPrep.
- if (Subtarget->useCRBits())
+ if (Subtarget.useCRBits())
setHasMultipleConditionRegisters();
setMinFunctionAlignment(2);
- if (PPCSubTarget.isDarwin())
+ if (Subtarget.isDarwin())
setPrefFunctionAlignment(4);
- if (isPPC64 && Subtarget->isJITCodeModel())
+ if (isPPC64 && Subtarget.isJITCodeModel())
// Temporary workaround for the inability of PPC64 JIT to handle jump
// tables.
setSupportJumpTables(false);
setInsertFencesForAtomic(true);
- if (Subtarget->enableMachineScheduler())
+ if (Subtarget.enableMachineScheduler())
setSchedulingPreference(Sched::Source);
else
setSchedulingPreference(Sched::Hybrid);
@@ -702,8 +701,8 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
// The Freescale cores do better with aggressive inlining of memcpy and
// friends. GCC uses the same threshold of 128 bytes (= 32 word stores).
- if (Subtarget->getDarwinDirective() == PPC::DIR_E500mc ||
- Subtarget->getDarwinDirective() == PPC::DIR_E5500) {
+ if (Subtarget.getDarwinDirective() == PPC::DIR_E500mc ||
+ Subtarget.getDarwinDirective() == PPC::DIR_E5500) {
MaxStoresPerMemset = 32;
MaxStoresPerMemsetOptSize = 16;
MaxStoresPerMemcpy = 32;
@@ -747,14 +746,14 @@ static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign,
/// function arguments in the caller parameter area.
unsigned PPCTargetLowering::getByValTypeAlignment(Type *Ty) const {
// Darwin passes everything on 4 byte boundary.
- if (PPCSubTarget.isDarwin())
+ if (Subtarget.isDarwin())
return 4;
// 16byte and wider vectors are passed on 16byte boundary.
// The rest is 8 on PPC64 and 4 on PPC32 boundary.
- unsigned Align = PPCSubTarget.isPPC64() ? 8 : 4;
- if (PPCSubTarget.hasAltivec() || PPCSubTarget.hasQPX())
- getMaxByValAlign(Ty, Align, PPCSubTarget.hasQPX() ? 32 : 16);
+ unsigned Align = Subtarget.isPPC64() ? 8 : 4;
+ if (Subtarget.hasAltivec() || Subtarget.hasQPX())
+ getMaxByValAlign(Ty, Align, Subtarget.hasQPX() ? 32 : 16);
return Align;
}
@@ -774,7 +773,6 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::Hi: return "PPCISD::Hi";
case PPCISD::Lo: return "PPCISD::Lo";
case PPCISD::TOC_ENTRY: return "PPCISD::TOC_ENTRY";
- case PPCISD::TOC_RESTORE: return "PPCISD::TOC_RESTORE";
case PPCISD::LOAD: return "PPCISD::LOAD";
case PPCISD::LOAD_TOC: return "PPCISD::LOAD_TOC";
case PPCISD::DYNALLOC: return "PPCISD::DYNALLOC";
@@ -826,7 +824,7 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
EVT PPCTargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
if (!VT.isVector())
- return PPCSubTarget.useCRBits() ? MVT::i1 : MVT::i32;
+ return Subtarget.useCRBits() ? MVT::i1 : MVT::i32;
return VT.changeVectorElementTypeToInteger();
}
@@ -855,15 +853,17 @@ static bool isConstantOrUndef(int Op, int Val) {
/// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUHUM instruction.
-bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary) {
+bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary,
+ SelectionDAG &DAG) {
+ unsigned j = DAG.getTarget().getDataLayout()->isLittleEndian() ? 0 : 1;
if (!isUnary) {
for (unsigned i = 0; i != 16; ++i)
- if (!isConstantOrUndef(N->getMaskElt(i), i*2+1))
+ if (!isConstantOrUndef(N->getMaskElt(i), i*2+j))
return false;
} else {
for (unsigned i = 0; i != 8; ++i)
- if (!isConstantOrUndef(N->getMaskElt(i), i*2+1) ||
- !isConstantOrUndef(N->getMaskElt(i+8), i*2+1))
+ if (!isConstantOrUndef(N->getMaskElt(i), i*2+j) ||
+ !isConstantOrUndef(N->getMaskElt(i+8), i*2+j))
return false;
}
return true;
@@ -871,18 +871,27 @@ bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary) {
/// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUWUM instruction.
-bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary) {
+bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary,
+ SelectionDAG &DAG) {
+ unsigned j, k;
+ if (DAG.getTarget().getDataLayout()->isLittleEndian()) {
+ j = 0;
+ k = 1;
+ } else {
+ j = 2;
+ k = 3;
+ }
if (!isUnary) {
for (unsigned i = 0; i != 16; i += 2)
- if (!isConstantOrUndef(N->getMaskElt(i ), i*2+2) ||
- !isConstantOrUndef(N->getMaskElt(i+1), i*2+3))
+ if (!isConstantOrUndef(N->getMaskElt(i ), i*2+j) ||
+ !isConstantOrUndef(N->getMaskElt(i+1), i*2+k))
return false;
} else {
for (unsigned i = 0; i != 8; i += 2)
- if (!isConstantOrUndef(N->getMaskElt(i ), i*2+2) ||
- !isConstantOrUndef(N->getMaskElt(i+1), i*2+3) ||
- !isConstantOrUndef(N->getMaskElt(i+8), i*2+2) ||
- !isConstantOrUndef(N->getMaskElt(i+9), i*2+3))
+ if (!isConstantOrUndef(N->getMaskElt(i ), i*2+j) ||
+ !isConstantOrUndef(N->getMaskElt(i+1), i*2+k) ||
+ !isConstantOrUndef(N->getMaskElt(i+8), i*2+j) ||
+ !isConstantOrUndef(N->getMaskElt(i+9), i*2+k))
return false;
}
return true;
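
vpkuhum keeps the low-order byte of each halfword: with big-endian lane numbering that is byte 2*i+1, with little-endian numbering byte 2*i, which is all the new j selects (isVPKUWUMShuffleMask does the same for halfword pairs with j and k). The binary, two-input check over a raw mask, as a standalone sketch (isVPKUHUMMask is invented here; -1 marks an undef lane):

    static bool isVPKUHUMMask(const int Mask[16], bool IsLE) {
      unsigned j = IsLE ? 0 : 1; // low byte of halfword i is byte 2*i+j
      for (unsigned i = 0; i != 16; ++i)
        if (Mask[i] >= 0 && Mask[i] != int(i * 2 + j))
          return false;
      return true;
    }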
@@ -909,27 +918,39 @@ static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize,
}
/// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
-/// a VRGL* instruction with the specified unit size (1,2 or 4 bytes).
+/// a VMRGL* instruction with the specified unit size (1,2 or 4 bytes).
bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
- bool isUnary) {
- if (!isUnary)
- return isVMerge(N, UnitSize, 8, 24);
- return isVMerge(N, UnitSize, 8, 8);
+ bool isUnary, SelectionDAG &DAG) {
+ if (DAG.getTarget().getDataLayout()->isLittleEndian()) {
+ if (!isUnary)
+ return isVMerge(N, UnitSize, 0, 16);
+ return isVMerge(N, UnitSize, 0, 0);
+ } else {
+ if (!isUnary)
+ return isVMerge(N, UnitSize, 8, 24);
+ return isVMerge(N, UnitSize, 8, 8);
+ }
}
/// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
-/// a VRGH* instruction with the specified unit size (1,2 or 4 bytes).
+/// a VMRGH* instruction with the specified unit size (1,2 or 4 bytes).
bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
- bool isUnary) {
- if (!isUnary)
- return isVMerge(N, UnitSize, 0, 16);
- return isVMerge(N, UnitSize, 0, 0);
+ bool isUnary, SelectionDAG &DAG) {
+ if (DAG.getTarget().getDataLayout()->isLittleEndian()) {
+ if (!isUnary)
+ return isVMerge(N, UnitSize, 8, 24);
+ return isVMerge(N, UnitSize, 8, 8);
+ } else {
+ if (!isUnary)
+ return isVMerge(N, UnitSize, 0, 16);
+ return isVMerge(N, UnitSize, 0, 0);
+ }
}
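
Under little-endian lane numbering the two halves of the register swap roles, so the pattern the big-endian check accepted as a merge-high is what a merge-low looks like, and vice versa; the routines simply exchange their isVMerge start indices. For UnitSize == 4 the expected two-input masks are (sketch, spelled out for reference):

    static const int VMRGLW_BE[16] = { 8,  9, 10, 11, 24, 25, 26, 27,
                                      12, 13, 14, 15, 28, 29, 30, 31};
    static const int VMRGLW_LE[16] = { 0,  1,  2,  3, 16, 17, 18, 19,
                                       4,  5,  6,  7, 20, 21, 22, 23};
    // VMRGLW_LE is exactly what isVMerge(N, 4, 0, 16) accepts.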
/// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
/// amount, otherwise return -1.
-int PPC::isVSLDOIShuffleMask(SDNode *N, bool isUnary) {
+int PPC::isVSLDOIShuffleMask(SDNode *N, bool isUnary, SelectionDAG &DAG) {
if (N->getValueType(0) != MVT::v16i8)
return -1;
@@ -946,18 +967,38 @@ int PPC::isVSLDOIShuffleMask(SDNode *N, bool isUnary) {
// numbered from this value.
unsigned ShiftAmt = SVOp->getMaskElt(i);
if (ShiftAmt < i) return -1;
- ShiftAmt -= i;
- if (!isUnary) {
- // Check the rest of the elements to see if they are consecutive.
- for (++i; i != 16; ++i)
- if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
- return -1;
- } else {
- // Check the rest of the elements to see if they are consecutive.
- for (++i; i != 16; ++i)
- if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15))
- return -1;
+ if (DAG.getTarget().getDataLayout()->isLittleEndian()) {
+
+ ShiftAmt += i;
+
+ if (!isUnary) {
+ // Check the rest of the elements to see if they are consecutive.
+ for (++i; i != 16; ++i)
+ if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt - i))
+ return -1;
+ } else {
+ // Check the rest of the elements to see if they are consecutive.
+ for (++i; i != 16; ++i)
+ if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt - i) & 15))
+ return -1;
+ }
+
+ } else { // Big Endian
+
+ ShiftAmt -= i;
+
+ if (!isUnary) {
+ // Check the rest of the elements to see if they are consecutive.
+ for (++i; i != 16; ++i)
+ if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
+ return -1;
+ } else {
+ // Check the rest of the elements to see if they are consecutive.
+ for (++i; i != 16; ++i)
+ if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15))
+ return -1;
+ }
}
return ShiftAmt;
}
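
With little-endian numbering a vsldoi mask descends instead of ascends, hence ShiftAmt += i and the ShiftAmt - i comparisons. The binary case over a raw mask, as a compact sketch (isLEVSLDOIMask is invented; in a well-formed two-input shuffle every defined element lies in [0, 31], so Mask[15] == ShiftAmt - 15 already implies ShiftAmt >= 15):

    static int isLEVSLDOIMask(const int Mask[16]) {
      int ShiftAmt = Mask[0]; // assume lane 0 is defined, for brevity
      for (int i = 1; i != 16; ++i)
        if (Mask[i] >= 0 && Mask[i] != ShiftAmt - i)
          return -1;
      return ShiftAmt; // e.g. {20, 19, ..., 5} yields 20
    }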
@@ -1010,10 +1051,14 @@ bool PPC::isAllNegativeZeroVector(SDNode *N) {
/// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the
/// specified isSplatShuffleMask VECTOR_SHUFFLE mask.
-unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize) {
+unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize,
+ SelectionDAG &DAG) {
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
assert(isSplatShuffleMask(SVOp, EltSize));
- return SVOp->getMaskElt(0) / EltSize;
+ if (DAG.getTarget().getDataLayout()->isLittleEndian())
+ return (16 / EltSize) - 1 - (SVOp->getMaskElt(0) / EltSize);
+ else
+ return SVOp->getMaskElt(0) / EltSize;
}
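
The vsplt* immediates count elements from the big end of the register, so on little-endian targets the lane index must be mirrored. The arithmetic in isolation (vspltImm is invented here):

    static unsigned vspltImm(unsigned FirstMaskElt, unsigned EltSize,
                             bool IsLE) {
      unsigned Elt = FirstMaskElt / EltSize;
      return IsLE ? (16 / EltSize) - 1 - Elt : Elt;
    }
    // v8i16 splat whose mask starts at byte 6: vspltImm(6, 2, false) == 3,
    // vspltImm(6, 2, true) == 4, the mirror of lane 3 among 8 lanes.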
/// get_VSPLTI_elt - If this is a build_vector of constants which can be formed
@@ -1299,7 +1344,7 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
short Imm;
if (isIntS16Immediate(CN, Imm) && (!Aligned || (Imm & 3) == 0)) {
Disp = DAG.getTargetConstant(Imm, CN->getValueType(0));
- Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
+ Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
CN->getValueType(0));
return true;
}
@@ -1350,7 +1395,7 @@ bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base,
}
// Otherwise, do it the hard way, using R0 as the base register.
- Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
+ Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
N.getValueType());
Index = N;
return true;
@@ -1497,7 +1542,7 @@ SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
// 64-bit SVR4 ABI code is always position-independent.
// The actual address of the GlobalValue is stored in the TOC.
- if (PPCSubTarget.isSVR4ABI() && PPCSubTarget.isPPC64()) {
+ if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0);
return DAG.getNode(PPCISD::TOC_ENTRY, SDLoc(CP), MVT::i64, GA,
DAG.getRegister(PPC::X2, MVT::i64));
@@ -1518,7 +1563,7 @@ SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
// 64-bit SVR4 ABI code is always position-independent.
// The actual address of the GlobalValue is stored in the TOC.
- if (PPCSubTarget.isSVR4ABI() && PPCSubTarget.isPPC64()) {
+ if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
return DAG.getNode(PPCISD::TOC_ENTRY, SDLoc(JT), MVT::i64, GA,
DAG.getRegister(PPC::X2, MVT::i64));
@@ -1555,7 +1600,7 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
SDLoc dl(GA);
const GlobalValue *GV = GA->getGlobal();
EVT PtrVT = getPointerTy();
- bool is64bit = PPCSubTarget.isPPC64();
+ bool is64bit = Subtarget.isPPC64();
TLSModel::Model Model = getTargetMachine().getTLSModel(GV);
@@ -1646,7 +1691,7 @@ SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
// 64-bit SVR4 ABI code is always position-independent.
// The actual address of the GlobalValue is stored in the TOC.
- if (PPCSubTarget.isSVR4ABI() && PPCSubTarget.isPPC64()) {
+ if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset());
return DAG.getNode(PPCISD::TOC_ENTRY, DL, MVT::i64, GA,
DAG.getRegister(PPC::X2, MVT::i64));
@@ -1891,7 +1936,8 @@ SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(dl).setChain(Chain)
.setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()),
- DAG.getExternalSymbol("__trampoline_setup", PtrVT), &Args, 0);
+ DAG.getExternalSymbol("__trampoline_setup", PtrVT),
+ std::move(Args), 0);
std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
return CallResult.second;
@@ -2086,6 +2132,43 @@ static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags,
return ArgSize;
}
+/// CalculateStackSlotAlignment - Calculates the alignment of this argument
+/// on the stack.
+static unsigned CalculateStackSlotAlignment(EVT ArgVT, ISD::ArgFlagsTy Flags,
+ unsigned PtrByteSize) {
+ unsigned Align = PtrByteSize;
+
+ // Altivec parameters are padded to a 16 byte boundary.
+ if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
+ ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
+ ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64)
+ Align = 16;
+
+ // ByVal parameters are aligned as requested.
+ if (Flags.isByVal()) {
+ unsigned BVAlign = Flags.getByValAlign();
+ if (BVAlign > PtrByteSize) {
+ if (BVAlign % PtrByteSize != 0)
+ llvm_unreachable(
+ "ByVal alignment is not a multiple of the pointer size");
+
+ Align = BVAlign;
+ }
+ }
+
+ return Align;
+}
+
+/// EnsureStackAlignment - Round stack frame size up from NumBytes to
+/// ensure the minimum alignment required for the target.
+static unsigned EnsureStackAlignment(const TargetMachine &Target,
+ unsigned NumBytes) {
+ unsigned TargetAlign = Target.getFrameLowering()->getStackAlignment();
+ unsigned AlignMask = TargetAlign - 1;
+ NumBytes = (NumBytes + AlignMask) & ~AlignMask;
+ return NumBytes;
+}
+
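
Both helpers are plain integer rounding. A worked example under the 64-bit ELF ABI (PtrByteSize == 8, 48-byte linkage assumed): an i64 lands at offset 48, a byval with requested alignment 32 pushes its offset from 56 up to 64, and a following v4f32 starts 16-byte aligned at 96. The rounding itself, as a sketch:

    static unsigned alignTo(unsigned Offset, unsigned Align) {
      return ((Offset + Align - 1) / Align) * Align; // same formula as above
    }
    // alignTo(56, 32) == 64   (byval slot), alignTo(96, 16) == 96 (Altivec)
    // EnsureStackAlignment uses the mask form: (100 + 15) & ~15u == 112.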
SDValue
PPCTargetLowering::LowerFormalArguments(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
@@ -2094,8 +2177,8 @@ PPCTargetLowering::LowerFormalArguments(SDValue Chain,
SDLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals)
const {
- if (PPCSubTarget.isSVR4ABI()) {
- if (PPCSubTarget.isPPC64())
+ if (Subtarget.isSVR4ABI()) {
+ if (Subtarget.isPPC64())
return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins,
dl, DAG, InVals);
else
@@ -2161,7 +2244,8 @@ PPCTargetLowering::LowerFormalArguments_32SVR4(
getTargetMachine(), ArgLocs, *DAG.getContext());
// Reserve space for the linkage area on the stack.
- CCInfo.AllocateStack(PPCFrameLowering::getLinkageSize(false, false), PtrByteSize);
+ unsigned LinkageSize = PPCFrameLowering::getLinkageSize(false, false);
+ CCInfo.AllocateStack(LinkageSize, PtrByteSize);
CCInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4);
@@ -2184,7 +2268,7 @@ PPCTargetLowering::LowerFormalArguments_32SVR4(
RC = &PPC::F4RCRegClass;
break;
case MVT::f64:
- if (PPCSubTarget.hasVSX())
+ if (Subtarget.hasVSX())
RC = &PPC::VSFRCRegClass;
else
RC = &PPC::F8RCRegClass;
@@ -2240,23 +2324,14 @@ PPCTargetLowering::LowerFormalArguments_32SVR4(
// Area that is at least reserved in the caller of this function.
unsigned MinReservedArea = CCByValInfo.getNextStackOffset();
+ MinReservedArea = std::max(MinReservedArea, LinkageSize);
// Set the size that is at least reserved in caller of this function. Tail
// call optimized function's reserved stack space needs to be aligned so that
// taking the difference between two stack areas will result in an aligned
// stack.
- PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
-
- MinReservedArea =
- std::max(MinReservedArea,
- PPCFrameLowering::getMinCallFrameSize(false, false));
-
- unsigned TargetAlign = DAG.getMachineFunction().getTarget().getFrameLowering()->
- getStackAlignment();
- unsigned AlignMask = TargetAlign-1;
- MinReservedArea = (MinReservedArea + AlignMask) & ~AlignMask;
-
- FI->setMinReservedArea(MinReservedArea);
+ MinReservedArea = EnsureStackAlignment(MF.getTarget(), MinReservedArea);
+ FuncInfo->setMinReservedArea(MinReservedArea);
SmallVector<SDValue, 8> MemOps;
@@ -2352,32 +2427,6 @@ PPCTargetLowering::extendArgForPPC64(ISD::ArgFlagsTy Flags, EVT ObjectVT,
return DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, ArgVal);
}
-// Set the size that is at least reserved in caller of this function. Tail
-// call optimized functions' reserved stack space needs to be aligned so that
-// taking the difference between two stack areas will result in an aligned
-// stack.
-void
-PPCTargetLowering::setMinReservedArea(MachineFunction &MF, SelectionDAG &DAG,
- unsigned nAltivecParamsAtEnd,
- unsigned MinReservedArea,
- bool isPPC64) const {
- PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
- // Add the Altivec parameters at the end, if needed.
- if (nAltivecParamsAtEnd) {
- MinReservedArea = ((MinReservedArea+15)/16)*16;
- MinReservedArea += 16*nAltivecParamsAtEnd;
- }
- MinReservedArea =
- std::max(MinReservedArea,
- PPCFrameLowering::getMinCallFrameSize(isPPC64, true));
- unsigned TargetAlign
- = DAG.getMachineFunction().getTarget().getFrameLowering()->
- getStackAlignment();
- unsigned AlignMask = TargetAlign-1;
- MinReservedArea = (MinReservedArea + AlignMask) & ~AlignMask;
- FI->setMinReservedArea(MinReservedArea);
-}
-
SDValue
PPCTargetLowering::LowerFormalArguments_64SVR4(
SDValue Chain,
@@ -2388,6 +2437,7 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
SmallVectorImpl<SDValue> &InVals) const {
// TODO: add description of PPC stack frame format, or at least some docs.
//
+ bool isLittleEndian = Subtarget.isLittleEndian();
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
@@ -2398,9 +2448,8 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
(CallConv == CallingConv::Fast));
unsigned PtrByteSize = 8;
- unsigned ArgOffset = PPCFrameLowering::getLinkageSize(true, true);
- // Area that is at least reserved in caller of this function.
- unsigned MinReservedArea = ArgOffset;
+ unsigned LinkageSize = PPCFrameLowering::getLinkageSize(true, false);
+ unsigned ArgOffset = LinkageSize;
static const MCPhysReg GPR[] = {
PPC::X3, PPC::X4, PPC::X5, PPC::X6,
@@ -2422,14 +2471,13 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
const unsigned Num_FPR_Regs = 13;
const unsigned Num_VR_Regs = array_lengthof(VR);
- unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
+ unsigned GPR_idx, FPR_idx = 0, VR_idx = 0;
// Add DAG nodes to load the arguments or copy them out of registers. On
// entry to a function on PPC, the arguments start after the linkage area,
// although the first ones are often in registers.
SmallVector<SDValue, 8> MemOps;
- unsigned nAltivecParamsAtEnd = 0;
Function::const_arg_iterator FuncArg = MF.getFunction()->arg_begin();
unsigned CurArgIdx = 0;
for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
@@ -2442,24 +2490,15 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
std::advance(FuncArg, Ins[ArgNo].OrigArgIndex - CurArgIdx);
CurArgIdx = Ins[ArgNo].OrigArgIndex;
+ // Respect alignment of argument on the stack.
+ unsigned Align =
+ CalculateStackSlotAlignment(ObjectVT, Flags, PtrByteSize);
+ ArgOffset = ((ArgOffset + Align - 1) / Align) * Align;
unsigned CurArgOffset = ArgOffset;
- // Varargs or 64 bit Altivec parameters are padded to a 16 byte boundary.
- if (ObjectVT==MVT::v4f32 || ObjectVT==MVT::v4i32 ||
- ObjectVT==MVT::v8i16 || ObjectVT==MVT::v16i8 ||
- ObjectVT==MVT::v2f64 || ObjectVT==MVT::v2i64) {
- if (isVarArg) {
- MinReservedArea = ((MinReservedArea+15)/16)*16;
- MinReservedArea += CalculateStackSlotSize(ObjectVT,
- Flags,
- PtrByteSize);
- } else
- nAltivecParamsAtEnd++;
- } else
- // Calculate min reserved area.
- MinReservedArea += CalculateStackSlotSize(Ins[ArgNo].VT,
- Flags,
- PtrByteSize);
+ // Compute GPR index associated with argument offset.
+ GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
+ GPR_idx = std::min(GPR_idx, Num_GPR_Regs);
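
Since each pointer-sized slice of the parameter save area corresponds to one of X3..X10, the GPR index no longer needs separate bookkeeping; it falls out of the aligned offset (values below assume LinkageSize == 48):

    //   GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize
    //   ( 48 - 48) / 8 == 0  -> X3        (104 - 48) / 8 == 7 -> X10
    //   (112 - 48) / 8 == 8  -> clamped to Num_GPR_Regs: memory only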
// FIXME the codegen can be much improved in some cases.
// We do not have to keep everything in memory.
@@ -2481,14 +2520,8 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
continue;
}
- unsigned BVAlign = Flags.getByValAlign();
- if (BVAlign > 8) {
- ArgOffset = ((ArgOffset+BVAlign-1)/BVAlign)*BVAlign;
- CurArgOffset = ArgOffset;
- }
-
// All aggregates smaller than 8 bytes must be passed right-justified.
- if (ObjSize < PtrByteSize)
+ if (ObjSize < PtrByteSize && !isLittleEndian)
CurArgOffset = CurArgOffset + (PtrByteSize - ObjSize);
// The value of the object is its address.
int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset, true);
@@ -2522,7 +2555,6 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
}
MemOps.push_back(Store);
- ++GPR_idx;
}
// Whether we copied from a register or not, advance the offset
// into the parameter save area by a full doubleword.
@@ -2567,8 +2599,6 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
// PPC64 passes i8, i16, and i32 values in i64 registers. Promote
// value to MVT::i64 and then truncate to the correct register size.
ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
-
- ++GPR_idx;
} else {
needsLoad = true;
ArgSize = PtrByteSize;
@@ -2578,18 +2608,13 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
case MVT::f32:
case MVT::f64:
- // Every 8 bytes of argument space consumes one of the GPRs available for
- // argument passing.
- if (GPR_idx != Num_GPR_Regs) {
- ++GPR_idx;
- }
if (FPR_idx != Num_FPR_Regs) {
unsigned VReg;
if (ObjectVT == MVT::f32)
VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F4RCRegClass);
else
- VReg = MF.addLiveIn(FPR[FPR_idx], PPCSubTarget.hasVSX() ?
+ VReg = MF.addLiveIn(FPR[FPR_idx], Subtarget.hasVSX() ?
&PPC::VSFRCRegClass :
&PPC::F8RCRegClass);
@@ -2608,39 +2633,25 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
case MVT::v16i8:
case MVT::v2f64:
case MVT::v2i64:
- // Note that vector arguments in registers don't reserve stack space,
- // except in varargs functions.
if (VR_idx != Num_VR_Regs) {
unsigned VReg = (ObjectVT == MVT::v2f64 || ObjectVT == MVT::v2i64) ?
MF.addLiveIn(VSRH[VR_idx], &PPC::VSHRCRegClass) :
MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
- if (isVarArg) {
- while ((ArgOffset % 16) != 0) {
- ArgOffset += PtrByteSize;
- if (GPR_idx != Num_GPR_Regs)
- GPR_idx++;
- }
- ArgOffset += 16;
- GPR_idx = std::min(GPR_idx+4, Num_GPR_Regs); // FIXME correct for ppc64?
- }
++VR_idx;
} else {
- // Vectors are aligned.
- ArgOffset = ((ArgOffset+15)/16)*16;
- CurArgOffset = ArgOffset;
- ArgOffset += 16;
needsLoad = true;
}
+ ArgOffset += 16;
break;
}
// We need to load the argument to a virtual register if we determined
// above that we ran out of physical registers of the appropriate type.
if (needsLoad) {
- int FI = MFI->CreateFixedObject(ObjSize,
- CurArgOffset + (ArgSize - ObjSize),
- isImmutable);
+ if (ObjSize < ArgSize && !isLittleEndian)
+ CurArgOffset += ArgSize - ObjSize;
+ int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset, isImmutable);
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo(),
false, false, false, 0);
@@ -2649,11 +2660,16 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
InVals.push_back(ArgVal);
}
+ // Area that is at least reserved in the caller of this function.
+ unsigned MinReservedArea;
+ MinReservedArea = std::max(ArgOffset, LinkageSize + 8 * PtrByteSize);
+
// Set the size that is at least reserved in caller of this function. Tail
// call optimized functions' reserved stack space needs to be aligned so that
// taking the difference between two stack areas will result in an aligned
// stack.
- setMinReservedArea(MF, DAG, nAltivecParamsAtEnd, MinReservedArea, true);
+ MinReservedArea = EnsureStackAlignment(MF.getTarget(), MinReservedArea);
+ FuncInfo->setMinReservedArea(MinReservedArea);
// If the function takes variable number of arguments, make a frame index for
// the start of the first vararg value... for expansion of llvm.va_start.
@@ -2667,7 +2683,8 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
// If this function is vararg, store any remaining integer argument regs
// to their spots on the stack so that they may be loaded by dereferencing the
// result of va_next.
- for (; GPR_idx != Num_GPR_Regs; ++GPR_idx) {
+ for (GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
+ GPR_idx < Num_GPR_Regs; ++GPR_idx) {
unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
@@ -2706,7 +2723,8 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
(CallConv == CallingConv::Fast));
unsigned PtrByteSize = isPPC64 ? 8 : 4;
- unsigned ArgOffset = PPCFrameLowering::getLinkageSize(isPPC64, true);
+ unsigned LinkageSize = PPCFrameLowering::getLinkageSize(isPPC64, true);
+ unsigned ArgOffset = LinkageSize;
// Area that is at least reserved in caller of this function.
unsigned MinReservedArea = ArgOffset;
@@ -2997,11 +3015,21 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
InVals.push_back(ArgVal);
}
+ // Allow for Altivec parameters at the end, if needed.
+ if (nAltivecParamsAtEnd) {
+ MinReservedArea = ((MinReservedArea+15)/16)*16;
+ MinReservedArea += 16*nAltivecParamsAtEnd;
+ }
+
+ // Area that is at least reserved in the caller of this function.
+ MinReservedArea = std::max(MinReservedArea, LinkageSize + 8 * PtrByteSize);
+
// Set the size that is at least reserved in caller of this function. Tail
// call optimized functions' reserved stack space needs to be aligned so that
// taking the difference between two stack areas will result in an aligned
// stack.
- setMinReservedArea(MF, DAG, nAltivecParamsAtEnd, MinReservedArea, isPPC64);
+ MinReservedArea = EnsureStackAlignment(MF.getTarget(), MinReservedArea);
+ FuncInfo->setMinReservedArea(MinReservedArea);
// If the function takes variable number of arguments, make a frame index for
// the start of the first vararg value... for expansion of llvm.va_start.
@@ -3040,75 +3068,6 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
return Chain;
}
-/// CalculateParameterAndLinkageAreaSize - Get the size of the parameter plus
-/// linkage area for the Darwin ABI, or the 64-bit SVR4 ABI.
-static unsigned
-CalculateParameterAndLinkageAreaSize(SelectionDAG &DAG,
- bool isPPC64,
- bool isVarArg,
- unsigned CC,
- const SmallVectorImpl<ISD::OutputArg>
- &Outs,
- const SmallVectorImpl<SDValue> &OutVals,
- unsigned &nAltivecParamsAtEnd) {
- // Count how many bytes are to be pushed on the stack, including the linkage
- // area, and parameter passing area. We start with 24/48 bytes, which is
- // prereserved space for [SP][CR][LR][3 x unused].
- unsigned NumBytes = PPCFrameLowering::getLinkageSize(isPPC64, true);
- unsigned NumOps = Outs.size();
- unsigned PtrByteSize = isPPC64 ? 8 : 4;
-
- // Add up all the space actually used.
- // In 32-bit non-varargs calls, Altivec parameters all go at the end; usually
- // they all go in registers, but we must reserve stack space for them for
- // possible use by the caller. In varargs or 64-bit calls, parameters are
- // assigned stack space in order, with padding so Altivec parameters are
- // 16-byte aligned.
- nAltivecParamsAtEnd = 0;
- for (unsigned i = 0; i != NumOps; ++i) {
- ISD::ArgFlagsTy Flags = Outs[i].Flags;
- EVT ArgVT = Outs[i].VT;
- // Varargs Altivec parameters are padded to a 16 byte boundary.
- if (ArgVT==MVT::v4f32 || ArgVT==MVT::v4i32 ||
- ArgVT==MVT::v8i16 || ArgVT==MVT::v16i8 ||
- ArgVT==MVT::v2f64 || ArgVT==MVT::v2i64) {
- if (!isVarArg && !isPPC64) {
- // Non-varargs Altivec parameters go after all the non-Altivec
- // parameters; handle those later so we know how much padding we need.
- nAltivecParamsAtEnd++;
- continue;
- }
- // Varargs and 64-bit Altivec parameters are padded to 16 byte boundary.
- NumBytes = ((NumBytes+15)/16)*16;
- }
- NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
- }
-
- // Allow for Altivec parameters at the end, if needed.
- if (nAltivecParamsAtEnd) {
- NumBytes = ((NumBytes+15)/16)*16;
- NumBytes += 16*nAltivecParamsAtEnd;
- }
-
- // The prolog code of the callee may store up to 8 GPR argument registers to
- // the stack, allowing va_start to index over them in memory if its varargs.
- // Because we cannot tell if this is needed on the caller side, we have to
- // conservatively assume that it is needed. As such, make sure we have at
- // least enough stack space for the caller to store the 8 GPRs.
- NumBytes = std::max(NumBytes,
- PPCFrameLowering::getMinCallFrameSize(isPPC64, true));
-
- // Tail call needs the stack to be aligned.
- if (CC == CallingConv::Fast && DAG.getTarget().Options.GuaranteedTailCallOpt){
- unsigned TargetAlign = DAG.getMachineFunction().getTarget().
- getFrameLowering()->getStackAlignment();
- unsigned AlignMask = TargetAlign-1;
- NumBytes = (NumBytes + AlignMask) & ~AlignMask;
- }
-
- return NumBytes;
-}
-
/// CalculateTailCallSPDiff - Get the amount the stack pointer has to be
/// adjusted to accommodate the arguments for the tailcall.
static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall,
@@ -3280,7 +3239,7 @@ SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(SelectionDAG & DAG,
SDLoc dl) const {
if (SPDiff) {
// Load the LR and FP stack slot for later adjusting.
- EVT VT = PPCSubTarget.isPPC64() ? MVT::i64 : MVT::i32;
+ EVT VT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
LROpOut = getReturnAddrFrameIndex(DAG);
LROpOut = DAG.getLoad(VT, dl, Chain, LROpOut, MachinePointerInfo(),
false, false, false, 0);
@@ -3373,10 +3332,10 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
SDValue &Chain, SDLoc dl, int SPDiff, bool isTailCall,
SmallVectorImpl<std::pair<unsigned, SDValue> > &RegsToPass,
SmallVectorImpl<SDValue> &Ops, std::vector<EVT> &NodeTys,
- const PPCSubtarget &PPCSubTarget) {
+ const PPCSubtarget &Subtarget) {
- bool isPPC64 = PPCSubTarget.isPPC64();
- bool isSVR4ABI = PPCSubTarget.isSVR4ABI();
+ bool isPPC64 = Subtarget.isPPC64();
+ bool isSVR4ABI = Subtarget.isSVR4ABI();
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
NodeTys.push_back(MVT::Other); // Returns a chain
@@ -3385,11 +3344,12 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
unsigned CallOpc = PPCISD::CALL;
bool needIndirectCall = true;
- if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG)) {
- // If this is an absolute destination address, use the munged value.
- Callee = SDValue(Dest, 0);
- needIndirectCall = false;
- }
+ if (!isSVR4ABI || !isPPC64)
+ if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG)) {
+ // If this is an absolute destination address, use the munged value.
+ Callee = SDValue(Dest, 0);
+ needIndirectCall = false;
+ }
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
// XXX Work around for http://llvm.org/bugs/show_bug.cgi?id=5201
@@ -3398,8 +3358,8 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
if (!DAG.getTarget().getSubtarget<PPCSubtarget>().isJITCodeModel()) {
unsigned OpFlags = 0;
if (DAG.getTarget().getRelocationModel() != Reloc::Static &&
- (PPCSubTarget.getTargetTriple().isMacOSX() &&
- PPCSubTarget.getTargetTriple().isMacOSXVersionLT(10, 5)) &&
+ (Subtarget.getTargetTriple().isMacOSX() &&
+ Subtarget.getTargetTriple().isMacOSXVersionLT(10, 5)) &&
(G->getGlobal()->isDeclaration() ||
G->getGlobal()->isWeakForLinker())) {
// PC-relative references to external symbols should go through $stub,
@@ -3422,8 +3382,8 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
unsigned char OpFlags = 0;
if (DAG.getTarget().getRelocationModel() != Reloc::Static &&
- (PPCSubTarget.getTargetTriple().isMacOSX() &&
- PPCSubTarget.getTargetTriple().isMacOSXVersionLT(10, 5))) {
+ (Subtarget.getTargetTriple().isMacOSX() &&
+ Subtarget.getTargetTriple().isMacOSXVersionLT(10, 5))) {
// PC-relative references to external symbols should go through $stub,
// unless we're building with the leopard linker or later, which
// automatically synthesizes these stubs.
@@ -3497,8 +3457,10 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
// additional register being allocated and an unnecessary move instruction
// being generated.
VTs = DAG.getVTList(MVT::Other, MVT::Glue);
+ SDValue TOCOff = DAG.getIntPtrConstant(8);
+ SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, TOCOff);
SDValue LoadTOCPtr = DAG.getNode(PPCISD::LOAD_TOC, dl, VTs, Chain,
- Callee, InFlag);
+ AddTOC, InFlag);
Chain = LoadTOCPtr.getValue(0);
InFlag = LoadTOCPtr.getValue(1);
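
Under the 64-bit ELFv1 ABI an indirect callee is a pointer to a three-doubleword function descriptor, so the callee's TOC value sits at Callee + 8; that is what the AddTOC + LOAD_TOC pair fetches. The descriptor shape, as an illustrative struct (not from the tree):

    #include <cstdint>
    struct FunctionDescriptor {
      uint64_t EntryPoint; // + 0: address of the code
      uint64_t TOCBase;    // + 8: r2 value the callee expects
      uint64_t EnvPointer; // +16: environment pointer, unused by C/C++
    };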
@@ -3613,10 +3575,10 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, SDLoc dl,
SmallVector<SDValue, 8> Ops;
unsigned CallOpc = PrepareCall(DAG, Callee, InFlag, Chain, dl, SPDiff,
isTailCall, RegsToPass, Ops, NodeTys,
- PPCSubTarget);
+ Subtarget);
// Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls
- if (isVarArg && PPCSubTarget.isSVR4ABI() && !PPCSubTarget.isPPC64())
+ if (isVarArg && Subtarget.isSVR4ABI() && !Subtarget.isPPC64())
Ops.push_back(DAG.getRegister(PPC::CR1EQ, MVT::i32));
// When performing tail call optimization the callee pops its arguments off
@@ -3657,7 +3619,7 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, SDLoc dl,
// same TOC), the NOP will remain unchanged.
bool needsTOCRestore = false;
- if (!isTailCall && PPCSubTarget.isSVR4ABI()&& PPCSubTarget.isPPC64()) {
+ if (!isTailCall && Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
if (CallOpc == PPCISD::BCTRL) {
// This is a call through a function pointer.
// Restore the caller TOC from the save area into R2.
@@ -3682,7 +3644,12 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, SDLoc dl,
if (needsTOCRestore) {
SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
- Chain = DAG.getNode(PPCISD::TOC_RESTORE, dl, VTs, Chain, InFlag);
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ SDValue StackPtr = DAG.getRegister(PPC::X1, PtrVT);
+ unsigned TOCSaveOffset = PPCFrameLowering::getTOCSaveOffset();
+ SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset);
+ SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, StackPtr, TOCOff);
+ Chain = DAG.getNode(PPCISD::LOAD_TOC, dl, VTs, Chain, AddTOC, InFlag);
InFlag = Chain.getValue(1);
}
@@ -3718,8 +3685,8 @@ PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
report_fatal_error("failed to perform tail call elimination on a call "
"site marked musttail");
- if (PPCSubTarget.isSVR4ABI()) {
- if (PPCSubTarget.isPPC64())
+ if (Subtarget.isSVR4ABI()) {
+ if (Subtarget.isPPC64())
return LowerCall_64SVR4(Chain, Callee, CallConv, isVarArg,
isTailCall, Outs, OutVals, Ins,
dl, DAG, InVals);
@@ -3981,6 +3948,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
SDLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const {
+ bool isLittleEndian = Subtarget.isLittleEndian();
unsigned NumOps = Outs.size();
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
@@ -3997,16 +3965,37 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
CallConv == CallingConv::Fast)
MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
- unsigned nAltivecParamsAtEnd = 0;
-
// Count how many bytes are to be pushed on the stack, including the linkage
// area, and parameter passing area. We start with at least 48 bytes, which
// is reserved space for [SP][CR][LR][3 x unused].
- // NOTE: For PPC64, nAltivecParamsAtEnd always remains zero as a result
- // of this call.
- unsigned NumBytes =
- CalculateParameterAndLinkageAreaSize(DAG, true, isVarArg, CallConv,
- Outs, OutVals, nAltivecParamsAtEnd);
+ unsigned LinkageSize = PPCFrameLowering::getLinkageSize(true, false);
+ unsigned NumBytes = LinkageSize;
+
+ // Add up all the space actually used.
+ for (unsigned i = 0; i != NumOps; ++i) {
+ ISD::ArgFlagsTy Flags = Outs[i].Flags;
+ EVT ArgVT = Outs[i].VT;
+
+    // Respect alignment of argument on the stack.
+ unsigned Align = CalculateStackSlotAlignment(ArgVT, Flags, PtrByteSize);
+ NumBytes = ((NumBytes + Align - 1) / Align) * Align;
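+    // (Round NumBytes up to the next Align boundary: e.g. for NumBytes = 52
+    // and Align = 16, ((52 + 15) / 16) * 16 = 64.)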
+
+ NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
+ }
+
+ unsigned NumBytesActuallyUsed = NumBytes;
+
+ // The prolog code of the callee may store up to 8 GPR argument registers to
+  // the stack, allowing va_start to index over them in memory if it is varargs.
+ // Because we cannot tell if this is needed on the caller side, we have to
+ // conservatively assume that it is needed. As such, make sure we have at
+ // least enough stack space for the caller to store the 8 GPRs.
+ NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);
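+  // With the 48-byte 64-bit linkage area and 8-byte pointers this floor is
+  // 48 + 8 * 8 = 112 bytes.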
+
+ // Tail call needs the stack to be aligned.
+ if (getTargetMachine().Options.GuaranteedTailCallOpt &&
+ CallConv == CallingConv::Fast)
+ NumBytes = EnsureStackAlignment(MF.getTarget(), NumBytes);
// Calculate by how many bytes the stack has to be adjusted in case of tail
// call optimization.
@@ -4038,8 +4027,8 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
// memory. Also, if this is a vararg function, floating point operations
// must be stored to our stack, and loaded into integer regs as well, if
// any integer regs are available for argument passing.
- unsigned ArgOffset = PPCFrameLowering::getLinkageSize(true, true);
- unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
+ unsigned ArgOffset = LinkageSize;
+ unsigned GPR_idx, FPR_idx = 0, VR_idx = 0;
static const MCPhysReg GPR[] = {
PPC::X3, PPC::X4, PPC::X5, PPC::X6,
@@ -4068,6 +4057,15 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
SDValue Arg = OutVals[i];
ISD::ArgFlagsTy Flags = Outs[i].Flags;
+      // Respect alignment of argument on the stack.
+ unsigned Align =
+ CalculateStackSlotAlignment(Outs[i].VT, Flags, PtrByteSize);
+ ArgOffset = ((ArgOffset + Align - 1) / Align) * Align;
+
+      // Compute GPR index associated with argument offset.
+ GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
+ GPR_idx = std::min(GPR_idx, NumGPRs);
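+      // E.g. an argument at ArgOffset == LinkageSize + 16 maps to GPR_idx 2,
+      // i.e. X5, the third entry of the GPR array above.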
+
// PtrOff will be used to store the current argument to the stack if a
// register cannot be found for it.
SDValue PtrOff;
@@ -4099,15 +4097,6 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
if (Size == 0)
continue;
- unsigned BVAlign = Flags.getByValAlign();
- if (BVAlign > 8) {
- if (BVAlign % PtrByteSize != 0)
- llvm_unreachable(
- "ByVal alignment is not a multiple of the pointer size");
-
- ArgOffset = ((ArgOffset+BVAlign-1)/BVAlign)*BVAlign;
- }
-
// All aggregates smaller than 8 bytes must be passed right-justified.
if (Size==1 || Size==2 || Size==4) {
EVT VT = (Size==1) ? MVT::i8 : ((Size==2) ? MVT::i16 : MVT::i32);
@@ -4116,7 +4105,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
MachinePointerInfo(), VT,
false, false, 0);
MemOpChains.push_back(Load.getValue(1));
- RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
+ RegsToPass.push_back(std::make_pair(GPR[GPR_idx], Load));
ArgOffset += PtrByteSize;
continue;
@@ -4124,9 +4113,12 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
}
if (GPR_idx == NumGPRs && Size < 8) {
- SDValue Const = DAG.getConstant(PtrByteSize - Size,
- PtrOff.getValueType());
- SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
+ SDValue AddPtr = PtrOff;
+ if (!isLittleEndian) {
+ SDValue Const = DAG.getConstant(PtrByteSize - Size,
+ PtrOff.getValueType());
+ AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
+ }
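+          // Big endian right-justifies the small aggregate in its doubleword:
+          // a 3-byte object is copied to PtrOff + (8 - 3) = PtrOff + 5. On
+          // little endian the unadjusted PtrOff already gives the low-order
+          // placement.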
Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
CallSeqStart,
Flags, DAG, dl);
@@ -4161,8 +4153,11 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
// small aggregates, particularly for packed ones.
// FIXME: It would be preferable to use the slot in the
// parameter save area instead of a new local variable.
- SDValue Const = DAG.getConstant(8 - Size, PtrOff.getValueType());
- SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
+ SDValue AddPtr = PtrOff;
+ if (!isLittleEndian) {
+ SDValue Const = DAG.getConstant(8 - Size, PtrOff.getValueType());
+ AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
+ }
Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
CallSeqStart,
Flags, DAG, dl);
@@ -4172,7 +4167,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
MachinePointerInfo(),
false, false, false, 0);
MemOpChains.push_back(Load.getValue(1));
- RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
+ RegsToPass.push_back(std::make_pair(GPR[GPR_idx], Load));
// Done with this argument.
ArgOffset += PtrByteSize;
@@ -4205,7 +4200,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
case MVT::i32:
case MVT::i64:
if (GPR_idx != NumGPRs) {
- RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
+ RegsToPass.push_back(std::make_pair(GPR[GPR_idx], Arg));
} else {
LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
true, isTailCall, false, MemOpChains,
@@ -4223,7 +4218,8 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
// must be passed right-justified in the stack doubleword, and
// in the GPR, if one is available.
SDValue StoreOff;
- if (Arg.getSimpleValueType().SimpleTy == MVT::f32) {
+ if (Arg.getSimpleValueType().SimpleTy == MVT::f32 &&
+ !isLittleEndian) {
SDValue ConstFour = DAG.getConstant(4, PtrOff.getValueType());
StoreOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
} else
@@ -4239,15 +4235,13 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
MachinePointerInfo(), false, false,
false, 0);
MemOpChains.push_back(Load.getValue(1));
- RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
+ RegsToPass.push_back(std::make_pair(GPR[GPR_idx], Load));
}
- } else if (GPR_idx != NumGPRs)
- // If we have any FPRs remaining, we may also have GPRs remaining.
- ++GPR_idx;
+ }
} else {
// Single-precision floating-point values are mapped to the
// second (rightmost) word of the stack doubleword.
- if (Arg.getValueType() == MVT::f32) {
+ if (Arg.getValueType() == MVT::f32 && !isLittleEndian) {
SDValue ConstFour = DAG.getConstant(4, PtrOff.getValueType());
PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
}
@@ -4264,21 +4258,13 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
case MVT::v16i8:
case MVT::v2f64:
case MVT::v2i64:
+ // For a varargs call, named arguments go into VRs or on the stack as
+ // usual; unnamed arguments always go to the stack or the corresponding
+ // GPRs when within range. For now, we always put the value in both
+ // locations (or even all three).
if (isVarArg) {
- // These go aligned on the stack, or in the corresponding R registers
- // when within range. The Darwin PPC ABI doc claims they also go in
- // V registers; in fact gcc does this only for arguments that are
- // prototyped, not for those that match the ... We do it for all
- // arguments, seems to work.
- while (ArgOffset % 16 !=0) {
- ArgOffset += PtrByteSize;
- if (GPR_idx != NumGPRs)
- GPR_idx++;
- }
// We could elide this store in the case where the object fits
// entirely in R registers. Maybe later.
- PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
- DAG.getConstant(ArgOffset, PtrVT));
SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff,
MachinePointerInfo(), false, false, 0);
MemOpChains.push_back(Store);
@@ -4309,10 +4295,8 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
break;
}
- // Non-varargs Altivec params generally go in registers, but have
- // stack space allocated at the end.
+ // Non-varargs Altivec params go into VRs or on the stack.
if (VR_idx != NumVRs) {
- // Doesn't have GPR space allocated.
unsigned VReg = (Arg.getSimpleValueType() == MVT::v2f64 ||
Arg.getSimpleValueType() == MVT::v2i64) ?
VSRH[VR_idx] : VR[VR_idx];
@@ -4323,12 +4307,15 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
true, isTailCall, true, MemOpChains,
TailCallArguments, dl);
- ArgOffset += 16;
}
+ ArgOffset += 16;
break;
}
}
+ assert(NumBytesActuallyUsed == ArgOffset);
+ (void)NumBytesActuallyUsed;
+
if (!MemOpChains.empty())
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
@@ -4337,19 +4324,15 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
// pointers in the 64-bit SVR4 ABI.
if (!isTailCall &&
!dyn_cast<GlobalAddressSDNode>(Callee) &&
- !dyn_cast<ExternalSymbolSDNode>(Callee) &&
- !isBLACompatibleAddress(Callee, DAG)) {
+ !dyn_cast<ExternalSymbolSDNode>(Callee)) {
// Load r2 into a virtual register and store it to the TOC save area.
SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64);
// TOC save area offset.
- SDValue PtrOff = DAG.getIntPtrConstant(40);
+ unsigned TOCSaveOffset = PPCFrameLowering::getTOCSaveOffset();
+ SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset);
SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr, MachinePointerInfo(),
false, false, 0);
- // R12 must contain the address of an indirect callee. This does not
- // mean the MTCTR instruction must use R12; it's easier to model this
- // as an extra parameter, so do that.
- RegsToPass.push_back(std::make_pair((unsigned)PPC::X12, Callee));
}
// Build a sequence of copy-to-reg nodes chained together with token chain
@@ -4397,15 +4380,55 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
CallConv == CallingConv::Fast)
MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
- unsigned nAltivecParamsAtEnd = 0;
-
// Count how many bytes are to be pushed on the stack, including the linkage
// area, and parameter passing area. We start with 24/48 bytes, which is
// prereserved space for [SP][CR][LR][3 x unused].
- unsigned NumBytes =
- CalculateParameterAndLinkageAreaSize(DAG, isPPC64, isVarArg, CallConv,
- Outs, OutVals,
- nAltivecParamsAtEnd);
+ unsigned LinkageSize = PPCFrameLowering::getLinkageSize(isPPC64, true);
+ unsigned NumBytes = LinkageSize;
+
+ // Add up all the space actually used.
+ // In 32-bit non-varargs calls, Altivec parameters all go at the end; usually
+ // they all go in registers, but we must reserve stack space for them for
+ // possible use by the caller. In varargs or 64-bit calls, parameters are
+ // assigned stack space in order, with padding so Altivec parameters are
+ // 16-byte aligned.
+ unsigned nAltivecParamsAtEnd = 0;
+ for (unsigned i = 0; i != NumOps; ++i) {
+ ISD::ArgFlagsTy Flags = Outs[i].Flags;
+ EVT ArgVT = Outs[i].VT;
+    // Varargs Altivec parameters are padded to a 16-byte boundary.
+ if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
+ ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
+ ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64) {
+ if (!isVarArg && !isPPC64) {
+ // Non-varargs Altivec parameters go after all the non-Altivec
+ // parameters; handle those later so we know how much padding we need.
+ nAltivecParamsAtEnd++;
+ continue;
+ }
+      // Varargs and 64-bit Altivec parameters are padded to a 16-byte boundary.
+ NumBytes = ((NumBytes+15)/16)*16;
+ }
+ NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
+ }
+
+ // Allow for Altivec parameters at the end, if needed.
+ if (nAltivecParamsAtEnd) {
+ NumBytes = ((NumBytes+15)/16)*16;
+ NumBytes += 16*nAltivecParamsAtEnd;
+ }
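+  // E.g. two deferred Altivec parameters round NumBytes up to a 16-byte
+  // boundary and then add 2 * 16 = 32 bytes.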
+
+ // The prolog code of the callee may store up to 8 GPR argument registers to
+  // the stack, allowing va_start to index over them in memory if it is varargs.
+ // Because we cannot tell if this is needed on the caller side, we have to
+ // conservatively assume that it is needed. As such, make sure we have at
+ // least enough stack space for the caller to store the 8 GPRs.
+ NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);
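+  // For 32-bit Darwin this floor is 24 + 8 * 4 = 56 bytes; for 64-bit it is
+  // 48 + 8 * 8 = 112 bytes.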
+
+ // Tail call needs the stack to be aligned.
+ if (getTargetMachine().Options.GuaranteedTailCallOpt &&
+ CallConv == CallingConv::Fast)
+ NumBytes = EnsureStackAlignment(MF.getTarget(), NumBytes);
// Calculate by how many bytes the stack has to be adjusted in case of tail
// call optimization.
@@ -4441,7 +4464,7 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
// memory. Also, if this is a vararg function, floating point operations
// must be stored to our stack, and loaded into integer regs as well, if
// any integer regs are available for argument passing.
- unsigned ArgOffset = PPCFrameLowering::getLinkageSize(isPPC64, true);
+ unsigned ArgOffset = LinkageSize;
unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
static const MCPhysReg GPR_32[] = { // 32-bit registers.
@@ -4818,8 +4841,8 @@ SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG,
SDValue
PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG & DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
- bool isPPC64 = PPCSubTarget.isPPC64();
- bool isDarwinABI = PPCSubTarget.isDarwinABI();
+ bool isPPC64 = Subtarget.isPPC64();
+ bool isDarwinABI = Subtarget.isDarwinABI();
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
// Get current frame pointer save index. The users of this index will be
@@ -4842,8 +4865,8 @@ PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG & DAG) const {
SDValue
PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
- bool isPPC64 = PPCSubTarget.isPPC64();
- bool isDarwinABI = PPCSubTarget.isDarwinABI();
+ bool isPPC64 = Subtarget.isPPC64();
+ bool isDarwinABI = Subtarget.isDarwinABI();
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
// Get current frame pointer save index. The users of this index will be
@@ -5063,12 +5086,12 @@ SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
case MVT::i32:
Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIWZ :
- (PPCSubTarget.hasFPCVT() ? PPCISD::FCTIWUZ :
+ (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ :
PPCISD::FCTIDZ),
dl, MVT::f64, Src);
break;
case MVT::i64:
- assert((Op.getOpcode() == ISD::FP_TO_SINT || PPCSubTarget.hasFPCVT()) &&
+ assert((Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT()) &&
"i64 FP_TO_UINT is supported only with FPCVT");
Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
PPCISD::FCTIDUZ,
@@ -5077,8 +5100,8 @@ SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
}
// Convert the FP value to an int value through memory.
- bool i32Stack = Op.getValueType() == MVT::i32 && PPCSubTarget.hasSTFIWX() &&
- (Op.getOpcode() == ISD::FP_TO_SINT || PPCSubTarget.hasFPCVT());
+ bool i32Stack = Op.getValueType() == MVT::i32 && Subtarget.hasSTFIWX() &&
+ (Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT());
SDValue FIPtr = DAG.CreateStackTemporary(i32Stack ? MVT::i32 : MVT::f64);
int FI = cast<FrameIndexSDNode>(FIPtr)->getIndex();
MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(FI);
@@ -5120,17 +5143,17 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
DAG.getConstantFP(1.0, Op.getValueType()),
DAG.getConstantFP(0.0, Op.getValueType()));
- assert((Op.getOpcode() == ISD::SINT_TO_FP || PPCSubTarget.hasFPCVT()) &&
+ assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) &&
"UINT_TO_FP is supported only with FPCVT");
// If we have FCFIDS, then use it when converting to single-precision.
// Otherwise, convert to double-precision and then round.
- unsigned FCFOp = (PPCSubTarget.hasFPCVT() && Op.getValueType() == MVT::f32) ?
+ unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32) ?
(Op.getOpcode() == ISD::UINT_TO_FP ?
PPCISD::FCFIDUS : PPCISD::FCFIDS) :
(Op.getOpcode() == ISD::UINT_TO_FP ?
PPCISD::FCFIDU : PPCISD::FCFID);
- MVT FCFTy = (PPCSubTarget.hasFPCVT() && Op.getValueType() == MVT::f32) ?
+ MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32) ?
MVT::f32 : MVT::f64;
if (Op.getOperand(0).getValueType() == MVT::i64) {
@@ -5146,7 +5169,7 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
// However, if -enable-unsafe-fp-math is in effect, accept double
// rounding to avoid the extra overhead.
if (Op.getValueType() == MVT::f32 &&
- !PPCSubTarget.hasFPCVT() &&
+ !Subtarget.hasFPCVT() &&
!DAG.getTarget().Options.UnsafeFPMath) {
// Twiddle input to make sure the low 11 bits are zero. (If this
@@ -5184,7 +5207,7 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
SDValue Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SINT);
SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Bits);
- if (Op.getValueType() == MVT::f32 && !PPCSubTarget.hasFPCVT())
+ if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT())
FP = DAG.getNode(ISD::FP_ROUND, dl,
MVT::f32, FP, DAG.getIntPtrConstant(0));
return FP;
@@ -5201,7 +5224,7 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
SDValue Ld;
- if (PPCSubTarget.hasLFIWAX() || PPCSubTarget.hasFPCVT()) {
+ if (Subtarget.hasLFIWAX() || Subtarget.hasFPCVT()) {
int FrameIdx = FrameInfo->CreateStackObject(4, 4, false);
SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
@@ -5220,7 +5243,7 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
dl, DAG.getVTList(MVT::f64, MVT::Other),
Ops, MVT::i32, MMO);
} else {
- assert(PPCSubTarget.isPPC64() &&
+ assert(Subtarget.isPPC64() &&
"i32->FP without LFIWAX supported only on PPC64");
int FrameIdx = FrameInfo->CreateStackObject(8, 8, false);
@@ -5242,7 +5265,7 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
// FCFID it and return it.
SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Ld);
- if (Op.getValueType() == MVT::f32 && !PPCSubTarget.hasFPCVT())
+ if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT())
FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP, DAG.getIntPtrConstant(0));
return FP;
}
@@ -5557,6 +5580,22 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
}
+ // The remaining cases assume either big endian element order or
+ // a splat-size that equates to the element size of the vector
+ // to be built. An example that doesn't work for little endian is
+ // {0, -1, 0, -1, 0, -1, 0, -1} which has a splat size of 32 bits
+ // and a vector element size of 16 bits. The code below will
+ // produce the vector in big endian element order, which for little
+ // endian is {-1, 0, -1, 0, -1, 0, -1, 0}.
+
+ // For now, just avoid these optimizations in that case.
+ // FIXME: Develop correct optimizations for LE with mismatched
+ // splat and element sizes.
+
+ if (Subtarget.isLittleEndian() &&
+ SplatSize != Op.getValueType().getVectorElementType().getSizeInBits())
+ return SDValue();
+
// Check to see if this is a wide variety of vsplti*, binop self cases.
static const signed char SplatCsts[] = {
-1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7,
@@ -5725,6 +5764,7 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
SDValue V2 = Op.getOperand(1);
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
EVT VT = Op.getValueType();
+ bool isLittleEndian = Subtarget.isLittleEndian();
// Cases that are handled by instructions that take permute immediates
// (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be
@@ -5733,15 +5773,15 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
if (PPC::isSplatShuffleMask(SVOp, 1) ||
PPC::isSplatShuffleMask(SVOp, 2) ||
PPC::isSplatShuffleMask(SVOp, 4) ||
- PPC::isVPKUWUMShuffleMask(SVOp, true) ||
- PPC::isVPKUHUMShuffleMask(SVOp, true) ||
- PPC::isVSLDOIShuffleMask(SVOp, true) != -1 ||
- PPC::isVMRGLShuffleMask(SVOp, 1, true) ||
- PPC::isVMRGLShuffleMask(SVOp, 2, true) ||
- PPC::isVMRGLShuffleMask(SVOp, 4, true) ||
- PPC::isVMRGHShuffleMask(SVOp, 1, true) ||
- PPC::isVMRGHShuffleMask(SVOp, 2, true) ||
- PPC::isVMRGHShuffleMask(SVOp, 4, true)) {
+ PPC::isVPKUWUMShuffleMask(SVOp, true, DAG) ||
+ PPC::isVPKUHUMShuffleMask(SVOp, true, DAG) ||
+ PPC::isVSLDOIShuffleMask(SVOp, true, DAG) != -1 ||
+ PPC::isVMRGLShuffleMask(SVOp, 1, true, DAG) ||
+ PPC::isVMRGLShuffleMask(SVOp, 2, true, DAG) ||
+ PPC::isVMRGLShuffleMask(SVOp, 4, true, DAG) ||
+ PPC::isVMRGHShuffleMask(SVOp, 1, true, DAG) ||
+ PPC::isVMRGHShuffleMask(SVOp, 2, true, DAG) ||
+ PPC::isVMRGHShuffleMask(SVOp, 4, true, DAG)) {
return Op;
}
}
@@ -5749,15 +5789,15 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
// Altivec has a variety of "shuffle immediates" that take two vector inputs
// and produce a fixed permutation. If any of these match, do not lower to
// VPERM.
- if (PPC::isVPKUWUMShuffleMask(SVOp, false) ||
- PPC::isVPKUHUMShuffleMask(SVOp, false) ||
- PPC::isVSLDOIShuffleMask(SVOp, false) != -1 ||
- PPC::isVMRGLShuffleMask(SVOp, 1, false) ||
- PPC::isVMRGLShuffleMask(SVOp, 2, false) ||
- PPC::isVMRGLShuffleMask(SVOp, 4, false) ||
- PPC::isVMRGHShuffleMask(SVOp, 1, false) ||
- PPC::isVMRGHShuffleMask(SVOp, 2, false) ||
- PPC::isVMRGHShuffleMask(SVOp, 4, false))
+ if (PPC::isVPKUWUMShuffleMask(SVOp, false, DAG) ||
+ PPC::isVPKUHUMShuffleMask(SVOp, false, DAG) ||
+ PPC::isVSLDOIShuffleMask(SVOp, false, DAG) != -1 ||
+ PPC::isVMRGLShuffleMask(SVOp, 1, false, DAG) ||
+ PPC::isVMRGLShuffleMask(SVOp, 2, false, DAG) ||
+ PPC::isVMRGLShuffleMask(SVOp, 4, false, DAG) ||
+ PPC::isVMRGHShuffleMask(SVOp, 1, false, DAG) ||
+ PPC::isVMRGHShuffleMask(SVOp, 2, false, DAG) ||
+ PPC::isVMRGHShuffleMask(SVOp, 4, false, DAG))
return Op;
// Check to see if this is a shuffle of 4-byte values. If so, we can use our
@@ -5791,7 +5831,9 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
// If this shuffle can be expressed as a shuffle of 4-byte elements, use the
// perfect shuffle vector to determine if it is cost effective to do this as
// discrete instructions, or whether we should use a vperm.
- if (isFourElementShuffle) {
+ // For now, we skip this for little endian until such time as we have a
+ // little-endian perfect shuffle table.
+ if (isFourElementShuffle && !isLittleEndian) {
// Compute the index in the perfect shuffle table.
unsigned PFTableIndex =
PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
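    // Each PFIndex is in [0,8] (eight source elements plus undef), so the four
    // indexes pack into a single base-9 value below 9^4 == 6561.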
@@ -5820,6 +5862,11 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
// The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except
// that it is in input element units, not in bytes. Convert now.
+
+ // For little endian, the order of the input vectors is reversed, and
+ // the permutation mask is complemented with respect to 31. This is
+ // necessary to produce proper semantics with the big-endian-biased vperm
+ // instruction.
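+  // E.g. a big-endian mask byte of 5 becomes 31 - 5 = 26; combined with the
+  // swapped V2/V1 operands this selects the corresponding byte under the
+  // reversed little-endian numbering.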
EVT EltVT = V1.getValueType().getVectorElementType();
unsigned BytesPerElement = EltVT.getSizeInBits()/8;
@@ -5828,13 +5875,22 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
unsigned SrcElt = PermMask[i] < 0 ? 0 : PermMask[i];
for (unsigned j = 0; j != BytesPerElement; ++j)
- ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
- MVT::i32));
+ if (isLittleEndian)
+ ResultMask.push_back(DAG.getConstant(31 - (SrcElt*BytesPerElement+j),
+ MVT::i32));
+ else
+ ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
+ MVT::i32));
}
SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8,
ResultMask);
- return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(), V1, V2, VPermMask);
+ if (isLittleEndian)
+ return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(),
+ V2, V1, VPermMask);
+ else
+ return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(),
+ V1, V2, VPermMask);
}
/// getAltivecCompareInfo - Given an intrinsic, return false if it is not an
@@ -6027,6 +6083,7 @@ SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
LHS, RHS, Zero, DAG, dl);
} else if (Op.getValueType() == MVT::v16i8) {
SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
+ bool isLittleEndian = Subtarget.isLittleEndian();
// Multiply the even 8-bit parts, producing 16-bit sums.
SDValue EvenParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuleub,
@@ -6038,13 +6095,24 @@ SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
LHS, RHS, DAG, dl, MVT::v8i16);
OddParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OddParts);
- // Merge the results together.
+ // Merge the results together. Because vmuleub and vmuloub are
+ // instructions with a big-endian bias, we must reverse the
+ // element numbering and reverse the meaning of "odd" and "even"
+ // when generating little endian code.
int Ops[16];
for (unsigned i = 0; i != 8; ++i) {
- Ops[i*2 ] = 2*i+1;
- Ops[i*2+1] = 2*i+1+16;
+ if (isLittleEndian) {
+ Ops[i*2 ] = 2*i;
+ Ops[i*2+1] = 2*i+16;
+ } else {
+ Ops[i*2 ] = 2*i+1;
+ Ops[i*2+1] = 2*i+1+16;
+ }
}
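    // E.g. on BE, result bytes {0,1} come from input bytes {1,17}, the
    // low-order bytes of the first even and first odd 16-bit products; on LE
    // they come from {0,16}, with the odd/even inputs swapped.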
- return DAG.getVectorShuffle(MVT::v16i8, dl, EvenParts, OddParts, Ops);
+ if (isLittleEndian)
+ return DAG.getVectorShuffle(MVT::v16i8, dl, OddParts, EvenParts, Ops);
+ else
+ return DAG.getVectorShuffle(MVT::v16i8, dl, EvenParts, OddParts, Ops);
} else {
llvm_unreachable("Unknown mul to lower!");
}
@@ -6064,17 +6132,17 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG);
case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG);
case ISD::VASTART:
- return LowerVASTART(Op, DAG, PPCSubTarget);
+ return LowerVASTART(Op, DAG, Subtarget);
case ISD::VAARG:
- return LowerVAARG(Op, DAG, PPCSubTarget);
+ return LowerVAARG(Op, DAG, Subtarget);
case ISD::VACOPY:
- return LowerVACOPY(Op, DAG, PPCSubTarget);
+ return LowerVACOPY(Op, DAG, Subtarget);
- case ISD::STACKRESTORE: return LowerSTACKRESTORE(Op, DAG, PPCSubTarget);
+ case ISD::STACKRESTORE: return LowerSTACKRESTORE(Op, DAG, Subtarget);
case ISD::DYNAMIC_STACKALLOC:
- return LowerDYNAMIC_STACKALLOC(Op, DAG, PPCSubTarget);
+ return LowerDYNAMIC_STACKALLOC(Op, DAG, Subtarget);
case ISD::EH_SJLJ_SETJMP: return lowerEH_SJLJ_SETJMP(Op, DAG);
case ISD::EH_SJLJ_LONGJMP: return lowerEH_SJLJ_LONGJMP(Op, DAG);
@@ -6144,7 +6212,7 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
EVT VT = N->getValueType(0);
if (VT == MVT::i64) {
- SDValue NewNode = LowerVAARG(SDValue(N, 1), DAG, PPCSubTarget);
+ SDValue NewNode = LowerVAARG(SDValue(N, 1), DAG, Subtarget);
Results.push_back(NewNode);
Results.push_back(NewNode.getValue(1));
@@ -6255,7 +6323,7 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr *MI,
// lwarx/stwcx are 32 bits. With the 32-bit atomics we can use address
// registers without caring whether they're 32 or 64, but here we're
// doing actual arithmetic on the addresses.
- bool is64bit = PPCSubTarget.isPPC64();
+ bool is64bit = Subtarget.isPPC64();
unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
const BasicBlock *LLVM_BB = BB->getBasicBlock();
@@ -6450,7 +6518,7 @@ PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr *MI,
unsigned LabelReg = MRI.createVirtualRegister(PtrRC);
unsigned BufReg = MI->getOperand(1).getReg();
- if (PPCSubTarget.isPPC64() && PPCSubTarget.isSVR4ABI()) {
+ if (Subtarget.isPPC64() && Subtarget.isSVR4ABI()) {
MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::STD))
.addReg(PPC::X2)
.addImm(TOCOffset)
@@ -6463,12 +6531,12 @@ PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr *MI,
unsigned BaseReg;
if (MF->getFunction()->getAttributes().hasAttribute(
AttributeSet::FunctionIndex, Attribute::Naked))
- BaseReg = PPCSubTarget.isPPC64() ? PPC::X1 : PPC::R1;
+ BaseReg = Subtarget.isPPC64() ? PPC::X1 : PPC::R1;
else
- BaseReg = PPCSubTarget.isPPC64() ? PPC::BP8 : PPC::BP;
+ BaseReg = Subtarget.isPPC64() ? PPC::BP8 : PPC::BP;
MIB = BuildMI(*thisMBB, MI, DL,
- TII->get(PPCSubTarget.isPPC64() ? PPC::STD : PPC::STW))
+ TII->get(Subtarget.isPPC64() ? PPC::STD : PPC::STW))
.addReg(BaseReg)
.addImm(BPOffset)
.addReg(BufReg);
@@ -6492,10 +6560,10 @@ PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr *MI,
// mainMBB:
// mainDstReg = 0
MIB = BuildMI(mainMBB, DL,
- TII->get(PPCSubTarget.isPPC64() ? PPC::MFLR8 : PPC::MFLR), LabelReg);
+ TII->get(Subtarget.isPPC64() ? PPC::MFLR8 : PPC::MFLR), LabelReg);
// Store IP
- if (PPCSubTarget.isPPC64()) {
+ if (Subtarget.isPPC64()) {
MIB = BuildMI(mainMBB, DL, TII->get(PPC::STD))
.addReg(LabelReg)
.addImm(LabelOffset)
@@ -6607,7 +6675,7 @@ PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr *MI,
MIB.setMemRefs(MMOBegin, MMOEnd);
// Reload TOC
- if (PVT == MVT::i64 && PPCSubTarget.isSVR4ABI()) {
+ if (PVT == MVT::i64 && Subtarget.isSVR4ABI()) {
MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), PPC::X2)
.addImm(TOCOffset)
.addReg(BufReg);
@@ -6645,7 +6713,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
MachineFunction *F = BB->getParent();
- if (PPCSubTarget.hasISEL() && (MI->getOpcode() == PPC::SELECT_CC_I4 ||
+ if (Subtarget.hasISEL() && (MI->getOpcode() == PPC::SELECT_CC_I4 ||
MI->getOpcode() == PPC::SELECT_CC_I8 ||
MI->getOpcode() == PPC::SELECT_I4 ||
MI->getOpcode() == PPC::SELECT_I8)) {
@@ -6765,13 +6833,13 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
BB = EmitAtomicBinary(MI, BB, true, PPC::XOR8);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I8)
- BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::ANDC);
+ BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::NAND);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I16)
- BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ANDC);
+ BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::NAND);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I32)
- BB = EmitAtomicBinary(MI, BB, false, PPC::ANDC);
+ BB = EmitAtomicBinary(MI, BB, false, PPC::NAND);
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I64)
- BB = EmitAtomicBinary(MI, BB, true, PPC::ANDC8);
+ BB = EmitAtomicBinary(MI, BB, true, PPC::NAND8);
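+  // (NAND computes ~(X & V), matching the atomicrmw nand semantics; the old
+  //  ANDC lowering computed X & ~V instead.)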
else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I8)
BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::SUBF);
@@ -6862,7 +6930,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
// We must use 64-bit registers for addresses when targeting 64-bit,
// since we're actually doing arithmetic on them. Other registers
// can be 32-bit.
- bool is64bit = PPCSubTarget.isPPC64();
+ bool is64bit = Subtarget.isPPC64();
bool is8bit = MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I8;
unsigned dest = MI->getOperand(0).getReg();
@@ -7070,10 +7138,10 @@ SDValue PPCTargetLowering::DAGCombineFastRecip(SDValue Op,
EVT VT = Op.getValueType();
- if ((VT == MVT::f32 && PPCSubTarget.hasFRES()) ||
- (VT == MVT::f64 && PPCSubTarget.hasFRE()) ||
- (VT == MVT::v4f32 && PPCSubTarget.hasAltivec()) ||
- (VT == MVT::v2f64 && PPCSubTarget.hasVSX())) {
+ if ((VT == MVT::f32 && Subtarget.hasFRES()) ||
+ (VT == MVT::f64 && Subtarget.hasFRE()) ||
+ (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
+ (VT == MVT::v2f64 && Subtarget.hasVSX())) {
// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
// For the reciprocal, we need to find the zero of the function:
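    //   F(X) = 1/X - A, for which Newton's method gives the refinement step
    //   X' = X * (2 - A * X).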
@@ -7086,7 +7154,7 @@ SDValue PPCTargetLowering::DAGCombineFastRecip(SDValue Op,
// correct after every iteration. The minimum architected relative
// accuracy is 2^-5. When hasRecipPrec(), this is 2^-14. IEEE float has
// 23 digits and double has 52 digits.
- int Iterations = PPCSubTarget.hasRecipPrec() ? 1 : 3;
+ int Iterations = Subtarget.hasRecipPrec() ? 1 : 3;
if (VT.getScalarType() == MVT::f64)
++Iterations;
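    // Each step roughly doubles the number of correct bits: from 2^-5, three
    // iterations give about 10, 20, then 40 bits (covering float's 23), and
    // the extra iteration for double reaches about 80 (covering its 52).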
@@ -7133,10 +7201,10 @@ SDValue PPCTargetLowering::DAGCombineFastRecipFSQRT(SDValue Op,
EVT VT = Op.getValueType();
- if ((VT == MVT::f32 && PPCSubTarget.hasFRSQRTES()) ||
- (VT == MVT::f64 && PPCSubTarget.hasFRSQRTE()) ||
- (VT == MVT::v4f32 && PPCSubTarget.hasAltivec()) ||
- (VT == MVT::v2f64 && PPCSubTarget.hasVSX())) {
+ if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) ||
+ (VT == MVT::f64 && Subtarget.hasFRSQRTE()) ||
+ (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
+ (VT == MVT::v2f64 && Subtarget.hasVSX())) {
// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
// For the reciprocal sqrt, we need to find the zero of the function:
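    //   F(X) = 1/X^2 - A, for which Newton's method gives the refinement step
    //   X' = X * (1.5 - 0.5 * A * X * X).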
@@ -7149,7 +7217,7 @@ SDValue PPCTargetLowering::DAGCombineFastRecipFSQRT(SDValue Op,
// correct after every iteration. The minimum architected relative
// accuracy is 2^-5. When hasRecipPrec(), this is 2^-14. IEEE float has
// 23 digits and double has 52 digits.
- int Iterations = PPCSubTarget.hasRecipPrec() ? 1 : 3;
+ int Iterations = Subtarget.hasRecipPrec() ? 1 : 3;
if (VT.getScalarType() == MVT::f64)
++Iterations;
@@ -7266,10 +7334,9 @@ static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) {
if (!Visited.count(ChainLD->getChain().getNode()))
Queue.push_back(ChainLD->getChain().getNode());
} else if (ChainNext->getOpcode() == ISD::TokenFactor) {
- for (SDNode::op_iterator O = ChainNext->op_begin(),
- OE = ChainNext->op_end(); O != OE; ++O)
- if (!Visited.count(O->getNode()))
- Queue.push_back(O->getNode());
+ for (const SDUse &O : ChainNext->ops())
+ if (!Visited.count(O.getNode()))
+ Queue.push_back(O.getNode());
} else
LoadRoots.insert(ChainNext);
}
@@ -7312,7 +7379,7 @@ SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N,
SelectionDAG &DAG = DCI.DAG;
SDLoc dl(N);
- assert(PPCSubTarget.useCRBits() &&
+ assert(Subtarget.useCRBits() &&
"Expecting to be tracking CR bits");
// If we're tracking CR bits, we need to be careful that we don't have:
// trunc(binary-ops(zext(x), zext(y)))
@@ -7610,9 +7677,9 @@ SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N,
return SDValue();
if (!((N->getOperand(0).getValueType() == MVT::i1 &&
- PPCSubTarget.useCRBits()) ||
+ Subtarget.useCRBits()) ||
(N->getOperand(0).getValueType() == MVT::i32 &&
- PPCSubTarget.isPPC64())))
+ Subtarget.isPPC64())))
return SDValue();
if (N->getOperand(0).getOpcode() != ISD::AND &&
@@ -7930,8 +7997,8 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
DCI.AddToWorklist(RV.getNode());
RV = DAGCombineFastRecip(RV, DCI);
if (RV.getNode()) {
- // Unfortunately, RV is now NaN if the input was exactly 0. Select out
- // this case and force the answer to 0.
+ // Unfortunately, RV is now NaN if the input was exactly 0. Select out
+ // this case and force the answer to 0.
EVT VT = RV.getValueType();
@@ -8051,6 +8118,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
// This is a type-legal unaligned Altivec load.
SDValue Chain = LD->getChain();
SDValue Ptr = LD->getBasePtr();
+ bool isLittleEndian = Subtarget.isLittleEndian();
// This implements the loading of unaligned vectors as described in
// the venerable Apple Velocity Engine overview. Specifically:
@@ -8058,25 +8126,28 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
// https://developer.apple.com/hardwaredrivers/ve/code_optimization.html
//
// The general idea is to expand a sequence of one or more unaligned
- // loads into a alignment-based permutation-control instruction (lvsl),
- // a series of regular vector loads (which always truncate their
- // input address to an aligned address), and a series of permutations.
- // The results of these permutations are the requested loaded values.
- // The trick is that the last "extra" load is not taken from the address
- // you might suspect (sizeof(vector) bytes after the last requested
- // load), but rather sizeof(vector) - 1 bytes after the last
- // requested vector. The point of this is to avoid a page fault if the
- // base address happened to be aligned. This works because if the base
- // address is aligned, then adding less than a full vector length will
- // cause the last vector in the sequence to be (re)loaded. Otherwise,
- // the next vector will be fetched as you might suspect was necessary.
+ // loads into an alignment-based permutation-control instruction (lvsl
+ // or lvsr), a series of regular vector loads (which always truncate
+ // their input address to an aligned address), and a series of
+ // permutations. The results of these permutations are the requested
+ // loaded values. The trick is that the last "extra" load is not taken
+ // from the address you might suspect (sizeof(vector) bytes after the
+ // last requested load), but rather sizeof(vector) - 1 bytes after the
+ // last requested vector. The point of this is to avoid a page fault if
+ // the base address happened to be aligned. This works because if the
+ // base address is aligned, then adding less than a full vector length
+ // will cause the last vector in the sequence to be (re)loaded.
+ // Otherwise, the next vector will be fetched as you might suspect was
+ // necessary.
// We might be able to reuse the permutation generation from
// a different base address offset from this one by an aligned amount.
// The INTRINSIC_WO_CHAIN DAG combine will attempt to perform this
// optimization later.
- SDValue PermCntl = BuildIntrinsicOp(Intrinsic::ppc_altivec_lvsl, Ptr,
- DAG, dl, MVT::v16i8);
+ Intrinsic::ID Intr = (isLittleEndian ?
+ Intrinsic::ppc_altivec_lvsr :
+ Intrinsic::ppc_altivec_lvsl);
+ SDValue PermCntl = BuildIntrinsicOp(Intr, Ptr, DAG, dl, MVT::v16i8);
// Refine the alignment of the original load (a "new" load created here
// which was identical to the first except for the alignment would be
@@ -8125,8 +8196,18 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
if (ExtraLoad.getValueType() != MVT::v4i32)
ExtraLoad = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, ExtraLoad);
- SDValue Perm = BuildIntrinsicOp(Intrinsic::ppc_altivec_vperm,
- BaseLoad, ExtraLoad, PermCntl, DAG, dl);
+ // Because vperm has a big-endian bias, we must reverse the order
+ // of the input vectors and complement the permute control vector
+ // when generating little endian code. We have already handled the
+ // latter by using lvsr instead of lvsl, so just reverse BaseLoad
+ // and ExtraLoad here.
+ SDValue Perm;
+ if (isLittleEndian)
+ Perm = BuildIntrinsicOp(Intrinsic::ppc_altivec_vperm,
+ ExtraLoad, BaseLoad, PermCntl, DAG, dl);
+ else
+ Perm = BuildIntrinsicOp(Intrinsic::ppc_altivec_vperm,
+ BaseLoad, ExtraLoad, PermCntl, DAG, dl);
if (VT != MVT::v4i32)
Perm = DAG.getNode(ISD::BITCAST, dl, VT, Perm);
@@ -8151,12 +8232,11 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
++UI;
SmallVector<SDValue, 8> Ops;
- for (SDNode::op_iterator O = User->op_begin(),
- OE = User->op_end(); O != OE; ++O) {
- if (*O == Use)
+ for (const SDUse &O : User->ops()) {
+ if (O == Use)
Ops.push_back(To);
else
- Ops.push_back(*O);
+ Ops.push_back(O);
}
DAG.UpdateNodeOperands(User, Ops);
@@ -8166,9 +8246,12 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
}
}
break;
- case ISD::INTRINSIC_WO_CHAIN:
- if (cast<ConstantSDNode>(N->getOperand(0))->getZExtValue() ==
- Intrinsic::ppc_altivec_lvsl &&
+ case ISD::INTRINSIC_WO_CHAIN: {
+ bool isLittleEndian = Subtarget.isLittleEndian();
+ Intrinsic::ID Intr = (isLittleEndian ?
+ Intrinsic::ppc_altivec_lvsr :
+ Intrinsic::ppc_altivec_lvsl);
+ if (cast<ConstantSDNode>(N->getOperand(0))->getZExtValue() == Intr &&
N->getOperand(1)->getOpcode() == ISD::ADD) {
SDValue Add = N->getOperand(1);
@@ -8180,8 +8263,8 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
UE = BasePtr->use_end(); UI != UE; ++UI) {
if (UI->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
cast<ConstantSDNode>(UI->getOperand(0))->getZExtValue() ==
- Intrinsic::ppc_altivec_lvsl) {
- // We've found another LVSL, and this address if an aligned
+ Intr) {
+ // We've found another LVSL/LVSR, and this address is an aligned
// multiple of that one. The results will be the same, so use the
// one we've just found instead.
@@ -8190,6 +8273,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
}
}
}
+ }
break;
case ISD::BSWAP:
@@ -8537,11 +8621,11 @@ PPCTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
// GCC RS6000 Constraint Letters
switch (Constraint[0]) {
case 'b': // R1-R31
- if (VT == MVT::i64 && PPCSubTarget.isPPC64())
+ if (VT == MVT::i64 && Subtarget.isPPC64())
return std::make_pair(0U, &PPC::G8RC_NOX0RegClass);
return std::make_pair(0U, &PPC::GPRC_NOR0RegClass);
case 'r': // R0-R31
- if (VT == MVT::i64 && PPCSubTarget.isPPC64())
+ if (VT == MVT::i64 && Subtarget.isPPC64())
return std::make_pair(0U, &PPC::G8RCRegClass);
return std::make_pair(0U, &PPC::GPRCRegClass);
case 'f':
@@ -8573,7 +8657,7 @@ PPCTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
// register.
// FIXME: If TargetLowering::getRegForInlineAsmConstraint could somehow use
// the AsmName field from *RegisterInfo.td, then this would not be necessary.
- if (R.first && VT == MVT::i64 && PPCSubTarget.isPPC64() &&
+ if (R.first && VT == MVT::i64 && Subtarget.isPPC64() &&
PPC::GPRCRegClass.contains(R.first)) {
const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
return std::make_pair(TRI->getMatchingSuperReg(R.first,
@@ -8707,8 +8791,8 @@ SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op,
// the stack.
PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
FuncInfo->setLRStoreRequired();
- bool isPPC64 = PPCSubTarget.isPPC64();
- bool isDarwinABI = PPCSubTarget.isDarwinABI();
+ bool isPPC64 = Subtarget.isPPC64();
+ bool isDarwinABI = Subtarget.isDarwinABI();
if (Depth > 0) {
SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
@@ -8762,8 +8846,8 @@ SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
// this table could be generated automatically from RegInfo.
unsigned PPCTargetLowering::getRegisterByName(const char* RegName,
EVT VT) const {
- bool isPPC64 = PPCSubTarget.isPPC64();
- bool isDarwinABI = PPCSubTarget.isDarwinABI();
+ bool isPPC64 = Subtarget.isPPC64();
+ bool isDarwinABI = Subtarget.isDarwinABI();
if ((isPPC64 && VT != MVT::i64 && VT != MVT::i32) ||
(!isPPC64 && VT != MVT::i32))
@@ -8804,7 +8888,7 @@ EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size,
bool IsMemset, bool ZeroMemset,
bool MemcpyStrSrc,
MachineFunction &MF) const {
- if (this->PPCSubTarget.isPPC64()) {
+ if (Subtarget.isPPC64()) {
return MVT::i64;
} else {
return MVT::i32;
@@ -8863,7 +8947,7 @@ bool PPCTargetLowering::allowsUnalignedMemoryAccesses(EVT VT,
return false;
if (VT.getSimpleVT().isVector()) {
- if (PPCSubTarget.hasVSX()) {
+ if (Subtarget.hasVSX()) {
if (VT != MVT::v2f64 && VT != MVT::v2i64)
return false;
} else {
@@ -8907,7 +8991,7 @@ PPCTargetLowering::shouldExpandBuildVectorWithShuffles(
}
Sched::Preference PPCTargetLowering::getSchedulingPreference(SDNode *N) const {
- if (DisableILPPref || PPCSubTarget.enableMachineScheduler())
+ if (DisableILPPref || Subtarget.enableMachineScheduler())
return TargetLowering::getSchedulingPreference(N);
return Sched::ILP;
diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h
index 080ef5d..df05aa5 100644
--- a/lib/Target/PowerPC/PPCISelLowering.h
+++ b/lib/Target/PowerPC/PPCISelLowering.h
@@ -18,7 +18,6 @@
#include "PPC.h"
#include "PPCInstrInfo.h"
#include "PPCRegisterInfo.h"
-#include "PPCSubtarget.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/Target/TargetLowering.h"
@@ -71,19 +70,14 @@ namespace llvm {
TOC_ENTRY,
- /// The following three target-specific nodes are used for calls through
+ /// The following two target-specific nodes are used for calls through
/// function pointers in the 64-bit SVR4 ABI.
- /// Restore the TOC from the TOC save area of the current stack frame.
- /// This is basically a hard coded load instruction which additionally
- /// takes/produces a flag.
- TOC_RESTORE,
-
/// Like a regular LOAD but additionally taking/producing a flag.
LOAD,
- /// LOAD into r2 (also taking/producing a flag). Like TOC_RESTORE, this is
- /// a hard coded load instruction.
+    /// Like LOAD (taking/producing a flag), but using r2 as a hard-coded
+    /// destination.
LOAD_TOC,
/// OPRC, CHAIN = DYNALLOC(CHAIN, NEGSIZE, FRAME_INDEX)
@@ -303,25 +297,27 @@ namespace llvm {
namespace PPC {
/// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUHUM instruction.
- bool isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary);
+ bool isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary,
+ SelectionDAG &DAG);
/// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUWUM instruction.
- bool isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary);
+ bool isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary,
+ SelectionDAG &DAG);
/// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
/// a VRGL* instruction with the specified unit size (1,2 or 4 bytes).
bool isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
- bool isUnary);
+ bool isUnary, SelectionDAG &DAG);
/// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
/// a VRGH* instruction with the specified unit size (1,2 or 4 bytes).
bool isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
- bool isUnary);
+ bool isUnary, SelectionDAG &DAG);
/// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
/// amount, otherwise return -1.
- int isVSLDOIShuffleMask(SDNode *N, bool isUnary);
+ int isVSLDOIShuffleMask(SDNode *N, bool isUnary, SelectionDAG &DAG);
/// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a splat of a single element that is suitable for input to
@@ -334,7 +330,7 @@ namespace llvm {
/// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the
/// specified isSplatShuffleMask VECTOR_SHUFFLE mask.
- unsigned getVSPLTImmediate(SDNode *N, unsigned EltSize);
+ unsigned getVSPLTImmediate(SDNode *N, unsigned EltSize, SelectionDAG &DAG);
/// get_VSPLTI_elt - If this is a build_vector of constants which can be
/// formed by using a vspltis[bhw] instruction of the specified element
@@ -343,8 +339,9 @@ namespace llvm {
SDValue get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG);
}
+ class PPCSubtarget;
class PPCTargetLowering : public TargetLowering {
- const PPCSubtarget &PPCSubTarget;
+ const PPCSubtarget &Subtarget;
public:
explicit PPCTargetLowering(PPCTargetMachine &TM);
@@ -613,11 +610,6 @@ namespace llvm {
extendArgForPPC64(ISD::ArgFlagsTy Flags, EVT ObjectVT, SelectionDAG &DAG,
SDValue ArgVal, SDLoc dl) const;
- void
- setMinReservedArea(MachineFunction &MF, SelectionDAG &DAG,
- unsigned nAltivecParamsAtEnd,
- unsigned MinReservedArea, bool isPPC64) const;
-
SDValue
LowerFormalArguments_Darwin(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td
index b71c09e..9318f70 100644
--- a/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -802,17 +802,11 @@ def LDtocCPT: Pseudo<(outs g8rc:$rD), (ins tocentry:$disp, g8rc:$reg),
[(set i64:$rD,
(PPCtoc_entry tconstpool:$disp, i64:$reg))]>, isPPC64;
-let hasSideEffects = 1, isCodeGenOnly = 1 in {
-let RST = 2, DS = 2 in
-def LDinto_toc: DSForm_1a<58, 0, (outs), (ins g8rc:$reg),
- "ld 2, 8($reg)", IIC_LdStLD,
- [(PPCload_toc i64:$reg)]>, isPPC64;
-
-let RST = 2, DS = 10, RA = 1 in
-def LDtoc_restore : DSForm_1a<58, 0, (outs), (ins),
- "ld 2, 40(1)", IIC_LdStLD,
- [(PPCtoc_restore)]>, isPPC64;
-}
+let hasSideEffects = 1, isCodeGenOnly = 1, RST = 2 in
+def LDinto_toc: DSForm_1<58, 0, (outs), (ins memrix:$src),
+ "ld 2, $src", IIC_LdStLD,
+ [(PPCload_toc ixaddr:$src)]>, isPPC64;
+
def LDX : XForm_1<31, 21, (outs g8rc:$rD), (ins memrr:$src),
"ldx $rD, $src", IIC_LdStLD,
[(set i64:$rD, (load xaddr:$src))]>, isPPC64;
diff --git a/lib/Target/PowerPC/PPCInstrAltivec.td b/lib/Target/PowerPC/PPCInstrAltivec.td
index f3c2eab..dce46d8 100644
--- a/lib/Target/PowerPC/PPCInstrAltivec.td
+++ b/lib/Target/PowerPC/PPCInstrAltivec.td
@@ -22,111 +22,127 @@ def vnot_ppc : PatFrag<(ops node:$in),
def vpkuhum_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
- return PPC::isVPKUHUMShuffleMask(cast<ShuffleVectorSDNode>(N), false);
+ return PPC::isVPKUHUMShuffleMask(cast<ShuffleVectorSDNode>(N), false,
+ *CurDAG);
}]>;
def vpkuwum_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
- return PPC::isVPKUWUMShuffleMask(cast<ShuffleVectorSDNode>(N), false);
+ return PPC::isVPKUWUMShuffleMask(cast<ShuffleVectorSDNode>(N), false,
+ *CurDAG);
}]>;
def vpkuhum_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
- return PPC::isVPKUHUMShuffleMask(cast<ShuffleVectorSDNode>(N), true);
+ return PPC::isVPKUHUMShuffleMask(cast<ShuffleVectorSDNode>(N), true,
+ *CurDAG);
}]>;
def vpkuwum_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
- return PPC::isVPKUWUMShuffleMask(cast<ShuffleVectorSDNode>(N), true);
+ return PPC::isVPKUWUMShuffleMask(cast<ShuffleVectorSDNode>(N), true,
+ *CurDAG);
}]>;
def vmrglb_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle (v16i8 node:$lhs), node:$rhs), [{
- return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 1, false);
+ return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 1, false,
+ *CurDAG);
}]>;
def vmrglh_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle (v16i8 node:$lhs), node:$rhs), [{
- return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 2, false);
+ return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 2, false,
+ *CurDAG);
}]>;
def vmrglw_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle (v16i8 node:$lhs), node:$rhs), [{
- return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 4, false);
+ return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 4, false,
+ *CurDAG);
}]>;
def vmrghb_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle (v16i8 node:$lhs), node:$rhs), [{
- return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 1, false);
+ return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 1, false,
+ *CurDAG);
}]>;
def vmrghh_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle (v16i8 node:$lhs), node:$rhs), [{
- return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 2, false);
+ return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 2, false,
+ *CurDAG);
}]>;
def vmrghw_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle (v16i8 node:$lhs), node:$rhs), [{
- return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 4, false);
+ return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 4, false,
+ *CurDAG);
}]>;
def vmrglb_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle (v16i8 node:$lhs), node:$rhs), [{
- return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 1, true);
+ return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 1, true,
+ *CurDAG);
}]>;
def vmrglh_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
- return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 2, true);
+ return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 2, true,
+ *CurDAG);
}]>;
def vmrglw_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
- return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 4, true);
+ return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 4, true,
+ *CurDAG);
}]>;
def vmrghb_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
- return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 1, true);
+ return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 1, true,
+ *CurDAG);
}]>;
def vmrghh_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
- return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 2, true);
+ return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 2, true,
+ *CurDAG);
}]>;
def vmrghw_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
- return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 4, true);
+ return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 4, true,
+ *CurDAG);
}]>;
def VSLDOI_get_imm : SDNodeXForm<vector_shuffle, [{
- return getI32Imm(PPC::isVSLDOIShuffleMask(N, false));
+ return getI32Imm(PPC::isVSLDOIShuffleMask(N, false, *CurDAG));
}]>;
def vsldoi_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
- return PPC::isVSLDOIShuffleMask(N, false) != -1;
+ return PPC::isVSLDOIShuffleMask(N, false, *CurDAG) != -1;
}], VSLDOI_get_imm>;
/// VSLDOI_unary* - These are used to match vsldoi(X,X), which is turned into
/// vector_shuffle(X,undef,mask) by the dag combiner.
def VSLDOI_unary_get_imm : SDNodeXForm<vector_shuffle, [{
- return getI32Imm(PPC::isVSLDOIShuffleMask(N, true));
+ return getI32Imm(PPC::isVSLDOIShuffleMask(N, true, *CurDAG));
}]>;
def vsldoi_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
- return PPC::isVSLDOIShuffleMask(N, true) != -1;
+ return PPC::isVSLDOIShuffleMask(N, true, *CurDAG) != -1;
}], VSLDOI_unary_get_imm>;
// VSPLT*_get_imm xform function: convert vector_shuffle mask to VSPLT* imm.
def VSPLTB_get_imm : SDNodeXForm<vector_shuffle, [{
- return getI32Imm(PPC::getVSPLTImmediate(N, 1));
+ return getI32Imm(PPC::getVSPLTImmediate(N, 1, *CurDAG));
}]>;
def vspltb_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
return PPC::isSplatShuffleMask(cast<ShuffleVectorSDNode>(N), 1);
}], VSPLTB_get_imm>;
def VSPLTH_get_imm : SDNodeXForm<vector_shuffle, [{
- return getI32Imm(PPC::getVSPLTImmediate(N, 2));
+ return getI32Imm(PPC::getVSPLTImmediate(N, 2, *CurDAG));
}]>;
def vsplth_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
return PPC::isSplatShuffleMask(cast<ShuffleVectorSDNode>(N), 2);
}], VSPLTH_get_imm>;
def VSPLTW_get_imm : SDNodeXForm<vector_shuffle, [{
- return getI32Imm(PPC::getVSPLTImmediate(N, 4));
+ return getI32Imm(PPC::getVSPLTImmediate(N, 4, *CurDAG));
}]>;
def vspltw_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
diff --git a/lib/Target/PowerPC/PPCInstrFormats.td b/lib/Target/PowerPC/PPCInstrFormats.td
index 7fed2c6..1e4396c 100644
--- a/lib/Target/PowerPC/PPCInstrFormats.td
+++ b/lib/Target/PowerPC/PPCInstrFormats.td
@@ -360,20 +360,6 @@ class DSForm_1<bits<6> opcode, bits<2> xo, dag OOL, dag IOL, string asmstr,
let Inst{30-31} = xo;
}
-class DSForm_1a<bits<6> opcode, bits<2> xo, dag OOL, dag IOL, string asmstr,
- InstrItinClass itin, list<dag> pattern>
- : I<opcode, OOL, IOL, asmstr, itin> {
- bits<5> RST;
- bits<14> DS;
- bits<5> RA;
-
- let Pattern = pattern;
-
- let Inst{6-10} = RST;
- let Inst{11-15} = RA;
- let Inst{16-29} = DS;
- let Inst{30-31} = xo;
-}
// 1.7.6 X-Form
class XForm_base_r3xo<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp
index fd72384..9bac91d 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -27,6 +27,7 @@
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/CodeGen/SlotIndexes.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/Support/CommandLine.h"
@@ -60,23 +61,25 @@ cl::Hidden);
// Pin the vtable to this file.
void PPCInstrInfo::anchor() {}
-PPCInstrInfo::PPCInstrInfo(PPCTargetMachine &tm)
- : PPCGenInstrInfo(PPC::ADJCALLSTACKDOWN, PPC::ADJCALLSTACKUP),
- TM(tm), RI(*TM.getSubtargetImpl()) {}
+PPCInstrInfo::PPCInstrInfo(PPCSubtarget &STI)
+ : PPCGenInstrInfo(PPC::ADJCALLSTACKDOWN, PPC::ADJCALLSTACKUP),
+ Subtarget(STI), RI(STI) {}
/// CreateTargetHazardRecognizer - Return the hazard recognizer to use for
/// this target when scheduling the DAG.
-ScheduleHazardRecognizer *PPCInstrInfo::CreateTargetHazardRecognizer(
- const TargetMachine *TM,
- const ScheduleDAG *DAG) const {
- unsigned Directive = TM->getSubtarget<PPCSubtarget>().getDarwinDirective();
+ScheduleHazardRecognizer *
+PPCInstrInfo::CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI,
+ const ScheduleDAG *DAG) const {
+ unsigned Directive =
+ static_cast<const PPCSubtarget *>(STI)->getDarwinDirective();
if (Directive == PPC::DIR_440 || Directive == PPC::DIR_A2 ||
Directive == PPC::DIR_E500mc || Directive == PPC::DIR_E5500) {
- const InstrItineraryData *II = TM->getInstrItineraryData();
+ const InstrItineraryData *II =
+ &static_cast<const PPCSubtarget *>(STI)->getInstrItineraryData();
return new ScoreboardHazardRecognizer(II, DAG);
}
- return TargetInstrInfo::CreateTargetHazardRecognizer(TM, DAG);
+ return TargetInstrInfo::CreateTargetHazardRecognizer(STI, DAG);
}
/// CreateTargetPostRAHazardRecognizer - Return the postRA hazard recognizer
@@ -84,17 +87,18 @@ ScheduleHazardRecognizer *PPCInstrInfo::CreateTargetHazardRecognizer(
ScheduleHazardRecognizer *PPCInstrInfo::CreateTargetPostRAHazardRecognizer(
const InstrItineraryData *II,
const ScheduleDAG *DAG) const {
- unsigned Directive = TM.getSubtarget<PPCSubtarget>().getDarwinDirective();
+ unsigned Directive =
+ DAG->TM.getSubtarget<PPCSubtarget>().getDarwinDirective();
- if (Directive == PPC::DIR_PWR7)
+ if (Directive == PPC::DIR_PWR7 || Directive == PPC::DIR_PWR8)
return new PPCDispatchGroupSBHazardRecognizer(II, DAG);
// Most subtargets use a PPC970 recognizer.
if (Directive != PPC::DIR_440 && Directive != PPC::DIR_A2 &&
Directive != PPC::DIR_E500mc && Directive != PPC::DIR_E5500) {
- assert(TM.getInstrInfo() && "No InstrInfo?");
+ assert(DAG->TII && "No InstrInfo?");
- return new PPCHazardRecognizer970(TM);
+ return new PPCHazardRecognizer970(*DAG);
}
return new ScoreboardHazardRecognizer(II, DAG);
@@ -129,7 +133,7 @@ int PPCInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
// On some cores, there is an additional delay between writing to a condition
// register, and using it from a branch.
- unsigned Directive = TM.getSubtarget<PPCSubtarget>().getDarwinDirective();
+ unsigned Directive = Subtarget.getDarwinDirective();
switch (Directive) {
default: break;
case PPC::DIR_7400:
@@ -142,6 +146,7 @@ int PPCInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
case PPC::DIR_PWR6:
case PPC::DIR_PWR6X:
case PPC::DIR_PWR7:
+ case PPC::DIR_PWR8:
Latency += 2;
break;
}
@@ -313,12 +318,13 @@ void PPCInstrInfo::insertNoop(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI) const {
// This function is used for scheduling, and the nop wanted here is the type
// that terminates dispatch groups on the POWER cores.
- unsigned Directive = TM.getSubtarget<PPCSubtarget>().getDarwinDirective();
+ unsigned Directive = Subtarget.getDarwinDirective();
unsigned Opcode;
switch (Directive) {
default: Opcode = PPC::NOP; break;
case PPC::DIR_PWR6: Opcode = PPC::NOP_GT_PWR6; break;
case PPC::DIR_PWR7: Opcode = PPC::NOP_GT_PWR7; break;
+  // FIXME: Update when the P8 instruction-scheduling model is ready.
+  case PPC::DIR_PWR8: Opcode = PPC::NOP_GT_PWR7; break;
}
DebugLoc DL;
@@ -332,7 +338,7 @@ bool PPCInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB,
MachineBasicBlock *&FBB,
SmallVectorImpl<MachineOperand> &Cond,
bool AllowModify) const {
- bool isPPC64 = TM.getSubtargetImpl()->isPPC64();
+ bool isPPC64 = Subtarget.isPPC64();
// If the block has no terminators, it just falls into the block after it.
MachineBasicBlock::iterator I = MBB.end();
@@ -538,7 +544,7 @@ PPCInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
assert((Cond.size() == 2 || Cond.size() == 0) &&
"PPC branch conditions have two components!");
- bool isPPC64 = TM.getSubtargetImpl()->isPPC64();
+ bool isPPC64 = Subtarget.isPPC64();
// One-way branch.
if (!FBB) {
@@ -579,7 +585,7 @@ bool PPCInstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
const SmallVectorImpl<MachineOperand> &Cond,
unsigned TrueReg, unsigned FalseReg,
int &CondCycles, int &TrueCycles, int &FalseCycles) const {
- if (!TM.getSubtargetImpl()->hasISEL())
+ if (!Subtarget.hasISEL())
return false;
if (Cond.size() != 2)
@@ -623,7 +629,7 @@ void PPCInstrInfo::insertSelect(MachineBasicBlock &MBB,
assert(Cond.size() == 2 &&
"PPC branch conditions have two components!");
- assert(TM.getSubtargetImpl()->hasISEL() &&
+ assert(Subtarget.hasISEL() &&
"Cannot insert select on target without ISEL support");
// Get the register classes.
@@ -826,7 +832,7 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF,
FrameIdx));
NonRI = true;
} else if (PPC::VRSAVERCRegClass.hasSubClassEq(RC)) {
- assert(TM.getSubtargetImpl()->isDarwin() &&
+ assert(Subtarget.isDarwin() &&
"VRSAVE only needs spill/restore on Darwin");
NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::SPILL_VRSAVE))
.addReg(SrcReg,
@@ -921,7 +927,7 @@ PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL,
FrameIdx));
NonRI = true;
} else if (PPC::VRSAVERCRegClass.hasSubClassEq(RC)) {
- assert(TM.getSubtargetImpl()->isDarwin() &&
+ assert(Subtarget.isDarwin() &&
"VRSAVE only needs spill/restore on Darwin");
NewMIs.push_back(addFrameReference(BuildMI(MF, DL,
get(PPC::RESTORE_VRSAVE),
@@ -1035,7 +1041,7 @@ bool PPCInstrInfo::FoldImmediate(MachineInstr *UseMI, MachineInstr *DefMI,
unsigned ZeroReg;
if (UseInfo->isLookupPtrRegClass()) {
- bool isPPC64 = TM.getSubtargetImpl()->isPPC64();
+ bool isPPC64 = Subtarget.isPPC64();
ZeroReg = isPPC64 ? PPC::ZERO8 : PPC::ZERO;
} else {
ZeroReg = UseInfo->RegClass == PPC::G8RC_NOX0RegClassID ?
@@ -1102,7 +1108,7 @@ bool PPCInstrInfo::PredicateInstruction(
unsigned OpC = MI->getOpcode();
if (OpC == PPC::BLR) {
if (Pred[1].getReg() == PPC::CTR8 || Pred[1].getReg() == PPC::CTR) {
- bool isPPC64 = TM.getSubtargetImpl()->isPPC64();
+ bool isPPC64 = Subtarget.isPPC64();
MI->setDesc(get(Pred[0].getImm() ?
(isPPC64 ? PPC::BDNZLR8 : PPC::BDNZLR) :
(isPPC64 ? PPC::BDZLR8 : PPC::BDZLR)));
@@ -1124,7 +1130,7 @@ bool PPCInstrInfo::PredicateInstruction(
return true;
} else if (OpC == PPC::B) {
if (Pred[1].getReg() == PPC::CTR8 || Pred[1].getReg() == PPC::CTR) {
- bool isPPC64 = TM.getSubtargetImpl()->isPPC64();
+ bool isPPC64 = Subtarget.isPPC64();
MI->setDesc(get(Pred[0].getImm() ?
(isPPC64 ? PPC::BDNZ8 : PPC::BDNZ) :
(isPPC64 ? PPC::BDZ8 : PPC::BDZ)));
@@ -1162,7 +1168,7 @@ bool PPCInstrInfo::PredicateInstruction(
llvm_unreachable("Cannot predicate bctr[l] on the ctr register");
bool setLR = OpC == PPC::BCTRL || OpC == PPC::BCTRL8;
- bool isPPC64 = TM.getSubtargetImpl()->isPPC64();
+ bool isPPC64 = Subtarget.isPPC64();
if (Pred[0].getImm() == PPC::PRED_BIT_SET) {
MI->setDesc(get(isPPC64 ? (setLR ? PPC::BCCTRL8 : PPC::BCCTR8) :
@@ -1323,7 +1329,7 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr *CmpInstr,
// for equality checks (as those don't depend on the sign). On PPC64,
// we are restricted to equality for unsigned 64-bit comparisons and for
// signed 32-bit comparisons the applicability is more restricted.
- bool isPPC64 = TM.getSubtargetImpl()->isPPC64();
+ bool isPPC64 = Subtarget.isPPC64();
bool is32BitSignedCompare = OpC == PPC::CMPWI || OpC == PPC::CMPW;
bool is32BitUnsignedCompare = OpC == PPC::CMPLWI || OpC == PPC::CMPLW;
bool is64BitUnsignedCompare = OpC == PPC::CMPLDI || OpC == PPC::CMPLD;
diff --git a/lib/Target/PowerPC/PPCInstrInfo.h b/lib/Target/PowerPC/PPCInstrInfo.h
index d9db3e1..83f14c6 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.h
+++ b/lib/Target/PowerPC/PPCInstrInfo.h
@@ -65,7 +65,7 @@ enum PPC970_Unit {
class PPCInstrInfo : public PPCGenInstrInfo {
- PPCTargetMachine &TM;
+ PPCSubtarget &Subtarget;
const PPCRegisterInfo RI;
bool StoreRegToStackSlot(MachineFunction &MF,
@@ -80,7 +80,7 @@ class PPCInstrInfo : public PPCGenInstrInfo {
bool &NonRI, bool &SpillsVRS) const;
virtual void anchor();
public:
- explicit PPCInstrInfo(PPCTargetMachine &TM);
+ explicit PPCInstrInfo(PPCSubtarget &STI);
/// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
/// such, whenever a client has an instance of instruction info, it should
@@ -89,7 +89,7 @@ public:
const PPCRegisterInfo &getRegisterInfo() const { return RI; }
ScheduleHazardRecognizer *
- CreateTargetHazardRecognizer(const TargetMachine *TM,
+ CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI,
const ScheduleDAG *DAG) const override;
ScheduleHazardRecognizer *
CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td
index e421f8e..c2e3382 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/lib/Target/PowerPC/PPCInstrInfo.td
@@ -141,9 +141,6 @@ def PPCload : SDNode<"PPCISD::LOAD", SDTypeProfile<1, 1, []>,
def PPCload_toc : SDNode<"PPCISD::LOAD_TOC", SDTypeProfile<0, 1, []>,
[SDNPHasChain, SDNPSideEffect,
SDNPInGlue, SDNPOutGlue]>;
-def PPCtoc_restore : SDNode<"PPCISD::TOC_RESTORE", SDTypeProfile<0, 0, []>,
- [SDNPHasChain, SDNPSideEffect,
- SDNPInGlue, SDNPOutGlue]>;
def PPCmtctr : SDNode<"PPCISD::MTCTR", SDT_PPCCall,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
def PPCbctrl : SDNode<"PPCISD::BCTRL", SDTNone,
diff --git a/lib/Target/PowerPC/PPCJITInfo.cpp b/lib/Target/PowerPC/PPCJITInfo.cpp
index 7bbc71b..e5f113a 100644
--- a/lib/Target/PowerPC/PPCJITInfo.cpp
+++ b/lib/Target/PowerPC/PPCJITInfo.cpp
@@ -13,7 +13,7 @@
#include "PPCJITInfo.h"
#include "PPCRelocations.h"
-#include "PPCTargetMachine.h"
+#include "PPCSubtarget.h"
#include "llvm/IR/Function.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
@@ -25,6 +25,11 @@ using namespace llvm;
static TargetJITInfo::JITCompilerFn JITCompilerFunction;
+PPCJITInfo::PPCJITInfo(PPCSubtarget &STI)
+ : Subtarget(STI), is64Bit(STI.isPPC64()) {
+ useGOT = 0;
+}
+
#define BUILD_ADDIS(RD,RS,IMM16) \
((15 << 26) | ((RD) << 21) | ((RS) << 16) | ((IMM16) & 65535))
#define BUILD_ORI(RD,RS,UIMM16) \
@@ -393,7 +398,7 @@ void *PPCJITInfo::emitFunctionStub(const Function* F, void *Fn,
JCE.emitWordBE(0xf821ffb1); // stdu r1,-80(r1)
JCE.emitWordBE(0x7d6802a6); // mflr r11
JCE.emitWordBE(0xf9610060); // std r11, 96(r1)
- } else if (TM.getSubtargetImpl()->isDarwinABI()){
+ } else if (Subtarget.isDarwinABI()){
JCE.emitWordBE(0x9421ffe0); // stwu r1,-32(r1)
JCE.emitWordBE(0x7d6802a6); // mflr r11
JCE.emitWordBE(0x91610028); // stw r11, 40(r1)
diff --git a/lib/Target/PowerPC/PPCJITInfo.h b/lib/Target/PowerPC/PPCJITInfo.h
index 0693e3e..b6b37ff 100644
--- a/lib/Target/PowerPC/PPCJITInfo.h
+++ b/lib/Target/PowerPC/PPCJITInfo.h
@@ -18,32 +18,29 @@
#include "llvm/Target/TargetJITInfo.h"
namespace llvm {
- class PPCTargetMachine;
+class PPCSubtarget;
+class PPCJITInfo : public TargetJITInfo {
+protected:
+ PPCSubtarget &Subtarget;
+ bool is64Bit;
- class PPCJITInfo : public TargetJITInfo {
- protected:
- PPCTargetMachine &TM;
- bool is64Bit;
- public:
- PPCJITInfo(PPCTargetMachine &tm, bool tmIs64Bit) : TM(tm) {
- useGOT = 0;
- is64Bit = tmIs64Bit;
- }
+public:
+ PPCJITInfo(PPCSubtarget &STI);
- StubLayout getStubLayout() override;
- void *emitFunctionStub(const Function* F, void *Fn,
- JITCodeEmitter &JCE) override;
- LazyResolverFn getLazyResolverFunction(JITCompilerFn) override;
- void relocate(void *Function, MachineRelocation *MR,
- unsigned NumRelocs, unsigned char* GOTBase) override;
+ StubLayout getStubLayout() override;
+ void *emitFunctionStub(const Function *F, void *Fn,
+ JITCodeEmitter &JCE) override;
+ LazyResolverFn getLazyResolverFunction(JITCompilerFn) override;
+ void relocate(void *Function, MachineRelocation *MR, unsigned NumRelocs,
+ unsigned char *GOTBase) override;
- /// replaceMachineCodeForFunction - Make it so that calling the function
- /// whose machine code is at OLD turns into a call to NEW, perhaps by
- /// overwriting OLD with a branch to NEW. This is used for self-modifying
- /// code.
- ///
- void replaceMachineCodeForFunction(void *Old, void *New) override;
- };
+ /// replaceMachineCodeForFunction - Make it so that calling the function
+ /// whose machine code is at OLD turns into a call to NEW, perhaps by
+ /// overwriting OLD with a branch to NEW. This is used for self-modifying
+ /// code.
+ ///
+ void replaceMachineCodeForFunction(void *Old, void *New) override;
+};
}
#endif
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp
index e333b51..eca774e 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -973,6 +973,14 @@ void PPCRegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
unsigned OffsetOperandNo = getOffsetONFromFION(MI, FIOperandNum);
Offset += MI.getOperand(OffsetOperandNo).getImm();
MI.getOperand(OffsetOperandNo).ChangeToImmediate(Offset);
+
+ MachineBasicBlock &MBB = *MI.getParent();
+ MachineFunction &MF = *MBB.getParent();
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ const MCInstrDesc &MCID = MI.getDesc();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ MRI.constrainRegClass(BaseReg,
+ TII.getRegClass(MCID, FIOperandNum, this, MF));
}
bool PPCRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI,
diff --git a/lib/Target/PowerPC/PPCSelectionDAGInfo.cpp b/lib/Target/PowerPC/PPCSelectionDAGInfo.cpp
index f742f72..dc16742 100644
--- a/lib/Target/PowerPC/PPCSelectionDAGInfo.cpp
+++ b/lib/Target/PowerPC/PPCSelectionDAGInfo.cpp
@@ -16,9 +16,7 @@ using namespace llvm;
#define DEBUG_TYPE "powerpc-selectiondag-info"
-PPCSelectionDAGInfo::PPCSelectionDAGInfo(const PPCTargetMachine &TM)
- : TargetSelectionDAGInfo(TM) {
-}
+PPCSelectionDAGInfo::PPCSelectionDAGInfo(const DataLayout *DL)
+ : TargetSelectionDAGInfo(DL) {}
-PPCSelectionDAGInfo::~PPCSelectionDAGInfo() {
-}
+PPCSelectionDAGInfo::~PPCSelectionDAGInfo() {}
diff --git a/lib/Target/PowerPC/PPCSelectionDAGInfo.h b/lib/Target/PowerPC/PPCSelectionDAGInfo.h
index 341b69c..b2e7f3b 100644
--- a/lib/Target/PowerPC/PPCSelectionDAGInfo.h
+++ b/lib/Target/PowerPC/PPCSelectionDAGInfo.h
@@ -22,7 +22,7 @@ class PPCTargetMachine;
class PPCSelectionDAGInfo : public TargetSelectionDAGInfo {
public:
- explicit PPCSelectionDAGInfo(const PPCTargetMachine &TM);
+ explicit PPCSelectionDAGInfo(const DataLayout *DL);
~PPCSelectionDAGInfo();
};
diff --git a/lib/Target/PowerPC/PPCSubtarget.cpp b/lib/Target/PowerPC/PPCSubtarget.cpp
index ea9daee..2e1b74a 100644
--- a/lib/Target/PowerPC/PPCSubtarget.cpp
+++ b/lib/Target/PowerPC/PPCSubtarget.cpp
@@ -32,15 +32,57 @@ using namespace llvm;
#define GET_SUBTARGETINFO_CTOR
#include "PPCGenSubtargetInfo.inc"
-PPCSubtarget::PPCSubtarget(const std::string &TT, const std::string &CPU,
- const std::string &FS, bool is64Bit,
- CodeGenOpt::Level OptLevel)
- : PPCGenSubtargetInfo(TT, CPU, FS), IsPPC64(is64Bit), TargetTriple(TT),
- OptLevel(OptLevel) {
+/// Return the datalayout string of a subtarget.
+static std::string getDataLayoutString(const PPCSubtarget &ST) {
+ const Triple &T = ST.getTargetTriple();
+
+ std::string Ret;
+
+ // Most PPC* platforms are big endian, PPC64LE is little endian.
+ if (ST.isLittleEndian())
+ Ret = "e";
+ else
+ Ret = "E";
+
+ Ret += DataLayout::getManglingComponent(T);
+
+ // PPC32 has 32 bit pointers. The PS3 (OS Lv2) is a PPC64 machine with 32 bit
+ // pointers.
+ if (!ST.isPPC64() || T.getOS() == Triple::Lv2)
+ Ret += "-p:32:32";
+
+ // Note, the alignment values for f64 and i64 on ppc64 in Darwin
+ // documentation are wrong; these are correct (i.e. "what gcc does").
+ if (ST.isPPC64() || ST.isSVR4ABI())
+ Ret += "-i64:64";
+ else
+ Ret += "-f64:32:64";
+
+ // PPC64 has 32 and 64 bit registers, PPC32 has only 32 bit ones.
+ if (ST.isPPC64())
+ Ret += "-n32:64";
+ else
+ Ret += "-n32";
+
+ return Ret;
+}
+
+PPCSubtarget &PPCSubtarget::initializeSubtargetDependencies(StringRef CPU,
+ StringRef FS) {
initializeEnvironment();
resetSubtargetFeatures(CPU, FS);
+ return *this;
}
+PPCSubtarget::PPCSubtarget(const std::string &TT, const std::string &CPU,
+ const std::string &FS, PPCTargetMachine &TM,
+ bool is64Bit, CodeGenOpt::Level OptLevel)
+ : PPCGenSubtargetInfo(TT, CPU, FS), IsPPC64(is64Bit), TargetTriple(TT),
+ OptLevel(OptLevel),
+ FrameLowering(initializeSubtargetDependencies(CPU, FS)),
+ DL(getDataLayoutString(*this)), InstrInfo(*this), JITInfo(*this),
+ TLInfo(TM), TSInfo(&DL) {}
+
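
The constructor above leans on a small C++ idiom: initializeSubtargetDependencies mutates *this and returns it, so a later member in the initializer list (FrameLowering) can rely on feature parsing having already run. A minimal standalone sketch of that idiom, using hypothetical stub names (not the real LLVM classes) and only standard C++14:

#include <string>

struct SubtargetStub;

struct FrameLoweringStub {
  // Consumes an already feature-initialized subtarget.
  explicit FrameLoweringStub(const SubtargetStub &) {}
};

struct SubtargetStub {
  bool HasFeatureX = false;        // initialized first (declaration order)
  FrameLoweringStub FrameLowering;

  // Runs inside the mem-initializer list, before FrameLowering is built.
  SubtargetStub &initializeDependencies(const std::string &FS) {
    HasFeatureX = (FS.find("+x") != std::string::npos);
    return *this;
  }

  explicit SubtargetStub(const std::string &FS)
      : FrameLowering(initializeDependencies(FS)) {}
};

int main() {
  SubtargetStub ST("+x");
  return ST.HasFeatureX ? 0 : 1;   // 0: features parsed before FrameLowering
}
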
/// SetJITMode - This is called to inform the subtarget info that we are
/// producing code for the JIT.
void PPCSubtarget::SetJITMode() {
@@ -156,6 +198,11 @@ void PPCSubtarget::resetSubtargetFeatures(StringRef CPU, StringRef FS) {
// Determine endianness.
IsLittleEndian = (TargetTriple.getArch() == Triple::ppc64le);
+
+ // FIXME: For now, we disable VSX in little-endian mode until endian
+ // issues in those instructions can be addressed.
+ if (IsLittleEndian)
+ HasVSX = false;
}
/// hasLazyResolverStub - Return true if accesses to the specified global have
@@ -200,6 +247,7 @@ static bool needsAggressiveScheduling(unsigned Directive) {
case PPC::DIR_E500mc:
case PPC::DIR_E5500:
case PPC::DIR_PWR7:
+ case PPC::DIR_PWR8:
return true;
}
}
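
getDataLayoutString, moved into this file by the hunk above, assembles the layout string from a few subtarget queries. A rough standalone mirror of that branching, with the subtarget and triple queries stubbed out as plain bools and the mangling component omitted (it depends on the object format):

#include <iostream>
#include <string>

static std::string dataLayoutFor(bool LittleEndian, bool PPC64, bool SVR4ABI,
                                 bool Lv2OS) {
  std::string Ret = LittleEndian ? "e" : "E";
  if (!PPC64 || Lv2OS)                     // PPC32, or the PS3's Lv2 OS
    Ret += "-p:32:32";
  Ret += (PPC64 || SVR4ABI) ? "-i64:64" : "-f64:32:64";
  Ret += PPC64 ? "-n32:64" : "-n32";       // native integer widths
  return Ret;
}

int main() {
  std::cout << dataLayoutFor(true, true, true, false) << '\n';   // ppc64le
  std::cout << dataLayoutFor(false, false, true, false) << '\n'; // ppc32 SVR4
}
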
diff --git a/lib/Target/PowerPC/PPCSubtarget.h b/lib/Target/PowerPC/PPCSubtarget.h
index ee43fd5..2a16699 100644
--- a/lib/Target/PowerPC/PPCSubtarget.h
+++ b/lib/Target/PowerPC/PPCSubtarget.h
@@ -14,7 +14,13 @@
#ifndef POWERPCSUBTARGET_H
#define POWERPCSUBTARGET_H
+#include "PPCFrameLowering.h"
+#include "PPCInstrInfo.h"
+#include "PPCISelLowering.h"
+#include "PPCJITInfo.h"
+#include "PPCSelectionDAGInfo.h"
#include "llvm/ADT/Triple.h"
+#include "llvm/IR/DataLayout.h"
#include "llvm/MC/MCInstrItineraries.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include <string>
@@ -50,6 +56,7 @@ namespace PPC {
DIR_PWR6,
DIR_PWR6X,
DIR_PWR7,
+ DIR_PWR8,
DIR_64
};
}
@@ -102,12 +109,19 @@ protected:
/// OptLevel - What default optimization level we're emitting code for.
CodeGenOpt::Level OptLevel;
+ PPCFrameLowering FrameLowering;
+ const DataLayout DL;
+ PPCInstrInfo InstrInfo;
+ PPCJITInfo JITInfo;
+ PPCTargetLowering TLInfo;
+ PPCSelectionDAGInfo TSInfo;
+
public:
/// This constructor initializes the data members to match that
/// of the specified triple.
///
PPCSubtarget(const std::string &TT, const std::string &CPU,
- const std::string &FS, bool is64Bit,
+ const std::string &FS, PPCTargetMachine &TM, bool is64Bit,
CodeGenOpt::Level OptLevel);
/// ParseSubtargetFeatures - Parses features string setting specified
@@ -127,10 +141,21 @@ public:
///
unsigned getDarwinDirective() const { return DarwinDirective; }
- /// getInstrItins - Return the instruction itineraies based on subtarget
+ /// getInstrItins - Return the instruction itineraries based on subtarget
/// selection.
const InstrItineraryData &getInstrItineraryData() const { return InstrItins; }
+ const PPCFrameLowering *getFrameLowering() const { return &FrameLowering; }
+ const DataLayout *getDataLayout() const { return &DL; }
+ const PPCInstrInfo *getInstrInfo() const { return &InstrInfo; }
+ PPCJITInfo *getJITInfo() { return &JITInfo; }
+ const PPCTargetLowering *getTargetLowering() const { return &TLInfo; }
+ const PPCSelectionDAGInfo *getSelectionDAGInfo() const { return &TSInfo; }
+
+ /// initializeSubtargetDependencies - Initializes using a CPU and feature string
+ /// so that we can use initializer lists for subtarget initialization.
+ PPCSubtarget &initializeSubtargetDependencies(StringRef CPU, StringRef FS);
+
/// \brief Reset the features for the PowerPC target.
void resetSubtargetFeatures(const MachineFunction *MF) override;
private:
diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp
index 2323add..9563b90 100644
--- a/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -37,53 +37,12 @@ extern "C" void LLVMInitializePowerPCTarget() {
RegisterTargetMachine<PPC64TargetMachine> C(ThePPC64LETarget);
}
-/// Return the datalayout string of a subtarget.
-static std::string getDataLayoutString(const PPCSubtarget &ST) {
- const Triple &T = ST.getTargetTriple();
-
- std::string Ret;
-
- // Most PPC* platforms are big endian, PPC64LE is little endian.
- if (ST.isLittleEndian())
- Ret = "e";
- else
- Ret = "E";
-
- Ret += DataLayout::getManglingComponent(T);
-
- // PPC32 has 32 bit pointers. The PS3 (OS Lv2) is a PPC64 machine with 32 bit
- // pointers.
- if (!ST.isPPC64() || T.getOS() == Triple::Lv2)
- Ret += "-p:32:32";
-
- // Note, the alignment values for f64 and i64 on ppc64 in Darwin
- // documentation are wrong; these are correct (i.e. "what gcc does").
- if (ST.isPPC64() || ST.isSVR4ABI())
- Ret += "-i64:64";
- else
- Ret += "-f64:32:64";
-
- // PPC64 has 32 and 64 bit registers, PPC32 has only 32 bit ones.
- if (ST.isPPC64())
- Ret += "-n32:64";
- else
- Ret += "-n32";
-
- return Ret;
-}
-
-PPCTargetMachine::PPCTargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
- const TargetOptions &Options,
+PPCTargetMachine::PPCTargetMachine(const Target &T, StringRef TT, StringRef CPU,
+ StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
- CodeGenOpt::Level OL,
- bool is64Bit)
- : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
- Subtarget(TT, CPU, FS, is64Bit, OL),
- DL(getDataLayoutString(Subtarget)), InstrInfo(*this),
- FrameLowering(Subtarget), JITInfo(*this, is64Bit),
- TLInfo(*this), TSInfo(*this),
- InstrItins(Subtarget.getInstrItineraryData()) {
+ CodeGenOpt::Level OL, bool is64Bit)
+ : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
+ Subtarget(TT, CPU, FS, *this, is64Bit, OL) {
initAsmInfo();
}
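
The net effect of the PPCTargetMachine changes is an ownership inversion: the subtarget now constructs and owns the per-target components, and the target machine merely forwards its accessors. A minimal sketch of that shape, with stub names standing in for the real LLVM classes:

struct InstrInfoStub {};
struct FrameLoweringStub {};

class SubtargetStub {
  InstrInfoStub InstrInfo;           // single point of ownership
  FrameLoweringStub FrameLowering;
public:
  const InstrInfoStub *getInstrInfo() const { return &InstrInfo; }
  const FrameLoweringStub *getFrameLowering() const { return &FrameLowering; }
};

class TargetMachineStub {
  SubtargetStub Subtarget;
public:
  const SubtargetStub *getSubtargetImpl() const { return &Subtarget; }
  // Accessors stay on the target machine for existing callers, but delegate.
  const InstrInfoStub *getInstrInfo() const {
    return getSubtargetImpl()->getInstrInfo();
  }
  const FrameLoweringStub *getFrameLowering() const {
    return getSubtargetImpl()->getFrameLowering();
  }
};

int main() {
  TargetMachineStub TM;
  return TM.getInstrInfo() == TM.getSubtargetImpl()->getInstrInfo() ? 0 : 1;
}
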
diff --git a/lib/Target/PowerPC/PPCTargetMachine.h b/lib/Target/PowerPC/PPCTargetMachine.h
index 9e92494..4c7029c 100644
--- a/lib/Target/PowerPC/PPCTargetMachine.h
+++ b/lib/Target/PowerPC/PPCTargetMachine.h
@@ -14,11 +14,7 @@
#ifndef PPC_TARGETMACHINE_H
#define PPC_TARGETMACHINE_H
-#include "PPCFrameLowering.h"
-#include "PPCISelLowering.h"
#include "PPCInstrInfo.h"
-#include "PPCJITInfo.h"
-#include "PPCSelectionDAGInfo.h"
#include "PPCSubtarget.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/Target/TargetMachine.h"
@@ -29,13 +25,6 @@ namespace llvm {
///
class PPCTargetMachine : public LLVMTargetMachine {
PPCSubtarget Subtarget;
- const DataLayout DL; // Calculates type size & alignment
- PPCInstrInfo InstrInfo;
- PPCFrameLowering FrameLowering;
- PPCJITInfo JITInfo;
- PPCTargetLowering TLInfo;
- PPCSelectionDAGInfo TSInfo;
- InstrItineraryData InstrItins;
public:
PPCTargetMachine(const Target &T, StringRef TT,
@@ -43,25 +32,29 @@ public:
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL, bool is64Bit);
- const PPCInstrInfo *getInstrInfo() const override { return &InstrInfo; }
- const PPCFrameLowering *getFrameLowering() const override {
- return &FrameLowering;
+ const PPCInstrInfo *getInstrInfo() const override {
+ return getSubtargetImpl()->getInstrInfo();
}
- PPCJITInfo *getJITInfo() override { return &JITInfo; }
+ const PPCFrameLowering *getFrameLowering() const override {
+ return getSubtargetImpl()->getFrameLowering();
+ }
+ PPCJITInfo *getJITInfo() override { return Subtarget.getJITInfo(); }
const PPCTargetLowering *getTargetLowering() const override {
- return &TLInfo;
+ return getSubtargetImpl()->getTargetLowering();
}
const PPCSelectionDAGInfo* getSelectionDAGInfo() const override {
- return &TSInfo;
+ return getSubtargetImpl()->getSelectionDAGInfo();
}
- const PPCRegisterInfo *getRegisterInfo() const override {
- return &InstrInfo.getRegisterInfo();
+ const PPCRegisterInfo *getRegisterInfo() const override {
+ return &getInstrInfo()->getRegisterInfo();
}
- const DataLayout *getDataLayout() const override { return &DL; }
+ const DataLayout *getDataLayout() const override {
+ return getSubtargetImpl()->getDataLayout();
+ }
const PPCSubtarget *getSubtargetImpl() const override { return &Subtarget; }
const InstrItineraryData *getInstrItineraryData() const override {
- return &InstrItins;
+ return &getSubtargetImpl()->getInstrItineraryData();
}
// Pass Pipeline Configuration
diff --git a/lib/Target/R600/AMDGPU.h b/lib/Target/R600/AMDGPU.h
index 949fdfb..713fc4b 100644
--- a/lib/Target/R600/AMDGPU.h
+++ b/lib/Target/R600/AMDGPU.h
@@ -17,6 +17,7 @@
namespace llvm {
class AMDGPUInstrPrinter;
+class AMDGPUSubtarget;
class AMDGPUTargetMachine;
class FunctionPass;
class MCAsmInfo;
@@ -40,6 +41,7 @@ FunctionPass *createSIAnnotateControlFlowPass();
FunctionPass *createSILowerI1CopiesPass();
FunctionPass *createSILowerControlFlowPass(TargetMachine &tm);
FunctionPass *createSIFixSGPRCopiesPass(TargetMachine &tm);
+FunctionPass *createSIFixSGPRLiveRangesPass();
FunctionPass *createSICodeEmitterPass(formatted_raw_ostream &OS);
FunctionPass *createSIInsertWaits(TargetMachine &tm);
@@ -47,14 +49,18 @@ void initializeSILowerI1CopiesPass(PassRegistry &);
extern char &SILowerI1CopiesID;
// Passes common to R600 and SI
+FunctionPass *createAMDGPUPromoteAlloca(const AMDGPUSubtarget &ST);
Pass *createAMDGPUStructurizeCFGPass();
-FunctionPass *createAMDGPUConvertToISAPass(TargetMachine &tm);
FunctionPass *createAMDGPUISelDag(TargetMachine &tm);
/// \brief Creates an AMDGPU-specific Target Transformation Info pass.
ImmutablePass *
createAMDGPUTargetTransformInfoPass(const AMDGPUTargetMachine *TM);
+void initializeSIFixSGPRLiveRangesPass(PassRegistry&);
+extern char &SIFixSGPRLiveRangesID;
+
extern Target TheAMDGPUTarget;
} // End namespace llvm
diff --git a/lib/Target/R600/AMDGPU.td b/lib/Target/R600/AMDGPU.td
index 2edc115..6ff9ab7 100644
--- a/lib/Target/R600/AMDGPU.td
+++ b/lib/Target/R600/AMDGPU.td
@@ -7,8 +7,7 @@
//
//==-----------------------------------------------------------------------===//
-// Include AMDIL TD files
-include "AMDILBase.td"
+include "llvm/Target/Target.td"
//===----------------------------------------------------------------------===//
// Subtarget Features
@@ -33,30 +32,25 @@ def FeatureIfCvt : SubtargetFeature <"disable-ifcvt",
"false",
"Disable the if conversion pass">;
-def FeatureFP64 : SubtargetFeature<"fp64",
+def FeatureFP64 : SubtargetFeature<"fp64",
"FP64",
"true",
- "Enable 64bit double precision operations">;
+ "Enable double precision operations">;
def Feature64BitPtr : SubtargetFeature<"64BitPtr",
"Is64bit",
"true",
- "Specify if 64bit addressing should be used.">;
-
-def Feature32on64BitPtr : SubtargetFeature<"64on32BitPtr",
- "Is32on64bit",
- "false",
- "Specify if 64bit sized pointers with 32bit addressing should be used.">;
+ "Specify if 64-bit addressing should be used">;
def FeatureR600ALUInst : SubtargetFeature<"R600ALUInst",
"R600ALUInst",
"false",
- "Older version of ALU instructions encoding.">;
+ "Older version of ALU instructions encoding">;
def FeatureVertexCache : SubtargetFeature<"HasVertexCache",
"HasVertexCache",
"true",
- "Specify use of dedicated vertex cache.">;
+ "Specify use of dedicated vertex cache">;
def FeatureCaymanISA : SubtargetFeature<"caymanISA",
"CaymanISA",
@@ -87,28 +81,40 @@ def FeatureWavefrontSize16 : SubtargetFeatureWavefrontSize<16>;
def FeatureWavefrontSize32 : SubtargetFeatureWavefrontSize<32>;
def FeatureWavefrontSize64 : SubtargetFeatureWavefrontSize<64>;
+class SubtargetFeatureLocalMemorySize <int Value> : SubtargetFeature<
+ "localmemorysize"#Value,
+ "LocalMemorySize",
+ !cast<string>(Value),
+ "The size of local memory in bytes">;
+
class SubtargetFeatureGeneration <string Value,
list<SubtargetFeature> Implies> :
SubtargetFeature <Value, "Gen", "AMDGPUSubtarget::"#Value,
Value#" GPU generation", Implies>;
+def FeatureLocalMemorySize0 : SubtargetFeatureLocalMemorySize<0>;
+def FeatureLocalMemorySize32768 : SubtargetFeatureLocalMemorySize<32768>;
+def FeatureLocalMemorySize65536 : SubtargetFeatureLocalMemorySize<65536>;
+
def FeatureR600 : SubtargetFeatureGeneration<"R600",
- [FeatureR600ALUInst, FeatureFetchLimit8]>;
+ [FeatureR600ALUInst, FeatureFetchLimit8, FeatureLocalMemorySize0]>;
def FeatureR700 : SubtargetFeatureGeneration<"R700",
- [FeatureFetchLimit16]>;
+ [FeatureFetchLimit16, FeatureLocalMemorySize0]>;
def FeatureEvergreen : SubtargetFeatureGeneration<"EVERGREEN",
- [FeatureFetchLimit16]>;
+ [FeatureFetchLimit16, FeatureLocalMemorySize32768]>;
def FeatureNorthernIslands : SubtargetFeatureGeneration<"NORTHERN_ISLANDS",
- [FeatureFetchLimit16, FeatureWavefrontSize64]>;
+ [FeatureFetchLimit16, FeatureWavefrontSize64,
+ FeatureLocalMemorySize32768]
+>;
def FeatureSouthernIslands : SubtargetFeatureGeneration<"SOUTHERN_ISLANDS",
- [Feature64BitPtr, FeatureFP64]>;
+ [Feature64BitPtr, FeatureFP64, FeatureLocalMemorySize32768]>;
def FeatureSeaIslands : SubtargetFeatureGeneration<"SEA_ISLANDS",
- [Feature64BitPtr, FeatureFP64]>;
+ [Feature64BitPtr, FeatureFP64, FeatureLocalMemorySize65536]>;
//===----------------------------------------------------------------------===//
def AMDGPUInstrInfo : InstrInfo {
@@ -120,6 +126,10 @@ def AMDGPU : Target {
let InstructionSet = AMDGPUInstrInfo;
}
+// Dummy Instruction itineraries for pseudo instructions
+def ALU_NULL : FuncUnit;
+def NullALU : InstrItinClass;
+
//===----------------------------------------------------------------------===//
// Predicate helper class
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/R600/AMDGPUAsmPrinter.cpp b/lib/Target/R600/AMDGPUAsmPrinter.cpp
index 170f479..a6e217b 100644
--- a/lib/Target/R600/AMDGPUAsmPrinter.cpp
+++ b/lib/Target/R600/AMDGPUAsmPrinter.cpp
@@ -19,6 +19,7 @@
#include "AMDGPUAsmPrinter.h"
#include "AMDGPU.h"
+#include "AMDGPUSubtarget.h"
#include "R600Defines.h"
#include "R600MachineFunctionInfo.h"
#include "R600RegisterInfo.h"
@@ -35,6 +36,24 @@
using namespace llvm;
+// TODO: This should get the default rounding mode from the kernel. We just set
+// the default here, but this could change if the OpenCL rounding mode pragmas
+// are used.
+//
+// The denormal mode here should match what is reported by the OpenCL runtime
+// for the CL_FP_DENORM bit from CL_DEVICE_{HALF|SINGLE|DOUBLE}_FP_CONFIG, but
+// can also be overridden to flush with the -cl-denorms-are-zero compiler flag.
+//
+// AMD's OpenCL runtime sets flush-none and reports CL_FP_DENORM only for
+// double precision; single precision is left at flush-all, and CL_FP_DENORM
+// is not reported for CL_DEVICE_SINGLE_FP_CONFIG. Mesa's OpenCL currently
+// reports CL_FP_DENORM for both.
+static uint32_t getFPMode(MachineFunction &) {
+ return FP_ROUND_MODE_SP(FP_ROUND_ROUND_TO_NEAREST) |
+ FP_ROUND_MODE_DP(FP_ROUND_ROUND_TO_NEAREST) |
+ FP_DENORM_MODE_SP(FP_DENORM_FLUSH_NONE) |
+ FP_DENORM_MODE_DP(FP_DENORM_FLUSH_NONE);
+}
static AsmPrinter *createAMDGPUAsmPrinterPass(TargetMachine &tm,
MCStreamer &Streamer) {
@@ -92,6 +111,10 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
false);
OutStreamer.emitRawComment(" NumVgprs: " + Twine(KernelInfo.NumVGPR),
false);
+ OutStreamer.emitRawComment(" FloatMode: " + Twine(KernelInfo.FloatMode),
+ false);
+ OutStreamer.emitRawComment(" IeeeMode: " + Twine(KernelInfo.IEEEMode),
+ false);
} else {
R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
OutStreamer.emitRawComment(
@@ -279,16 +302,27 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
if (VCCUsed)
MaxSGPR += 2;
- ProgInfo.CodeLen = CodeSize;
- ProgInfo.NumSGPR = MaxSGPR;
ProgInfo.NumVGPR = MaxVGPR;
+ ProgInfo.NumSGPR = MaxSGPR;
+
+ // Set the value to initialize FP_ROUND and FP_DENORM parts of the mode
+ // register.
+ ProgInfo.FloatMode = getFPMode(MF);
+
+ // XXX: Not quite sure what this does, but sc seems to unset this.
+ ProgInfo.IEEEMode = 0;
+
+ // Do not clamp NAN to 0.
+ ProgInfo.DX10Clamp = 0;
+
+ ProgInfo.CodeLen = CodeSize;
}
void AMDGPUAsmPrinter::EmitProgramInfoSI(MachineFunction &MF,
const SIProgramInfo &KernelInfo) {
const AMDGPUSubtarget &STM = TM.getSubtarget<AMDGPUSubtarget>();
-
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+
unsigned RsrcReg;
switch (MFI->ShaderType) {
default: // Fall through
@@ -298,25 +332,41 @@ void AMDGPUAsmPrinter::EmitProgramInfoSI(MachineFunction &MF,
case ShaderType::VERTEX: RsrcReg = R_00B128_SPI_SHADER_PGM_RSRC1_VS; break;
}
- OutStreamer.EmitIntValue(RsrcReg, 4);
- OutStreamer.EmitIntValue(S_00B028_VGPRS(KernelInfo.NumVGPR / 4) |
- S_00B028_SGPRS(KernelInfo.NumSGPR / 8), 4);
-
unsigned LDSAlignShift;
if (STM.getGeneration() < AMDGPUSubtarget::SEA_ISLANDS) {
- // LDS is allocated in 64 dword blocks
+ // LDS is allocated in 64 dword blocks.
LDSAlignShift = 8;
} else {
- // LDS is allocated in 128 dword blocks
+ // LDS is allocated in 128 dword blocks.
LDSAlignShift = 9;
}
+
unsigned LDSBlocks =
- RoundUpToAlignment(MFI->LDSSize, 1 << LDSAlignShift) >> LDSAlignShift;
+ RoundUpToAlignment(MFI->LDSSize, 1 << LDSAlignShift) >> LDSAlignShift;
if (MFI->ShaderType == ShaderType::COMPUTE) {
+ OutStreamer.EmitIntValue(R_00B848_COMPUTE_PGM_RSRC1, 4);
+
+ const uint32_t ComputePGMRSrc1 =
+ S_00B848_VGPRS(KernelInfo.NumVGPR / 4) |
+ S_00B848_SGPRS(KernelInfo.NumSGPR / 8) |
+ S_00B848_PRIORITY(KernelInfo.Priority) |
+ S_00B848_FLOAT_MODE(KernelInfo.FloatMode) |
+ S_00B848_PRIV(KernelInfo.Priv) |
+ S_00B848_DX10_CLAMP(KernelInfo.DX10Clamp) |
+          S_00B848_DEBUG_MODE(KernelInfo.DebugMode) |
+ S_00B848_IEEE_MODE(KernelInfo.IEEEMode);
+
+ OutStreamer.EmitIntValue(ComputePGMRSrc1, 4);
+
OutStreamer.EmitIntValue(R_00B84C_COMPUTE_PGM_RSRC2, 4);
OutStreamer.EmitIntValue(S_00B84C_LDS_SIZE(LDSBlocks), 4);
+ } else {
+ OutStreamer.EmitIntValue(RsrcReg, 4);
+ OutStreamer.EmitIntValue(S_00B028_VGPRS(KernelInfo.NumVGPR / 4) |
+ S_00B028_SGPRS(KernelInfo.NumSGPR / 8), 4);
}
+
if (MFI->ShaderType == ShaderType::PIXEL) {
OutStreamer.EmitIntValue(R_00B02C_SPI_SHADER_PGM_RSRC2_PS, 4);
OutStreamer.EmitIntValue(S_00B02C_EXTRA_LDS_SIZE(LDSBlocks), 4);
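
getFPMode above ORs four independent 2-bit fields (single/double round mode, single/double denormal mode) into the 32-bit FloatMode value that seeds the MODE register. A standalone sketch of that packing; the shift positions and enum values here are illustrative assumptions, the real definitions live in the R600/SI register headers:

#include <cstdint>
#include <cstdio>

// Assumed field layout, for illustration only.
enum : uint32_t { RoundToNearest = 0, DenormFlushNone = 3 };
static uint32_t fpRoundSP(uint32_t X)  { return (X & 3) << 0; }
static uint32_t fpRoundDP(uint32_t X)  { return (X & 3) << 2; }
static uint32_t fpDenormSP(uint32_t X) { return (X & 3) << 4; }
static uint32_t fpDenormDP(uint32_t X) { return (X & 3) << 6; }

int main() {
  uint32_t Mode = fpRoundSP(RoundToNearest) | fpRoundDP(RoundToNearest) |
                  fpDenormSP(DenormFlushNone) | fpDenormDP(DenormFlushNone);
  std::printf("FloatMode = 0x%x\n", Mode);  // four 2-bit fields, no overlap
  return 0;
}
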
diff --git a/lib/Target/R600/AMDGPUAsmPrinter.h b/lib/Target/R600/AMDGPUAsmPrinter.h
index 71adc9a..c1acb6e 100644
--- a/lib/Target/R600/AMDGPUAsmPrinter.h
+++ b/lib/Target/R600/AMDGPUAsmPrinter.h
@@ -25,13 +25,28 @@ class AMDGPUAsmPrinter : public AsmPrinter {
private:
struct SIProgramInfo {
SIProgramInfo() :
- CodeLen(0),
+ NumVGPR(0),
NumSGPR(0),
- NumVGPR(0) {}
+ Priority(0),
+ FloatMode(0),
+ Priv(0),
+ DX10Clamp(0),
+ DebugMode(0),
+ IEEEMode(0),
+ CodeLen(0) {}
+ // Fields set in PGM_RSRC1 pm4 packet.
+ uint32_t NumVGPR;
+ uint32_t NumSGPR;
+ uint32_t Priority;
+ uint32_t FloatMode;
+ uint32_t Priv;
+ uint32_t DX10Clamp;
+ uint32_t DebugMode;
+ uint32_t IEEEMode;
+
+ // Bonus information for debugging.
uint64_t CodeLen;
- unsigned NumSGPR;
- unsigned NumVGPR;
};
void getSIProgramInfo(SIProgramInfo &Out, MachineFunction &MF) const;
diff --git a/lib/Target/R600/AMDGPUConvertToISA.cpp b/lib/Target/R600/AMDGPUConvertToISA.cpp
deleted file mode 100644
index 91aeee2..0000000
--- a/lib/Target/R600/AMDGPUConvertToISA.cpp
+++ /dev/null
@@ -1,62 +0,0 @@
-//===-- AMDGPUConvertToISA.cpp - Lower AMDIL to HW ISA --------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-/// \file
-/// \brief This pass lowers AMDIL machine instructions to the appropriate
-/// hardware instructions.
-//
-//===----------------------------------------------------------------------===//
-
-#include "AMDGPU.h"
-#include "AMDGPUInstrInfo.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-
-using namespace llvm;
-
-namespace {
-
-class AMDGPUConvertToISAPass : public MachineFunctionPass {
-
-private:
- static char ID;
- TargetMachine &TM;
-
-public:
- AMDGPUConvertToISAPass(TargetMachine &tm) :
- MachineFunctionPass(ID), TM(tm) { }
-
- bool runOnMachineFunction(MachineFunction &MF) override;
-
- const char *getPassName() const override {return "AMDGPU Convert to ISA";}
-
-};
-
-} // End anonymous namespace
-
-char AMDGPUConvertToISAPass::ID = 0;
-
-FunctionPass *llvm::createAMDGPUConvertToISAPass(TargetMachine &tm) {
- return new AMDGPUConvertToISAPass(tm);
-}
-
-bool AMDGPUConvertToISAPass::runOnMachineFunction(MachineFunction &MF) {
- const AMDGPUInstrInfo * TII =
- static_cast<const AMDGPUInstrInfo*>(TM.getInstrInfo());
-
- for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
- BB != BB_E; ++BB) {
- MachineBasicBlock &MBB = *BB;
- for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
- I != E; ++I) {
- MachineInstr &MI = *I;
- TII->convertToISA(MI, MF, MBB.findDebugLoc(I));
- }
- }
- return false;
-}
diff --git a/lib/Target/R600/AMDGPUFrameLowering.cpp b/lib/Target/R600/AMDGPUFrameLowering.cpp
index e7e90d3..9e8302e 100644
--- a/lib/Target/R600/AMDGPUFrameLowering.cpp
+++ b/lib/Target/R600/AMDGPUFrameLowering.cpp
@@ -83,7 +83,7 @@ int AMDGPUFrameLowering::getFrameIndexOffset(const MachineFunction &MF,
for (int i = MFI->getObjectIndexBegin(); i < UpperBound; ++i) {
OffsetBytes = RoundUpToAlignment(OffsetBytes, MFI->getObjectAlignment(i));
OffsetBytes += MFI->getObjectSize(i);
- // Each regiter holds 4 bytes, so we must always align the offset to at
+ // Each register holds 4 bytes, so we must always align the offset to at
// least 4 bytes, so that 2 frame objects won't share the same register.
OffsetBytes = RoundUpToAlignment(OffsetBytes, 4);
}
diff --git a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
index f1f0bfa..b4d79e5 100644
--- a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
+++ b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
@@ -14,6 +14,7 @@
#include "AMDGPUInstrInfo.h"
#include "AMDGPUISelLowering.h" // For AMDGPUISD
#include "AMDGPURegisterInfo.h"
+#include "AMDGPUSubtarget.h"
#include "R600InstrInfo.h"
#include "SIISelLowering.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
@@ -83,6 +84,11 @@ private:
SDValue& Offset);
bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset);
+ bool SelectMUBUFAddr64(SDValue Addr, SDValue &Ptr, SDValue &Offset,
+ SDValue &ImmOffset) const;
+
+ SDNode *SelectADD_SUB_I64(SDNode *N);
+ SDNode *SelectDIV_SCALE(SDNode *N);
// Include the pieces autogenerated from the target description.
#include "AMDGPUGenDAGISel.inc"
@@ -211,51 +217,16 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
// We are selecting i64 ADD here instead of custom lower it during
// DAG legalization, so we can fold some i64 ADDs used for address
// calculation into the LOAD and STORE instructions.
- case ISD::ADD: {
+ case ISD::ADD:
+ case ISD::SUB: {
if (N->getValueType(0) != MVT::i64 ||
ST.getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
break;
- SDLoc DL(N);
- SDValue LHS = N->getOperand(0);
- SDValue RHS = N->getOperand(1);
-
- SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, MVT::i32);
- SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, MVT::i32);
-
- SDNode *Lo0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
- DL, MVT::i32, LHS, Sub0);
- SDNode *Hi0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
- DL, MVT::i32, LHS, Sub1);
-
- SDNode *Lo1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
- DL, MVT::i32, RHS, Sub0);
- SDNode *Hi1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
- DL, MVT::i32, RHS, Sub1);
-
- SDVTList VTList = CurDAG->getVTList(MVT::i32, MVT::Glue);
-
- SmallVector<SDValue, 8> AddLoArgs;
- AddLoArgs.push_back(SDValue(Lo0, 0));
- AddLoArgs.push_back(SDValue(Lo1, 0));
-
- SDNode *AddLo = CurDAG->getMachineNode(
- isCFDepth0() ? AMDGPU::S_ADD_I32 : AMDGPU::V_ADD_I32_e32,
- DL, VTList, AddLoArgs);
- SDValue Carry = SDValue(AddLo, 1);
- SDNode *AddHi = CurDAG->getMachineNode(
- isCFDepth0() ? AMDGPU::S_ADDC_U32 : AMDGPU::V_ADDC_U32_e32,
- DL, MVT::i32, SDValue(Hi0, 0), SDValue(Hi1, 0), Carry);
-
- SDValue Args[5] = {
- CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, MVT::i32),
- SDValue(AddLo,0),
- Sub0,
- SDValue(AddHi,0),
- Sub1,
- };
- return CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, MVT::i64, Args);
+ return SelectADD_SUB_I64(N);
}
+ case ISD::SCALAR_TO_VECTOR:
+ case AMDGPUISD::BUILD_VERTICAL_VECTOR:
case ISD::BUILD_VECTOR: {
unsigned RegClassID;
const AMDGPURegisterInfo *TRI =
@@ -264,7 +235,8 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
static_cast<const SIRegisterInfo*>(TM.getRegisterInfo());
EVT VT = N->getValueType(0);
unsigned NumVectorElts = VT.getVectorNumElements();
- assert(VT.getVectorElementType().bitsEq(MVT::i32));
+ EVT EltVT = VT.getVectorElementType();
+ assert(EltVT.bitsEq(MVT::i32));
if (ST.getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
bool UseVReg = true;
for (SDNode::use_iterator U = N->use_begin(), E = SDNode::use_end();
@@ -305,7 +277,12 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
// can't be bundled by our scheduler.
switch(NumVectorElts) {
case 2: RegClassID = AMDGPU::R600_Reg64RegClassID; break;
- case 4: RegClassID = AMDGPU::R600_Reg128RegClassID; break;
+ case 4:
+ if (Opc == AMDGPUISD::BUILD_VERTICAL_VECTOR)
+ RegClassID = AMDGPU::R600_Reg128VerticalRegClassID;
+ else
+ RegClassID = AMDGPU::R600_Reg128RegClassID;
+ break;
default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR");
}
}
@@ -313,8 +290,7 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
SDValue RegClass = CurDAG->getTargetConstant(RegClassID, MVT::i32);
if (NumVectorElts == 1) {
- return CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS,
- VT.getVectorElementType(),
+ return CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS, EltVT,
N->getOperand(0), RegClass);
}
@@ -323,11 +299,12 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
// 16 = Max Num Vector Elements
// 2 = 2 REG_SEQUENCE operands per element (value, subreg index)
// 1 = Vector Register Class
- SmallVector<SDValue, 16 * 2 + 1> RegSeqArgs(N->getNumOperands() * 2 + 1);
+ SmallVector<SDValue, 16 * 2 + 1> RegSeqArgs(NumVectorElts * 2 + 1);
RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, MVT::i32);
bool IsRegSeq = true;
- for (unsigned i = 0; i < N->getNumOperands(); i++) {
+ unsigned NOps = N->getNumOperands();
+ for (unsigned i = 0; i < NOps; i++) {
// XXX: Why is this here?
if (dyn_cast<RegisterSDNode>(N->getOperand(i))) {
IsRegSeq = false;
@@ -337,6 +314,20 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
RegSeqArgs[1 + (2 * i) + 1] =
CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), MVT::i32);
}
+
+ if (NOps != NumVectorElts) {
+ // Fill in the missing undef elements if this was a scalar_to_vector.
+ assert(Opc == ISD::SCALAR_TO_VECTOR && NOps < NumVectorElts);
+
+ MachineSDNode *ImpDef = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
+ SDLoc(N), EltVT);
+ for (unsigned i = NOps; i < NumVectorElts; ++i) {
+ RegSeqArgs[1 + (2 * i)] = SDValue(ImpDef, 0);
+ RegSeqArgs[1 + (2 * i) + 1] =
+ CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), MVT::i32);
+ }
+ }
+
if (!IsRegSeq)
break;
return CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(),
@@ -466,6 +457,9 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
PackedOffsetWidth);
}
+ case AMDGPUISD::DIV_SCALE: {
+ return SelectDIV_SCALE(N);
+ }
}
return SelectCode(N);
}
@@ -659,6 +653,129 @@ bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
return true;
}
+SDNode *AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) {
+ SDLoc DL(N);
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+
+ bool IsAdd = (N->getOpcode() == ISD::ADD);
+
+ SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, MVT::i32);
+ SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, MVT::i32);
+
+ SDNode *Lo0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
+ DL, MVT::i32, LHS, Sub0);
+ SDNode *Hi0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
+ DL, MVT::i32, LHS, Sub1);
+
+ SDNode *Lo1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
+ DL, MVT::i32, RHS, Sub0);
+ SDNode *Hi1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
+ DL, MVT::i32, RHS, Sub1);
+
+ SDVTList VTList = CurDAG->getVTList(MVT::i32, MVT::Glue);
+ SDValue AddLoArgs[] = { SDValue(Lo0, 0), SDValue(Lo1, 0) };
+
+ unsigned Opc = IsAdd ? AMDGPU::S_ADD_I32 : AMDGPU::S_SUB_I32;
+ unsigned CarryOpc = IsAdd ? AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32;
+
+ if (!isCFDepth0()) {
+ Opc = IsAdd ? AMDGPU::V_ADD_I32_e32 : AMDGPU::V_SUB_I32_e32;
+ CarryOpc = IsAdd ? AMDGPU::V_ADDC_U32_e32 : AMDGPU::V_SUBB_U32_e32;
+ }
+
+  SDNode *AddLo = CurDAG->getMachineNode(Opc, DL, VTList, AddLoArgs);
+ SDValue Carry(AddLo, 1);
+ SDNode *AddHi
+ = CurDAG->getMachineNode(CarryOpc, DL, MVT::i32,
+ SDValue(Hi0, 0), SDValue(Hi1, 0), Carry);
+
+ SDValue Args[5] = {
+ CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, MVT::i32),
+ SDValue(AddLo,0),
+ Sub0,
+ SDValue(AddHi,0),
+ Sub1,
+ };
+ return CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, MVT::i64, Args);
+}
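
SelectADD_SUB_I64 splits a 64-bit add or subtract into a low 32-bit operation that produces a carry (or borrow) and a high 32-bit operation that consumes it, matching the S_ADD_I32/S_ADDC_U32 pairing chosen above. The arithmetic being modeled, as a plain C++ check rather than SelectionDAG code:

#include <cassert>
#include <cstdint>

// 64-bit add from two 32-bit halves: lo add sets the carry, hi add uses it.
static uint64_t add64(uint32_t LoL, uint32_t HiL, uint32_t LoR, uint32_t HiR) {
  uint32_t Lo = LoL + LoR;
  uint32_t Carry = Lo < LoL;        // unsigned wrap of the low half
  uint32_t Hi = HiL + HiR + Carry;  // the ADDC step
  return ((uint64_t)Hi << 32) | Lo;
}

int main() {
  assert(add64(0xffffffffu, 0, 1, 0) == 0x100000000ull);  // carry propagates
  assert(add64(5, 1, 7, 2) == (((uint64_t)3 << 32) | 12));
  return 0;
}

Subtraction is the mirror image: the low half produces a borrow that the SUBB step consumes.
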
+
+SDNode *AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) {
+ SDLoc SL(N);
+ EVT VT = N->getValueType(0);
+
+ assert(VT == MVT::f32 || VT == MVT::f64);
+
+ unsigned Opc
+ = (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64 : AMDGPU::V_DIV_SCALE_F32;
+
+ const SDValue Zero = CurDAG->getTargetConstant(0, MVT::i32);
+
+ SDValue Ops[] = {
+ N->getOperand(0),
+ N->getOperand(1),
+ N->getOperand(2),
+ Zero,
+ Zero,
+ Zero,
+ Zero
+ };
+
+ return CurDAG->SelectNodeTo(N, Opc, VT, MVT::i1, Ops);
+}
+
+static SDValue wrapAddr64Rsrc(SelectionDAG *DAG, SDLoc DL, SDValue Ptr) {
+ return SDValue(DAG->getMachineNode(AMDGPU::SI_ADDR64_RSRC, DL, MVT::v4i32,
+ Ptr), 0);
+}
+
+bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &Ptr,
+ SDValue &Offset,
+ SDValue &ImmOffset) const {
+ SDLoc DL(Addr);
+
+ if (CurDAG->isBaseWithConstantOffset(Addr)) {
+ SDValue N0 = Addr.getOperand(0);
+ SDValue N1 = Addr.getOperand(1);
+ ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
+
+ if (isUInt<12>(C1->getZExtValue())) {
+ if (N0.getOpcode() == ISD::ADD) {
+ // (add (add N2, N3), C1)
+ SDValue N2 = N0.getOperand(0);
+ SDValue N3 = N0.getOperand(1);
+ Ptr = wrapAddr64Rsrc(CurDAG, DL, N2);
+ Offset = N3;
+ ImmOffset = CurDAG->getTargetConstant(C1->getZExtValue(), MVT::i16);
+ return true;
+ }
+
+ // (add N0, C1)
+    Ptr = wrapAddr64Rsrc(CurDAG, DL, CurDAG->getTargetConstant(0, MVT::i64));
+ Offset = N0;
+ ImmOffset = CurDAG->getTargetConstant(C1->getZExtValue(), MVT::i16);
+ return true;
+ }
+ }
+ if (Addr.getOpcode() == ISD::ADD) {
+ // (add N0, N1)
+ SDValue N0 = Addr.getOperand(0);
+ SDValue N1 = Addr.getOperand(1);
+ Ptr = wrapAddr64Rsrc(CurDAG, DL, N0);
+ Offset = N1;
+ ImmOffset = CurDAG->getTargetConstant(0, MVT::i16);
+ return true;
+ }
+
+ // default case
+ Ptr = wrapAddr64Rsrc(CurDAG, DL, CurDAG->getConstant(0, MVT::i64));
+ Offset = Addr;
+ ImmOffset = CurDAG->getTargetConstant(0, MVT::i16);
+ return true;
+}
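
SelectMUBUFAddr64 recognizes three address shapes, peeling a 12-bit unsigned immediate off the top when one is present. A toy classifier over a hypothetical expression tree (names invented for the sketch; the real code walks SDNodes and checks isUInt<12>):

#include <cstdint>
#include <cstdio>

struct Node {
  enum Kind { AddK, RegK, ImmK } K;
  uint64_t Val = 0;
  const Node *L = nullptr;
  const Node *R = nullptr;
};

static bool fitsUImm12(uint64_t V) { return V < (1u << 12); }

static const char *classify(const Node &Addr) {
  if (Addr.K == Node::AddK && Addr.R->K == Node::ImmK &&
      fitsUImm12(Addr.R->Val)) {
    if (Addr.L->K == Node::AddK)
      return "(add (add base, offset), imm12)";   // all three fields used
    return "(add offset, imm12), zero base";
  }
  if (Addr.K == Node::AddK)
    return "(add base, offset), no immediate";
  return "bare address, offset only";             // the default case
}

int main() {
  Node Base{Node::RegK}, Off{Node::RegK};
  Node C16{Node::ImmK, 16};
  Node Inner{Node::AddK, 0, &Base, &Off};
  Node Outer{Node::AddK, 0, &Inner, &C16};
  std::printf("%s\n", classify(Outer));  // (add (add base, offset), imm12)
}
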
+
void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
const AMDGPUTargetLowering& Lowering =
*static_cast<const AMDGPUTargetLowering*>(getTargetLowering());
diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp
index 6c443ea..0ada7a3 100644
--- a/lib/Target/R600/AMDGPUISelLowering.cpp
+++ b/lib/Target/R600/AMDGPUISelLowering.cpp
@@ -16,9 +16,9 @@
#include "AMDGPUISelLowering.h"
#include "AMDGPU.h"
#include "AMDGPUFrameLowering.h"
+#include "AMDGPUIntrinsicInfo.h"
#include "AMDGPURegisterInfo.h"
#include "AMDGPUSubtarget.h"
-#include "AMDILIntrinsicInfo.h"
#include "R600MachineFunctionInfo.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/Analysis/ValueTracking.h"
@@ -84,13 +84,37 @@ static bool allocateStack(unsigned ValNo, MVT ValVT, MVT LocVT,
#include "AMDGPUGenCallingConv.inc"
+// Find a larger type to do a load / store of a vector with.
+EVT AMDGPUTargetLowering::getEquivalentMemType(LLVMContext &Ctx, EVT VT) {
+ unsigned StoreSize = VT.getStoreSizeInBits();
+ if (StoreSize <= 32)
+ return EVT::getIntegerVT(Ctx, StoreSize);
+
+ assert(StoreSize % 32 == 0 && "Store size not a multiple of 32");
+ return EVT::getVectorVT(Ctx, MVT::i32, StoreSize / 32);
+}
+
+// Type to use when loading a vector of the given size into registers.
+EVT AMDGPUTargetLowering::getEquivalentLoadRegType(LLVMContext &Ctx, EVT VT) {
+ unsigned StoreSize = VT.getStoreSizeInBits();
+ if (StoreSize <= 32)
+ return EVT::getIntegerVT(Ctx, 32);
+
+ return EVT::getVectorVT(Ctx, MVT::i32, StoreSize / 32);
+}
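
getEquivalentMemType and getEquivalentLoadRegType above bucket a value by its store size: 32 bits or less becomes a single integer, anything larger (asserted to be a multiple of 32) becomes <N x i32>. A quick standalone sketch of the bucketing, printing type names instead of building EVTs:

#include <cassert>
#include <cstdio>

static void printEquivalentMemType(unsigned StoreSizeInBits) {
  if (StoreSizeInBits <= 32) {
    std::printf("i%u\n", StoreSizeInBits);   // e.g. v2i8 (16 bits) -> i16
    return;
  }
  assert(StoreSizeInBits % 32 == 0 && "Store size not a multiple of 32");
  std::printf("<%u x i32>\n", StoreSizeInBits / 32);
}

int main() {
  printEquivalentMemType(16);   // i16
  printEquivalentMemType(64);   // <2 x i32>
  printEquivalentMemType(128);  // <4 x i32>
}
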
+
AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
TargetLowering(TM, new TargetLoweringObjectFileELF()) {
Subtarget = &TM.getSubtarget<AMDGPUSubtarget>();
- // Initialize target lowering borrowed from AMDIL
- InitAMDILLowering();
+ setOperationAction(ISD::Constant, MVT::i32, Legal);
+ setOperationAction(ISD::Constant, MVT::i64, Legal);
+ setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
+ setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
+
+ setOperationAction(ISD::BR_JT, MVT::Other, Expand);
+ setOperationAction(ISD::BRIND, MVT::Other, Expand);
// We need to custom lower some of the intrinsics
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
@@ -107,9 +131,6 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
setOperationAction(ISD::FROUND, MVT::f32, Legal);
setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
- // The hardware supports ROTR, but not ROTL
- setOperationAction(ISD::ROTL, MVT::i32, Expand);
-
// Lower floating point store/load to integer store/load to reduce the number
// of patterns in tablegen.
setOperationAction(ISD::STORE, MVT::f32, Promote);
@@ -118,6 +139,9 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
setOperationAction(ISD::STORE, MVT::v2f32, Promote);
AddPromotedToType(ISD::STORE, MVT::v2f32, MVT::v2i32);
+ setOperationAction(ISD::STORE, MVT::i64, Promote);
+ AddPromotedToType(ISD::STORE, MVT::i64, MVT::v2i32);
+
setOperationAction(ISD::STORE, MVT::v4f32, Promote);
AddPromotedToType(ISD::STORE, MVT::v4f32, MVT::v4i32);
@@ -161,6 +185,9 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
setOperationAction(ISD::LOAD, MVT::v2f32, Promote);
AddPromotedToType(ISD::LOAD, MVT::v2f32, MVT::v2i32);
+ setOperationAction(ISD::LOAD, MVT::i64, Promote);
+ AddPromotedToType(ISD::LOAD, MVT::i64, MVT::v2i32);
+
setOperationAction(ISD::LOAD, MVT::v4f32, Promote);
AddPromotedToType(ISD::LOAD, MVT::v4f32, MVT::v4i32);
@@ -202,29 +229,63 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
setOperationAction(ISD::BR_CC, MVT::i1, Expand);
- setOperationAction(ISD::SELECT_CC, MVT::i64, Expand);
+ if (Subtarget->getGeneration() < AMDGPUSubtarget::SEA_ISLANDS) {
+ setOperationAction(ISD::FCEIL, MVT::f64, Custom);
+ setOperationAction(ISD::FTRUNC, MVT::f64, Custom);
+ setOperationAction(ISD::FRINT, MVT::f64, Custom);
+ setOperationAction(ISD::FFLOOR, MVT::f64, Custom);
+ }
- setOperationAction(ISD::FNEG, MVT::v2f32, Expand);
- setOperationAction(ISD::FNEG, MVT::v4f32, Expand);
+ if (!Subtarget->hasBFI()) {
+ // fcopysign can be done in a single instruction with BFI.
+ setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
+ }
- setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
+ const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
+ for (MVT VT : ScalarIntVTs) {
+ setOperationAction(ISD::SREM, VT, Expand);
+ setOperationAction(ISD::SDIV, VT, Expand);
- setOperationAction(ISD::MUL, MVT::i64, Expand);
- setOperationAction(ISD::SUB, MVT::i64, Expand);
+ // GPU does not have divrem function for signed or unsigned.
+ setOperationAction(ISD::SDIVREM, VT, Custom);
+ setOperationAction(ISD::UDIVREM, VT, Custom);
+
+ // GPU does not have [S|U]MUL_LOHI functions as a single instruction.
+ setOperationAction(ISD::SMUL_LOHI, VT, Expand);
+ setOperationAction(ISD::UMUL_LOHI, VT, Expand);
+ setOperationAction(ISD::BSWAP, VT, Expand);
+ setOperationAction(ISD::CTTZ, VT, Expand);
+ setOperationAction(ISD::CTLZ, VT, Expand);
+ }
+
+ if (!Subtarget->hasBCNT(32))
+ setOperationAction(ISD::CTPOP, MVT::i32, Expand);
+
+ if (!Subtarget->hasBCNT(64))
+ setOperationAction(ISD::CTPOP, MVT::i64, Expand);
+
+ // The hardware supports 32-bit ROTR, but not ROTL.
+ setOperationAction(ISD::ROTL, MVT::i32, Expand);
+ setOperationAction(ISD::ROTL, MVT::i64, Expand);
+ setOperationAction(ISD::ROTR, MVT::i64, Expand);
+
+ setOperationAction(ISD::FP_TO_SINT, MVT::i64, Expand);
+ setOperationAction(ISD::MUL, MVT::i64, Expand);
+ setOperationAction(ISD::MULHU, MVT::i64, Expand);
+ setOperationAction(ISD::MULHS, MVT::i64, Expand);
setOperationAction(ISD::UDIV, MVT::i32, Expand);
- setOperationAction(ISD::UDIVREM, MVT::i32, Custom);
- setOperationAction(ISD::UDIVREM, MVT::i64, Custom);
setOperationAction(ISD::UREM, MVT::i32, Expand);
- setOperationAction(ISD::VSELECT, MVT::v2f32, Expand);
- setOperationAction(ISD::VSELECT, MVT::v4f32, Expand);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
+ setOperationAction(ISD::SELECT_CC, MVT::i64, Expand);
- static const MVT::SimpleValueType IntTypes[] = {
+ static const MVT::SimpleValueType VectorIntTypes[] = {
MVT::v2i32, MVT::v4i32
};
- for (MVT VT : IntTypes) {
- //Expand the following operations for the current type by default
+ for (MVT VT : VectorIntTypes) {
+ // Expand the following operations for the current type by default.
setOperationAction(ISD::ADD, VT, Expand);
setOperationAction(ISD::AND, VT, Expand);
setOperationAction(ISD::FP_TO_SINT, VT, Expand);
@@ -232,40 +293,93 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
setOperationAction(ISD::MUL, VT, Expand);
setOperationAction(ISD::OR, VT, Expand);
setOperationAction(ISD::SHL, VT, Expand);
- setOperationAction(ISD::SINT_TO_FP, VT, Expand);
- setOperationAction(ISD::SRL, VT, Expand);
setOperationAction(ISD::SRA, VT, Expand);
+ setOperationAction(ISD::SRL, VT, Expand);
+ setOperationAction(ISD::ROTL, VT, Expand);
+ setOperationAction(ISD::ROTR, VT, Expand);
setOperationAction(ISD::SUB, VT, Expand);
- setOperationAction(ISD::UDIV, VT, Expand);
+ setOperationAction(ISD::SINT_TO_FP, VT, Expand);
setOperationAction(ISD::UINT_TO_FP, VT, Expand);
+ // TODO: Implement custom UREM / SREM routines.
+ setOperationAction(ISD::SDIV, VT, Expand);
+ setOperationAction(ISD::UDIV, VT, Expand);
+ setOperationAction(ISD::SREM, VT, Expand);
setOperationAction(ISD::UREM, VT, Expand);
+ setOperationAction(ISD::SMUL_LOHI, VT, Expand);
+ setOperationAction(ISD::UMUL_LOHI, VT, Expand);
+ setOperationAction(ISD::SDIVREM, VT, Custom);
+ setOperationAction(ISD::UDIVREM, VT, Custom);
+ setOperationAction(ISD::ADDC, VT, Expand);
+ setOperationAction(ISD::SUBC, VT, Expand);
+ setOperationAction(ISD::ADDE, VT, Expand);
+ setOperationAction(ISD::SUBE, VT, Expand);
setOperationAction(ISD::SELECT, VT, Expand);
setOperationAction(ISD::VSELECT, VT, Expand);
+ setOperationAction(ISD::SELECT_CC, VT, Expand);
setOperationAction(ISD::XOR, VT, Expand);
+ setOperationAction(ISD::BSWAP, VT, Expand);
+ setOperationAction(ISD::CTPOP, VT, Expand);
+ setOperationAction(ISD::CTTZ, VT, Expand);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand);
+ setOperationAction(ISD::CTLZ, VT, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand);
+ setOperationAction(ISD::VECTOR_SHUFFLE, VT, Expand);
}
- static const MVT::SimpleValueType FloatTypes[] = {
+ static const MVT::SimpleValueType FloatVectorTypes[] = {
MVT::v2f32, MVT::v4f32
};
- for (MVT VT : FloatTypes) {
+ for (MVT VT : FloatVectorTypes) {
setOperationAction(ISD::FABS, VT, Expand);
setOperationAction(ISD::FADD, VT, Expand);
+ setOperationAction(ISD::FCEIL, VT, Expand);
setOperationAction(ISD::FCOS, VT, Expand);
setOperationAction(ISD::FDIV, VT, Expand);
+ setOperationAction(ISD::FEXP2, VT, Expand);
+ setOperationAction(ISD::FLOG2, VT, Expand);
setOperationAction(ISD::FPOW, VT, Expand);
setOperationAction(ISD::FFLOOR, VT, Expand);
setOperationAction(ISD::FTRUNC, VT, Expand);
setOperationAction(ISD::FMUL, VT, Expand);
+ setOperationAction(ISD::FMA, VT, Expand);
setOperationAction(ISD::FRINT, VT, Expand);
+ setOperationAction(ISD::FNEARBYINT, VT, Expand);
setOperationAction(ISD::FSQRT, VT, Expand);
setOperationAction(ISD::FSIN, VT, Expand);
setOperationAction(ISD::FSUB, VT, Expand);
+ setOperationAction(ISD::FNEG, VT, Expand);
setOperationAction(ISD::SELECT, VT, Expand);
+ setOperationAction(ISD::VSELECT, VT, Expand);
+ setOperationAction(ISD::SELECT_CC, VT, Expand);
+ setOperationAction(ISD::FCOPYSIGN, VT, Expand);
+ setOperationAction(ISD::VECTOR_SHUFFLE, VT, Expand);
}
+ setOperationAction(ISD::FNEARBYINT, MVT::f32, Custom);
+ setOperationAction(ISD::FNEARBYINT, MVT::f64, Custom);
+
setTargetDAGCombine(ISD::MUL);
setTargetDAGCombine(ISD::SELECT_CC);
+
+ setSchedulingPreference(Sched::RegPressure);
+ setJumpIsExpensive(true);
+
+ setSelectIsExpensive(false);
+ PredictableSelectIsExpensive = false;
+
+ // There are no integer divide instructions, and these expand to a pretty
+ // large sequence of instructions.
+ setIntDivIsCheap(false);
+ setPow2DivIsCheap(false);
+
+ // TODO: Investigate this when 64-bit divides are implemented.
+ addBypassSlowDiv(64, 32);
+
+ // FIXME: Need to really handle these.
+ MaxStoresPerMemcpy = 4096;
+ MaxStoresPerMemmove = 4096;
+ MaxStoresPerMemset = 4096;
}
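For reference, the 64-to-32-bit divide bypass amounts to a runtime width check on both operands. A minimal C++ sketch of the transformed code (the function name is illustrative, not LLVM API):

#include <cstdint>

uint64_t bypassedUDiv64(uint64_t a, uint64_t b) {
  if (((a | b) >> 32) == 0)                       // both operands fit in 32 bits
    return uint64_t(uint32_t(a) / uint32_t(b));   // cheap 32-bit divide
  return a / b;                                   // slow full-width divide
}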
//===----------------------------------------------------------------------===//
@@ -276,6 +390,23 @@ MVT AMDGPUTargetLowering::getVectorIdxTy() const {
return MVT::i32;
}
+bool AMDGPUTargetLowering::isSelectSupported(SelectSupportKind SelType) const {
+ return true;
+}
+
+// The backend supports 32 and 64 bit floating point immediates.
+// FIXME: Why are we reporting vectors of FP immediates as legal?
+bool AMDGPUTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
+ EVT ScalarVT = VT.getScalarType();
+ return (ScalarVT == MVT::f32 || ScalarVT == MVT::f64);
+}
+
+// We don't want to shrink f64 / f32 constants.
+bool AMDGPUTargetLowering::ShouldShrinkFPConstant(EVT VT) const {
+ EVT ScalarVT = VT.getScalarType();
+ return (ScalarVT != MVT::f32 && ScalarVT != MVT::f64);
+}
+
bool AMDGPUTargetLowering::isLoadBitCastBeneficial(EVT LoadTy,
EVT CastTy) const {
if (LoadTy.getSizeInBits() != CastTy.getSizeInBits())
@@ -330,6 +461,10 @@ bool AMDGPUTargetLowering::isZExtFree(EVT Src, EVT Dest) const {
return Src == MVT::i32 && Dest == MVT::i64;
}
+bool AMDGPUTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
+ return isZExtFree(Val.getValueType(), VT2);
+}
+
bool AMDGPUTargetLowering::isNarrowingProfitable(EVT SrcVT, EVT DestVT) const {
// There aren't really 64-bit registers, but pairs of 32-bit ones and only a
// limited number of native 64-bit operations. Shrinking an operation to fit
@@ -383,25 +518,28 @@ SDValue AMDGPUTargetLowering::LowerCall(CallLoweringInfo &CLI,
return SDValue();
}
-SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
- const {
+SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op,
+ SelectionDAG &DAG) const {
switch (Op.getOpcode()) {
default:
Op.getNode()->dump();
llvm_unreachable("Custom lowering code for this"
"instruction is not implemented yet!");
break;
- // AMDIL DAG lowering
- case ISD::SDIV: return LowerSDIV(Op, DAG);
- case ISD::SREM: return LowerSREM(Op, DAG);
case ISD::SIGN_EXTEND_INREG: return LowerSIGN_EXTEND_INREG(Op, DAG);
- case ISD::BRCOND: return LowerBRCOND(Op, DAG);
- // AMDGPU DAG lowering
case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG);
case ISD::EXTRACT_SUBVECTOR: return LowerEXTRACT_SUBVECTOR(Op, DAG);
case ISD::FrameIndex: return LowerFrameIndex(Op, DAG);
case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
+ case ISD::SDIV: return LowerSDIV(Op, DAG);
+ case ISD::SREM: return LowerSREM(Op, DAG);
case ISD::UDIVREM: return LowerUDIVREM(Op, DAG);
+ case ISD::SDIVREM: return LowerSDIVREM(Op, DAG);
+ case ISD::FCEIL: return LowerFCEIL(Op, DAG);
+ case ISD::FTRUNC: return LowerFTRUNC(Op, DAG);
+ case ISD::FRINT: return LowerFRINT(Op, DAG);
+ case ISD::FNEARBYINT: return LowerFNEARBYINT(Op, DAG);
+ case ISD::FFLOOR: return LowerFFLOOR(Op, DAG);
case ISD::UINT_TO_FP: return LowerUINT_TO_FP(Op, DAG);
}
return Op;
@@ -419,95 +557,23 @@ void AMDGPUTargetLowering::ReplaceNodeResults(SDNode *N,
// ReplaceNodeResults to sext_in_reg to an illegal type, so we'll just do
// nothing here and let the illegal result integer be handled normally.
return;
- case ISD::UDIV: {
- SDValue Op = SDValue(N, 0);
- SDLoc DL(Op);
- EVT VT = Op.getValueType();
- SDValue UDIVREM = DAG.getNode(ISD::UDIVREM, DL, DAG.getVTList(VT, VT),
- N->getOperand(0), N->getOperand(1));
- Results.push_back(UDIVREM);
- break;
- }
- case ISD::UREM: {
- SDValue Op = SDValue(N, 0);
- SDLoc DL(Op);
- EVT VT = Op.getValueType();
- SDValue UDIVREM = DAG.getNode(ISD::UDIVREM, DL, DAG.getVTList(VT, VT),
- N->getOperand(0), N->getOperand(1));
- Results.push_back(UDIVREM.getValue(1));
- break;
- }
- case ISD::UDIVREM: {
- SDValue Op = SDValue(N, 0);
- SDLoc DL(Op);
- EVT VT = Op.getValueType();
- EVT HalfVT = VT.getHalfSizedIntegerVT(*DAG.getContext());
-
- SDValue one = DAG.getConstant(1, HalfVT);
- SDValue zero = DAG.getConstant(0, HalfVT);
-
- //HiLo split
- SDValue LHS = N->getOperand(0);
- SDValue LHS_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, LHS, zero);
- SDValue LHS_Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, LHS, one);
-
- SDValue RHS = N->getOperand(1);
- SDValue RHS_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, RHS, zero);
- SDValue RHS_Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, RHS, one);
-
- // Get Speculative values
- SDValue DIV_Part = DAG.getNode(ISD::UDIV, DL, HalfVT, LHS_Hi, RHS_Lo);
- SDValue REM_Part = DAG.getNode(ISD::UREM, DL, HalfVT, LHS_Hi, RHS_Lo);
-
- SDValue REM_Hi = zero;
- SDValue REM_Lo = DAG.getSelectCC(DL, RHS_Hi, zero, REM_Part, LHS_Hi, ISD::SETEQ);
-
- SDValue DIV_Hi = DAG.getSelectCC(DL, RHS_Hi, zero, DIV_Part, zero, ISD::SETEQ);
- SDValue DIV_Lo = zero;
-
- const unsigned halfBitWidth = HalfVT.getSizeInBits();
-
- for (unsigned i = 0; i < halfBitWidth; ++i) {
- SDValue POS = DAG.getConstant(halfBitWidth - i - 1, HalfVT);
- // Get Value of high bit
- SDValue HBit;
- if (halfBitWidth == 32 && Subtarget->hasBFE()) {
- HBit = DAG.getNode(AMDGPUISD::BFE_U32, DL, HalfVT, LHS_Lo, POS, one);
- } else {
- HBit = DAG.getNode(ISD::SRL, DL, HalfVT, LHS_Lo, POS);
- HBit = DAG.getNode(ISD::AND, DL, HalfVT, HBit, one);
- }
-
- SDValue Carry = DAG.getNode(ISD::SRL, DL, HalfVT, REM_Lo,
- DAG.getConstant(halfBitWidth - 1, HalfVT));
- REM_Hi = DAG.getNode(ISD::SHL, DL, HalfVT, REM_Hi, one);
- REM_Hi = DAG.getNode(ISD::OR, DL, HalfVT, REM_Hi, Carry);
-
- REM_Lo = DAG.getNode(ISD::SHL, DL, HalfVT, REM_Lo, one);
- REM_Lo = DAG.getNode(ISD::OR, DL, HalfVT, REM_Lo, HBit);
-
-
- SDValue REM = DAG.getNode(ISD::BUILD_PAIR, DL, VT, REM_Lo, REM_Hi);
-
- SDValue BIT = DAG.getConstant(1 << (halfBitWidth - i - 1), HalfVT);
- SDValue realBIT = DAG.getSelectCC(DL, REM, RHS, BIT, zero, ISD::SETGE);
-
- DIV_Lo = DAG.getNode(ISD::OR, DL, HalfVT, DIV_Lo, realBIT);
-
- // Update REM
-
- SDValue REM_sub = DAG.getNode(ISD::SUB, DL, VT, REM, RHS);
-
- REM = DAG.getSelectCC(DL, REM, RHS, REM_sub, REM, ISD::SETGE);
- REM_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, REM, zero);
- REM_Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, REM, one);
- }
+ case ISD::LOAD: {
+ SDNode *Node = LowerLOAD(SDValue(N, 0), DAG).getNode();
+ if (!Node)
+ return;
- SDValue REM = DAG.getNode(ISD::BUILD_PAIR, DL, VT, REM_Lo, REM_Hi);
- SDValue DIV = DAG.getNode(ISD::BUILD_PAIR, DL, VT, DIV_Lo, DIV_Hi);
- Results.push_back(DIV);
- Results.push_back(REM);
- break;
+ Results.push_back(SDValue(Node, 0));
+ Results.push_back(SDValue(Node, 1));
+ // XXX: LLVM does not seem to replace the chain value inside the
+ // CustomWidenLowerNode path, so do it explicitly here.
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N,1), SDValue(Node, 1));
+ return;
+ }
+ case ISD::STORE: {
+ SDValue Lowered = LowerSTORE(SDValue(N, 0), DAG);
+ if (Lowered.getNode())
+ Results.push_back(Lowered);
+ return;
}
default:
return;
@@ -531,12 +597,14 @@ SDValue AMDGPUTargetLowering::LowerConstantInitializer(const Constant* Init,
SelectionDAG &DAG) const {
const DataLayout *TD = getTargetMachine().getDataLayout();
SDLoc DL(InitPtr);
+ Type *InitTy = Init->getType();
+
if (const ConstantInt *CI = dyn_cast<ConstantInt>(Init)) {
- EVT VT = EVT::getEVT(CI->getType());
- PointerType *PtrTy = PointerType::get(CI->getType(), 0);
- return DAG.getStore(Chain, DL, DAG.getConstant(*CI, VT), InitPtr,
- MachinePointerInfo(UndefValue::get(PtrTy)), false, false,
- TD->getPrefTypeAlignment(CI->getType()));
+ EVT VT = EVT::getEVT(InitTy);
+ PointerType *PtrTy = PointerType::get(InitTy, AMDGPUAS::PRIVATE_ADDRESS);
+ return DAG.getStore(Chain, DL, DAG.getConstant(*CI, VT), InitPtr,
+ MachinePointerInfo(UndefValue::get(PtrTy)), false, false,
+ TD->getPrefTypeAlignment(InitTy));
}
if (const ConstantFP *CFP = dyn_cast<ConstantFP>(Init)) {
@@ -547,7 +615,6 @@ SDValue AMDGPUTargetLowering::LowerConstantInitializer(const Constant* Init,
TD->getPrefTypeAlignment(CFP->getType()));
}
- Type *InitTy = Init->getType();
if (StructType *ST = dyn_cast<StructType>(InitTy)) {
const StructLayout *SL = TD->getStructLayout(ST);
@@ -589,6 +656,14 @@ SDValue AMDGPUTargetLowering::LowerConstantInitializer(const Constant* Init,
return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
}
+ if (isa<UndefValue>(Init)) {
+ EVT VT = EVT::getEVT(InitTy);
+ PointerType *PtrTy = PointerType::get(InitTy, AMDGPUAS::PRIVATE_ADDRESS);
+ return DAG.getStore(Chain, DL, DAG.getUNDEF(VT), InitPtr,
+ MachinePointerInfo(UndefValue::get(PtrTy)), false, false,
+ TD->getPrefTypeAlignment(InitTy));
+ }
+
Init->dump();
llvm_unreachable("Unhandled constant initializer");
}
@@ -628,11 +703,19 @@ SDValue AMDGPUTargetLowering::LowerGlobalAddress(AMDGPUMachineFunction* MFI,
unsigned Size = TD->getTypeAllocSize(EltType);
unsigned Alignment = TD->getPrefTypeAlignment(EltType);
+ MVT PrivPtrVT = getPointerTy(AMDGPUAS::PRIVATE_ADDRESS);
+ MVT ConstPtrVT = getPointerTy(AMDGPUAS::CONSTANT_ADDRESS);
+
+ int FI = FrameInfo->CreateStackObject(Size, Alignment, false);
+ SDValue InitPtr = DAG.getFrameIndex(FI, PrivPtrVT);
+
const GlobalVariable *Var = cast<GlobalVariable>(GV);
+ if (!Var->hasInitializer()) {
+ // This has no use, but bugpoint will hit it.
+ return DAG.getZExtOrTrunc(InitPtr, SDLoc(Op), ConstPtrVT);
+ }
+
const Constant *Init = Var->getInitializer();
- int FI = FrameInfo->CreateStackObject(Size, Alignment, false);
- SDValue InitPtr = DAG.getFrameIndex(FI,
- getPointerTy(AMDGPUAS::PRIVATE_ADDRESS));
SmallVector<SDNode*, 8> WorkList;
for (SDNode::use_iterator I = DAG.getEntryNode()->use_begin(),
@@ -651,8 +734,7 @@ SDValue AMDGPUTargetLowering::LowerGlobalAddress(AMDGPUMachineFunction* MFI,
}
DAG.UpdateNodeOperands(*I, Ops);
}
- return DAG.getZExtOrTrunc(InitPtr, SDLoc(Op),
- getPointerTy(AMDGPUAS::CONSTANT_ADDRESS));
+ return DAG.getZExtOrTrunc(InitPtr, SDLoc(Op), ConstPtrVT);
}
}
}
@@ -688,8 +770,7 @@ SDValue AMDGPUTargetLowering::LowerFrameIndex(SDValue Op,
const AMDGPUFrameLowering *TFL =
static_cast<const AMDGPUFrameLowering*>(getTargetMachine().getFrameLowering());
- FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Op);
- assert(FIN);
+ FrameIndexSDNode *FIN = cast<FrameIndexSDNode>(Op);
unsigned FrameIndex = FIN->getIndex();
unsigned Offset = TFL->getFrameIndexOffset(MF, FrameIndex);
@@ -705,26 +786,66 @@ SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
switch (IntrinsicID) {
default: return Op;
- case AMDGPUIntrinsic::AMDIL_abs:
+ case AMDGPUIntrinsic::AMDGPU_abs:
+ case AMDGPUIntrinsic::AMDIL_abs: // Legacy name.
return LowerIntrinsicIABS(Op, DAG);
- case AMDGPUIntrinsic::AMDIL_exp:
- return DAG.getNode(ISD::FEXP2, DL, VT, Op.getOperand(1));
case AMDGPUIntrinsic::AMDGPU_lrp:
return LowerIntrinsicLRP(Op, DAG);
- case AMDGPUIntrinsic::AMDIL_fraction:
+ case AMDGPUIntrinsic::AMDGPU_fract:
+ case AMDGPUIntrinsic::AMDIL_fraction: // Legacy name.
return DAG.getNode(AMDGPUISD::FRACT, DL, VT, Op.getOperand(1));
- case AMDGPUIntrinsic::AMDIL_max:
- return DAG.getNode(AMDGPUISD::FMAX, DL, VT, Op.getOperand(1),
- Op.getOperand(2));
+
+ case AMDGPUIntrinsic::AMDGPU_clamp:
+ case AMDGPUIntrinsic::AMDIL_clamp: // Legacy name.
+ return DAG.getNode(AMDGPUISD::CLAMP, DL, VT,
+ Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
+
+ case Intrinsic::AMDGPU_div_scale: {
+ // The third parameter is required to be a constant.
+ const ConstantSDNode *Param = dyn_cast<ConstantSDNode>(Op.getOperand(3));
+ if (!Param)
+ return DAG.getUNDEF(VT);
+
+ // Translate to the operands expected by the machine instruction. The
+ // first operand must be the same as one of the other operands.
+ SDValue Numerator = Op.getOperand(1);
+ SDValue Denominator = Op.getOperand(2);
+ SDValue Src0 = Param->isAllOnesValue() ? Numerator : Denominator;
+
+ return DAG.getNode(AMDGPUISD::DIV_SCALE, DL, VT,
+ Src0, Denominator, Numerator);
+ }
+
+ case Intrinsic::AMDGPU_div_fmas:
+ return DAG.getNode(AMDGPUISD::DIV_FMAS, DL, VT,
+ Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
+
+ case Intrinsic::AMDGPU_div_fixup:
+ return DAG.getNode(AMDGPUISD::DIV_FIXUP, DL, VT,
+ Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
+
+ case Intrinsic::AMDGPU_trig_preop:
+ return DAG.getNode(AMDGPUISD::TRIG_PREOP, DL, VT,
+ Op.getOperand(1), Op.getOperand(2));
+
+ case Intrinsic::AMDGPU_rcp:
+ return DAG.getNode(AMDGPUISD::RCP, DL, VT, Op.getOperand(1));
+
+ case Intrinsic::AMDGPU_rsq:
+ return DAG.getNode(AMDGPUISD::RSQ, DL, VT, Op.getOperand(1));
+
+ case AMDGPUIntrinsic::AMDGPU_legacy_rsq:
+ return DAG.getNode(AMDGPUISD::RSQ_LEGACY, DL, VT, Op.getOperand(1));
+
+ case Intrinsic::AMDGPU_rsq_clamped:
+ return DAG.getNode(AMDGPUISD::RSQ_CLAMPED, DL, VT, Op.getOperand(1));
+
case AMDGPUIntrinsic::AMDGPU_imax:
return DAG.getNode(AMDGPUISD::SMAX, DL, VT, Op.getOperand(1),
Op.getOperand(2));
case AMDGPUIntrinsic::AMDGPU_umax:
return DAG.getNode(AMDGPUISD::UMAX, DL, VT, Op.getOperand(1),
Op.getOperand(2));
- case AMDGPUIntrinsic::AMDIL_min:
- return DAG.getNode(AMDGPUISD::FMIN, DL, VT, Op.getOperand(1),
- Op.getOperand(2));
case AMDGPUIntrinsic::AMDGPU_imin:
return DAG.getNode(AMDGPUISD::SMIN, DL, VT, Op.getOperand(1),
Op.getOperand(2));
@@ -748,6 +869,18 @@ SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
return DAG.getNode(AMDGPUISD::MAD_I24, DL, VT,
Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
+ case AMDGPUIntrinsic::AMDGPU_cvt_f32_ubyte0:
+ return DAG.getNode(AMDGPUISD::CVT_F32_UBYTE0, DL, VT, Op.getOperand(1));
+
+ case AMDGPUIntrinsic::AMDGPU_cvt_f32_ubyte1:
+ return DAG.getNode(AMDGPUISD::CVT_F32_UBYTE1, DL, VT, Op.getOperand(1));
+
+ case AMDGPUIntrinsic::AMDGPU_cvt_f32_ubyte2:
+ return DAG.getNode(AMDGPUISD::CVT_F32_UBYTE2, DL, VT, Op.getOperand(1));
+
+ case AMDGPUIntrinsic::AMDGPU_cvt_f32_ubyte3:
+ return DAG.getNode(AMDGPUISD::CVT_F32_UBYTE3, DL, VT, Op.getOperand(1));
+
case AMDGPUIntrinsic::AMDGPU_bfe_i32:
return DAG.getNode(AMDGPUISD::BFE_I32, DL, VT,
Op.getOperand(1),
@@ -771,8 +904,16 @@ SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
Op.getOperand(1),
Op.getOperand(2));
- case AMDGPUIntrinsic::AMDIL_round_nearest:
+ case AMDGPUIntrinsic::AMDGPU_brev:
+ return DAG.getNode(AMDGPUISD::BREV, DL, VT, Op.getOperand(1));
+
+ case AMDGPUIntrinsic::AMDIL_exp: // Legacy name.
+ return DAG.getNode(ISD::FEXP2, DL, VT, Op.getOperand(1));
+
+ case AMDGPUIntrinsic::AMDIL_round_nearest: // Legacy name.
return DAG.getNode(ISD::FRINT, DL, VT, Op.getOperand(1));
+ case AMDGPUIntrinsic::AMDGPU_trunc: // Legacy name.
+ return DAG.getNode(ISD::FTRUNC, DL, VT, Op.getOperand(1));
}
}
@@ -863,27 +1004,41 @@ SDValue AMDGPUTargetLowering::SplitVectorLoad(const SDValue &Op,
SelectionDAG &DAG) const {
LoadSDNode *Load = cast<LoadSDNode>(Op);
EVT MemEltVT = Load->getMemoryVT().getVectorElementType();
+ EVT LoadVT = Op.getValueType();
EVT EltVT = Op.getValueType().getVectorElementType();
EVT PtrVT = Load->getBasePtr().getValueType();
+
unsigned NumElts = Load->getMemoryVT().getVectorNumElements();
SmallVector<SDValue, 8> Loads;
+ SmallVector<SDValue, 8> Chains;
+
SDLoc SL(Op);
for (unsigned i = 0, e = NumElts; i != e; ++i) {
SDValue Ptr = DAG.getNode(ISD::ADD, SL, PtrVT, Load->getBasePtr(),
DAG.getConstant(i * (MemEltVT.getSizeInBits() / 8), PtrVT));
- Loads.push_back(DAG.getExtLoad(Load->getExtensionType(), SL, EltVT,
- Load->getChain(), Ptr,
- MachinePointerInfo(Load->getMemOperand()->getValue()),
- MemEltVT, Load->isVolatile(), Load->isNonTemporal(),
- Load->getAlignment()));
+
+ SDValue NewLoad
+ = DAG.getExtLoad(Load->getExtensionType(), SL, EltVT,
+ Load->getChain(), Ptr,
+ MachinePointerInfo(Load->getMemOperand()->getValue()),
+ MemEltVT, Load->isVolatile(), Load->isNonTemporal(),
+ Load->getAlignment());
+ Loads.push_back(NewLoad.getValue(0));
+ Chains.push_back(NewLoad.getValue(1));
}
- return DAG.getNode(ISD::BUILD_VECTOR, SL, Op.getValueType(), Loads);
+
+ SDValue Ops[] = {
+ DAG.getNode(ISD::BUILD_VECTOR, SL, LoadVT, Loads),
+ DAG.getNode(ISD::TokenFactor, SL, MVT::Other, Chains)
+ };
+
+ return DAG.getMergeValues(Ops, SL);
}
SDValue AMDGPUTargetLowering::MergeVectorStore(const SDValue &Op,
SelectionDAG &DAG) const {
- StoreSDNode *Store = dyn_cast<StoreSDNode>(Op);
+ StoreSDNode *Store = cast<StoreSDNode>(Op);
EVT MemVT = Store->getMemoryVT();
unsigned MemBits = MemVT.getSizeInBits();
@@ -981,7 +1136,13 @@ SDValue AMDGPUTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
Load->getBasePtr(),
MemVT,
Load->getMemOperand());
- return DAG.getNode(ISD::getExtForLoadExtType(ExtType), DL, VT, ExtLoad32);
+
+ SDValue Ops[] = {
+ DAG.getNode(ISD::getExtForLoadExtType(ExtType), DL, VT, ExtLoad32),
+ ExtLoad32.getValue(1)
+ };
+
+ return DAG.getMergeValues(Ops, DL);
}
if (ExtType == ISD::NON_EXTLOAD && VT.getSizeInBits() < 32) {
@@ -995,7 +1156,13 @@ SDValue AMDGPUTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
SDValue NewLD = DAG.getExtLoad(ISD::EXTLOAD, DL, MVT::i32, Chain,
BasePtr, MVT::i8, MMO);
- return DAG.getNode(ISD::TRUNCATE, DL, VT, NewLD);
+
+ SDValue Ops[] = {
+ DAG.getNode(ISD::TRUNCATE, DL, VT, NewLD),
+ NewLD.getValue(1)
+ };
+
+ return DAG.getMergeValues(Ops, DL);
}
// Lower constant address space global variable loads.
@@ -1003,11 +1170,12 @@ SDValue AMDGPUTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
isa<GlobalVariable>(
GetUnderlyingObject(Load->getMemOperand()->getValue()))) {
+
SDValue Ptr = DAG.getZExtOrTrunc(Load->getBasePtr(), DL,
getPointerTy(AMDGPUAS::PRIVATE_ADDRESS));
Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr,
DAG.getConstant(2, MVT::i32));
- return DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, Op.getValueType(),
+ return DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, Op->getVTList(),
Load->getChain(), Ptr,
DAG.getTargetConstant(0, MVT::i32), Op.getOperand(2));
}
@@ -1034,10 +1202,21 @@ SDValue AMDGPUTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
EVT MemEltVT = MemVT.getScalarType();
if (ExtType == ISD::SEXTLOAD) {
SDValue MemEltVTNode = DAG.getValueType(MemEltVT);
- return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i32, Ret, MemEltVTNode);
+
+ SDValue Ops[] = {
+ DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i32, Ret, MemEltVTNode),
+ Load->getChain()
+ };
+
+ return DAG.getMergeValues(Ops, DL);
}
- return DAG.getZeroExtendInReg(Ret, DL, MemEltVT);
+ SDValue Ops[] = {
+ DAG.getZeroExtendInReg(Ret, DL, MemEltVT),
+ Load->getChain()
+ };
+
+ return DAG.getMergeValues(Ops, DL);
}
SDValue AMDGPUTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
@@ -1097,6 +1276,251 @@ SDValue AMDGPUTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
return SDValue();
}
+SDValue AMDGPUTargetLowering::LowerSDIV24(SDValue Op, SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ EVT OVT = Op.getValueType();
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ MVT INTTY;
+ MVT FLTTY;
+ if (!OVT.isVector()) {
+ INTTY = MVT::i32;
+ FLTTY = MVT::f32;
+ } else if (OVT.getVectorNumElements() == 2) {
+ INTTY = MVT::v2i32;
+ FLTTY = MVT::v2f32;
+ } else if (OVT.getVectorNumElements() == 4) {
+ INTTY = MVT::v4i32;
+ FLTTY = MVT::v4f32;
+ }
+ unsigned bitsize = OVT.getScalarType().getSizeInBits();
+ // char|short jq = ia ^ ib;
+ SDValue jq = DAG.getNode(ISD::XOR, DL, OVT, LHS, RHS);
+
+ // jq = jq >> (bitsize - 2)
+ jq = DAG.getNode(ISD::SRA, DL, OVT, jq, DAG.getConstant(bitsize - 2, OVT));
+
+ // jq = jq | 0x1
+ jq = DAG.getNode(ISD::OR, DL, OVT, jq, DAG.getConstant(1, OVT));
+
+ // jq = (int)jq
+ jq = DAG.getSExtOrTrunc(jq, DL, INTTY);
+
+ // int ia = (int)LHS;
+ SDValue ia = DAG.getSExtOrTrunc(LHS, DL, INTTY);
+
+ // int ib = (int)RHS;
+ SDValue ib = DAG.getSExtOrTrunc(RHS, DL, INTTY);
+
+ // float fa = (float)ia;
+ SDValue fa = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ia);
+
+ // float fb = (float)ib;
+ SDValue fb = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ib);
+
+ // float fq = native_divide(fa, fb);
+ SDValue fq = DAG.getNode(ISD::FMUL, DL, FLTTY,
+ fa, DAG.getNode(AMDGPUISD::RCP, DL, FLTTY, fb));
+
+ // fq = trunc(fq);
+ fq = DAG.getNode(ISD::FTRUNC, DL, FLTTY, fq);
+
+ // float fqneg = -fq;
+ SDValue fqneg = DAG.getNode(ISD::FNEG, DL, FLTTY, fq);
+
+ // float fr = mad(fqneg, fb, fa);
+ SDValue fr = DAG.getNode(ISD::FADD, DL, FLTTY,
+ DAG.getNode(ISD::FMUL, DL, FLTTY, fqneg, fb), fa);
+
+ // int iq = (int)fq;
+ SDValue iq = DAG.getNode(ISD::FP_TO_SINT, DL, INTTY, fq);
+
+ // fr = fabs(fr);
+ fr = DAG.getNode(ISD::FABS, DL, FLTTY, fr);
+
+ // fb = fabs(fb);
+ fb = DAG.getNode(ISD::FABS, DL, FLTTY, fb);
+
+ // int cv = fr >= fb;
+ SDValue cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE);
+ // jq = (cv ? jq : 0);
+ jq = DAG.getNode(ISD::SELECT, DL, OVT, cv, jq,
+ DAG.getConstant(0, OVT));
+ // dst = iq + jq;
+ iq = DAG.getSExtOrTrunc(iq, DL, OVT);
+ iq = DAG.getNode(ISD::ADD, DL, OVT, iq, jq);
+ return iq;
+}
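A scalar C++ model of the same algorithm may make the off-by-one correction easier to follow. This sketch assumes the i16 case, a nonzero divisor, and default round-to-nearest float arithmetic; the DAG code applies identical steps per vector lane:

#include <cmath>
#include <cstdint>

int32_t sdiv24(int16_t a, int16_t b) {
  int32_t jq = ((int16_t)(a ^ b) >> 14) | 1;  // +1 or -1: sign of the quotient
  float fa = (float)a;
  float fb = (float)b;
  float fq = truncf(fa * (1.0f / fb));        // quotient estimate via reciprocal
  float fr = fabsf(-fq * fb + fa);            // |remainder| of the estimate
  int32_t iq = (int32_t)fq;
  if (fr >= fabsf(fb))                        // reciprocal rounding lost one step
    iq += jq;
  return iq;
}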
+
+SDValue AMDGPUTargetLowering::LowerSDIV32(SDValue Op, SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ EVT OVT = Op.getValueType();
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ // The LowerSDIV32 function generates code equivalent to the following IL:
+ // mov r0, LHS
+ // mov r1, RHS
+ // ilt r10, r0, 0
+ // ilt r11, r1, 0
+ // iadd r0, r0, r10
+ // iadd r1, r1, r11
+ // ixor r0, r0, r10
+ // ixor r1, r1, r11
+ // udiv r0, r0, r1
+ // ixor r10, r10, r11
+ // iadd r0, r0, r10
+ // ixor DST, r0, r10
+
+ // mov r0, LHS
+ SDValue r0 = LHS;
+
+ // mov r1, RHS
+ SDValue r1 = RHS;
+
+ // ilt r10, r0, 0
+ SDValue r10 = DAG.getSelectCC(DL,
+ r0, DAG.getConstant(0, OVT),
+ DAG.getConstant(-1, OVT),
+ DAG.getConstant(0, OVT),
+ ISD::SETLT);
+
+ // ilt r11, r1, 0
+ SDValue r11 = DAG.getSelectCC(DL,
+ r1, DAG.getConstant(0, OVT),
+ DAG.getConstant(-1, OVT),
+ DAG.getConstant(0, OVT),
+ ISD::SETLT);
+
+ // iadd r0, r0, r10
+ r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
+
+ // iadd r1, r1, r11
+ r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);
+
+ // ixor r0, r0, r10
+ r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
+
+ // ixor r1, r1, r11
+ r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);
+
+ // udiv r0, r0, r1
+ r0 = DAG.getNode(ISD::UDIV, DL, OVT, r0, r1);
+
+ // ixor r10, r10, r11
+ r10 = DAG.getNode(ISD::XOR, DL, OVT, r10, r11);
+
+ // iadd r0, r0, r10
+ r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
+
+ // ixor DST, r0, r10
+ SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
+ return DST;
+}
+
+SDValue AMDGPUTargetLowering::LowerSDIV64(SDValue Op, SelectionDAG &DAG) const {
+ return SDValue(Op.getNode(), 0);
+}
+
+SDValue AMDGPUTargetLowering::LowerSDIV(SDValue Op, SelectionDAG &DAG) const {
+ EVT OVT = Op.getValueType().getScalarType();
+
+ if (OVT == MVT::i64)
+ return LowerSDIV64(Op, DAG);
+
+ if (OVT == MVT::i32)
+ return LowerSDIV32(Op, DAG);
+
+ if (OVT == MVT::i16 || OVT == MVT::i8) {
+ // FIXME: We should be checking for the masked bits. This isn't reached
+ // because i8 and i16 are not legal types.
+ return LowerSDIV24(Op, DAG);
+ }
+
+ return SDValue(Op.getNode(), 0);
+}
+
+SDValue AMDGPUTargetLowering::LowerSREM32(SDValue Op, SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ EVT OVT = Op.getValueType();
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ // The LowerSREM32 function generates code equivalent to the following IL:
+ // mov r0, LHS
+ // mov r1, RHS
+ // ilt r10, r0, 0
+ // ilt r11, r1, 0
+ // iadd r0, r0, r10
+ // iadd r1, r1, r11
+ // ixor r0, r0, r10
+ // ixor r1, r1, r11
+ // udiv r20, r0, r1
+ // umul r20, r20, r1
+ // sub r0, r0, r20
+ // iadd r0, r0, r10
+ // ixor DST, r0, r10
+
+ // mov r0, LHS
+ SDValue r0 = LHS;
+
+ // mov r1, RHS
+ SDValue r1 = RHS;
+
+ // ilt r10, r0, 0
+ SDValue r10 = DAG.getSetCC(DL, OVT, r0, DAG.getConstant(0, OVT), ISD::SETLT);
+
+ // ilt r11, r1, 0
+ SDValue r11 = DAG.getSetCC(DL, OVT, r1, DAG.getConstant(0, OVT), ISD::SETLT);
+
+ // iadd r0, r0, r10
+ r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
+
+ // iadd r1, r1, r11
+ r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);
+
+ // ixor r0, r0, r10
+ r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
+
+ // ixor r1, r1, r11
+ r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);
+
+ // udiv r20, r0, r1
+ SDValue r20 = DAG.getNode(ISD::UDIV, DL, OVT, r0, r1);
+
+ // umul r20, r20, r1
+ r20 = DAG.getNode(AMDGPUISD::UMUL, DL, OVT, r20, r1);
+
+ // sub r0, r0, r20
+ r0 = DAG.getNode(ISD::SUB, DL, OVT, r0, r20);
+
+ // iadd r0, r0, r10
+ r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
+
+ // ixor DST, r0, r10
+ SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
+ return DST;
+}
+
+SDValue AMDGPUTargetLowering::LowerSREM64(SDValue Op, SelectionDAG &DAG) const {
+ return SDValue(Op.getNode(), 0);
+}
+
+SDValue AMDGPUTargetLowering::LowerSREM(SDValue Op, SelectionDAG &DAG) const {
+ EVT OVT = Op.getValueType();
+
+ if (OVT.getScalarType() == MVT::i64)
+ return LowerSREM64(Op, DAG);
+
+ if (OVT.getScalarType() == MVT::i32)
+ return LowerSREM32(Op, DAG);
+
+ return SDValue(Op.getNode(), 0);
+}
+
SDValue AMDGPUTargetLowering::LowerUDIVREM(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
@@ -1201,6 +1625,177 @@ SDValue AMDGPUTargetLowering::LowerUDIVREM(SDValue Op,
return DAG.getMergeValues(Ops, DL);
}
+SDValue AMDGPUTargetLowering::LowerSDIVREM(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ EVT VT = Op.getValueType();
+
+ SDValue Zero = DAG.getConstant(0, VT);
+ SDValue NegOne = DAG.getConstant(-1, VT);
+
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+
+ SDValue LHSign = DAG.getSelectCC(DL, LHS, Zero, NegOne, Zero, ISD::SETLT);
+ SDValue RHSign = DAG.getSelectCC(DL, RHS, Zero, NegOne, Zero, ISD::SETLT);
+ SDValue DSign = DAG.getNode(ISD::XOR, DL, VT, LHSign, RHSign);
+ SDValue RSign = LHSign; // Remainder sign is the same as LHS
+
+ LHS = DAG.getNode(ISD::ADD, DL, VT, LHS, LHSign);
+ RHS = DAG.getNode(ISD::ADD, DL, VT, RHS, RHSign);
+
+ LHS = DAG.getNode(ISD::XOR, DL, VT, LHS, LHSign);
+ RHS = DAG.getNode(ISD::XOR, DL, VT, RHS, RHSign);
+
+ SDValue Div = DAG.getNode(ISD::UDIVREM, DL, DAG.getVTList(VT, VT), LHS, RHS);
+ SDValue Rem = Div.getValue(1);
+
+ Div = DAG.getNode(ISD::XOR, DL, VT, Div, DSign);
+ Rem = DAG.getNode(ISD::XOR, DL, VT, Rem, RSign);
+
+ Div = DAG.getNode(ISD::SUB, DL, VT, Div, DSign);
+ Rem = DAG.getNode(ISD::SUB, DL, VT, Rem, RSign);
+
+ SDValue Res[2] = {
+ Div,
+ Rem
+ };
+ return DAG.getMergeValues(Res, DL);
+}
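The sign fixup relies on two two's-complement identities: (x + s) ^ s equals -x when s is -1 and x when s is 0, and (q ^ d) - d conditionally negates q. A value-level C++ sketch, where the sign words play the role of the NegOne/Zero selects (names illustrative):

#include <cstdint>

void sdivrem32(int32_t lhs, int32_t rhs, int32_t &quot, int32_t &rem) {
  uint32_t lsign = (uint32_t)(lhs >> 31);          // 0xffffffff if lhs < 0, else 0
  uint32_t rsign = (uint32_t)(rhs >> 31);
  uint32_t dsign = lsign ^ rsign;                  // sign of the quotient
  uint32_t ul = ((uint32_t)lhs + lsign) ^ lsign;   // |lhs|
  uint32_t ur = ((uint32_t)rhs + rsign) ^ rsign;   // |rhs|
  quot = (int32_t)(((ul / ur) ^ dsign) - dsign);   // conditional negate
  rem  = (int32_t)(((ul % ur) ^ lsign) - lsign);   // remainder keeps lhs's sign
}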
+
+SDValue AMDGPUTargetLowering::LowerFCEIL(SDValue Op, SelectionDAG &DAG) const {
+ SDLoc SL(Op);
+ SDValue Src = Op.getOperand(0);
+
+ // result = trunc(src)
+ // if (src > 0.0 && src != result)
+ // result += 1.0
+
+ SDValue Trunc = DAG.getNode(ISD::FTRUNC, SL, MVT::f64, Src);
+
+ const SDValue Zero = DAG.getConstantFP(0.0, MVT::f64);
+ const SDValue One = DAG.getConstantFP(1.0, MVT::f64);
+
+ EVT SetCCVT = getSetCCResultType(*DAG.getContext(), MVT::f64);
+
+ SDValue Lt0 = DAG.getSetCC(SL, SetCCVT, Src, Zero, ISD::SETOGT);
+ SDValue NeTrunc = DAG.getSetCC(SL, SetCCVT, Src, Trunc, ISD::SETONE);
+ SDValue And = DAG.getNode(ISD::AND, SL, SetCCVT, Lt0, NeTrunc);
+
+ SDValue Add = DAG.getNode(ISD::SELECT, SL, MVT::f64, And, One, Zero);
+ return DAG.getNode(ISD::FADD, SL, MVT::f64, Trunc, Add);
+}
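A scalar C++ model of the expansion; a NaN input fails the > test, so it passes through trunc unchanged, matching the ordered SETOGT/SETONE compares above:

#include <cmath>

double ceil_via_trunc(double x) {
  double t = trunc(x);
  if (x > 0.0 && x != t)   // truncation rounded a positive value down
    t += 1.0;
  return t;
}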
+
+SDValue AMDGPUTargetLowering::LowerFTRUNC(SDValue Op, SelectionDAG &DAG) const {
+ SDLoc SL(Op);
+ SDValue Src = Op.getOperand(0);
+
+ assert(Op.getValueType() == MVT::f64);
+
+ const SDValue Zero = DAG.getConstant(0, MVT::i32);
+ const SDValue One = DAG.getConstant(1, MVT::i32);
+
+ SDValue VecSrc = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, Src);
+
+ // Extract the upper half, since this is where we will find the sign and
+ // exponent.
+ SDValue Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, VecSrc, One);
+
+ const unsigned FractBits = 52;
+ const unsigned ExpBits = 11;
+
+ // Extract the exponent.
+ SDValue ExpPart = DAG.getNode(AMDGPUISD::BFE_I32, SL, MVT::i32,
+ Hi,
+ DAG.getConstant(FractBits - 32, MVT::i32),
+ DAG.getConstant(ExpBits, MVT::i32));
+ SDValue Exp = DAG.getNode(ISD::SUB, SL, MVT::i32, ExpPart,
+ DAG.getConstant(1023, MVT::i32));
+
+ // Extract the sign bit.
+ const SDValue SignBitMask = DAG.getConstant(UINT32_C(1) << 31, MVT::i32);
+ SDValue SignBit = DAG.getNode(ISD::AND, SL, MVT::i32, Hi, SignBitMask);
+
+ // Extend back to 64 bits.
+ SDValue SignBit64 = DAG.getNode(ISD::BUILD_VECTOR, SL, MVT::v2i32,
+ Zero, SignBit);
+ SignBit64 = DAG.getNode(ISD::BITCAST, SL, MVT::i64, SignBit64);
+
+ SDValue BcInt = DAG.getNode(ISD::BITCAST, SL, MVT::i64, Src);
+ const SDValue FractMask
+ = DAG.getConstant((UINT64_C(1) << FractBits) - 1, MVT::i64);
+
+ SDValue Shr = DAG.getNode(ISD::SRA, SL, MVT::i64, FractMask, Exp);
+ SDValue Not = DAG.getNOT(SL, Shr, MVT::i64);
+ SDValue Tmp0 = DAG.getNode(ISD::AND, SL, MVT::i64, BcInt, Not);
+
+ EVT SetCCVT = getSetCCResultType(*DAG.getContext(), MVT::i32);
+
+ const SDValue FiftyOne = DAG.getConstant(FractBits - 1, MVT::i32);
+
+ SDValue ExpLt0 = DAG.getSetCC(SL, SetCCVT, Exp, Zero, ISD::SETLT);
+ SDValue ExpGt51 = DAG.getSetCC(SL, SetCCVT, Exp, FiftyOne, ISD::SETGT);
+
+ SDValue Tmp1 = DAG.getNode(ISD::SELECT, SL, MVT::i64, ExpLt0, SignBit64, Tmp0);
+ SDValue Tmp2 = DAG.getNode(ISD::SELECT, SL, MVT::i64, ExpGt51, BcInt, Tmp1);
+
+ return DAG.getNode(ISD::BITCAST, SL, MVT::f64, Tmp2);
+}
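In scalar terms, the bit-level trunc clears the fraction bits that sit below the binary point, as determined by the unbiased exponent. A C++ sketch assuming IEEE-754 doubles:

#include <cstdint>
#include <cstring>

double trunc_via_bits(double x) {
  uint64_t bits;
  std::memcpy(&bits, &x, sizeof bits);
  int exp = (int)((bits >> 52) & 0x7ff) - 1023;      // unbiased exponent
  if (exp < 0)                                       // |x| < 1: keep only the sign
    bits &= UINT64_C(1) << 63;
  else if (exp < 52)                                 // clear sub-integer fraction bits
    bits &= ~((UINT64_C(1) << (52 - exp)) - 1);
  // exp >= 52: already integral (or inf/NaN), leave unchanged
  double r;
  std::memcpy(&r, &bits, sizeof r);
  return r;
}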
+
+SDValue AMDGPUTargetLowering::LowerFRINT(SDValue Op, SelectionDAG &DAG) const {
+ SDLoc SL(Op);
+ SDValue Src = Op.getOperand(0);
+
+ assert(Op.getValueType() == MVT::f64);
+
+ APFloat C1Val(APFloat::IEEEdouble, "0x1.0p+52");
+ SDValue C1 = DAG.getConstantFP(C1Val, MVT::f64);
+ SDValue CopySign = DAG.getNode(ISD::FCOPYSIGN, SL, MVT::f64, C1, Src);
+
+ SDValue Tmp1 = DAG.getNode(ISD::FADD, SL, MVT::f64, Src, CopySign);
+ SDValue Tmp2 = DAG.getNode(ISD::FSUB, SL, MVT::f64, Tmp1, CopySign);
+
+ SDValue Fabs = DAG.getNode(ISD::FABS, SL, MVT::f64, Src);
+
+ APFloat C2Val(APFloat::IEEEdouble, "0x1.fffffffffffffp+51");
+ SDValue C2 = DAG.getConstantFP(C2Val, MVT::f64);
+
+ EVT SetCCVT = getSetCCResultType(*DAG.getContext(), MVT::f64);
+ SDValue Cond = DAG.getSetCC(SL, SetCCVT, Fabs, C2, ISD::SETOGT);
+
+ return DAG.getSelect(SL, MVT::f64, Cond, Src, Tmp2);
+}
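This is the classic magic-number trick: adding and subtracting 2^52 forces the FPU to round away the fraction bits at round-to-nearest-even. A C++ sketch, assuming the default rounding mode and no value-unsafe FP optimizations:

#include <cmath>

double rint_via_magic(double x) {
  double c = copysign(0x1.0p52, x);   // 2^52 with the sign of x
  double r = (x + c) - c;             // rounds to nearest, ties to even
  // Values with |x| > 0x1.fffffffffffffp51 are already integral; adding the
  // magic constant would perturb them, so pass them through unchanged.
  return fabs(x) > 0x1.fffffffffffffp51 ? x : r;
}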
+
+SDValue AMDGPUTargetLowering::LowerFNEARBYINT(SDValue Op, SelectionDAG &DAG) const {
+ // FNEARBYINT and FRINT are the same, except in their handling of FP
+ // exceptions. Those aren't really meaningful for us, and OpenCL only has
+ // rint, so just treat them as equivalent.
+ return DAG.getNode(ISD::FRINT, SDLoc(Op), Op.getValueType(), Op.getOperand(0));
+}
+
+SDValue AMDGPUTargetLowering::LowerFFLOOR(SDValue Op, SelectionDAG &DAG) const {
+ SDLoc SL(Op);
+ SDValue Src = Op.getOperand(0);
+
+ // result = trunc(src);
+ // if (src < 0.0 && src != result)
+ // result += -1.0.
+
+ SDValue Trunc = DAG.getNode(ISD::FTRUNC, SL, MVT::f64, Src);
+
+ const SDValue Zero = DAG.getConstantFP(0.0, MVT::f64);
+ const SDValue NegOne = DAG.getConstantFP(-1.0, MVT::f64);
+
+ EVT SetCCVT = getSetCCResultType(*DAG.getContext(), MVT::f64);
+
+ SDValue Lt0 = DAG.getSetCC(SL, SetCCVT, Src, Zero, ISD::SETOLT);
+ SDValue NeTrunc = DAG.getSetCC(SL, SetCCVT, Src, Trunc, ISD::SETONE);
+ SDValue And = DAG.getNode(ISD::AND, SL, SetCCVT, Lt0, NeTrunc);
+
+ SDValue Add = DAG.getNode(ISD::SELECT, SL, MVT::f64, And, NegOne, Zero);
+ return DAG.getNode(ISD::FADD, SL, MVT::f64, Trunc, Add);
+}
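The floor case mirrors the ceil expansion above; in scalar C++ form:

#include <cmath>

double floor_via_trunc(double x) {
  double t = trunc(x);
  if (x < 0.0 && x != t)   // truncation rounded a negative value up
    t -= 1.0;
  return t;
}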
+
SDValue AMDGPUTargetLowering::LowerUINT_TO_FP(SDValue Op,
SelectionDAG &DAG) const {
SDValue S0 = Op.getOperand(0);
@@ -1218,7 +1813,6 @@ SDValue AMDGPUTargetLowering::LowerUINT_TO_FP(SDValue Op,
FloatHi = DAG.getNode(ISD::FMUL, DL, MVT::f32, FloatHi,
DAG.getConstantFP(4294967296.0f, MVT::f32)); // 2^32
return DAG.getNode(ISD::FADD, DL, MVT::f32, FloatLo, FloatHi);
-
}
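The lowering converts each 32-bit half separately and recombines them with a multiply by 2^32; a scalar C++ equivalent, with the same double rounding as the hardware sequence:

#include <cstdint>

float u64_to_f32(uint64_t v) {
  float lo = (float)(uint32_t)v;          // low 32 bits
  float hi = (float)(uint32_t)(v >> 32);  // high 32 bits
  return lo + hi * 4294967296.0f;         // hi * 2^32
}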
SDValue AMDGPUTargetLowering::ExpandSIGN_EXTEND_INREG(SDValue Op,
@@ -1303,6 +1897,37 @@ static SDValue constantFoldBFE(SelectionDAG &DAG, IntTy Src0,
return DAG.getConstant(Src0 >> Offset, MVT::i32);
}
+SDValue AMDGPUTargetLowering::performMulCombine(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ EVT VT = N->getValueType(0);
+
+ if (VT.isVector() || VT.getSizeInBits() > 32)
+ return SDValue();
+
+ SelectionDAG &DAG = DCI.DAG;
+ SDLoc DL(N);
+
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue Mul;
+
+ if (Subtarget->hasMulU24() && isU24(N0, DAG) && isU24(N1, DAG)) {
+ N0 = DAG.getZExtOrTrunc(N0, DL, MVT::i32);
+ N1 = DAG.getZExtOrTrunc(N1, DL, MVT::i32);
+ Mul = DAG.getNode(AMDGPUISD::MUL_U24, DL, MVT::i32, N0, N1);
+ } else if (Subtarget->hasMulI24() && isI24(N0, DAG) && isI24(N1, DAG)) {
+ N0 = DAG.getSExtOrTrunc(N0, DL, MVT::i32);
+ N1 = DAG.getSExtOrTrunc(N1, DL, MVT::i32);
+ Mul = DAG.getNode(AMDGPUISD::MUL_I24, DL, MVT::i32, N0, N1);
+ } else {
+ return SDValue();
+ }
+
+ // We need to use sext even for MUL_U24, because MUL_U24 is used
+ // for signed multiply of 8 and 16-bit types.
+ return DAG.getSExtOrTrunc(Mul, DL, VT);
+}
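The isU24/isI24 queries are answered at compile time from known-bits and sign-bit analysis rather than by testing values; at the value level, the conditions they prove are simply:

#include <cstdint>

// Illustrative value-level predicates (the names are not LLVM API).
static bool fitsU24(uint32_t v) { return (v >> 24) == 0; }
static bool fitsI24(int32_t v)  { return v >= -(1 << 23) && v < (1 << 23); }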
+
SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
@@ -1310,34 +1935,8 @@ SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N,
switch(N->getOpcode()) {
default: break;
- case ISD::MUL: {
- EVT VT = N->getValueType(0);
- SDValue N0 = N->getOperand(0);
- SDValue N1 = N->getOperand(1);
- SDValue Mul;
-
- // FIXME: Add support for 24-bit multiply with 64-bit output on SI.
- if (VT.isVector() || VT.getSizeInBits() > 32)
- break;
-
- if (Subtarget->hasMulU24() && isU24(N0, DAG) && isU24(N1, DAG)) {
- N0 = DAG.getZExtOrTrunc(N0, DL, MVT::i32);
- N1 = DAG.getZExtOrTrunc(N1, DL, MVT::i32);
- Mul = DAG.getNode(AMDGPUISD::MUL_U24, DL, MVT::i32, N0, N1);
- } else if (Subtarget->hasMulI24() && isI24(N0, DAG) && isI24(N1, DAG)) {
- N0 = DAG.getSExtOrTrunc(N0, DL, MVT::i32);
- N1 = DAG.getSExtOrTrunc(N1, DL, MVT::i32);
- Mul = DAG.getNode(AMDGPUISD::MUL_I24, DL, MVT::i32, N0, N1);
- } else {
- break;
- }
-
- // We need to use sext even for MUL_U24, because MUL_U24 is used
- // for signed multiply of 8 and 16-bit types.
- SDValue Reg = DAG.getSExtOrTrunc(Mul, DL, VT);
-
- return Reg;
- }
+ case ISD::MUL:
+ return performMulCombine(N, DCI);
case AMDGPUISD::MUL_I24:
case AMDGPUISD::MUL_U24: {
SDValue N0 = N->getOperand(0);
@@ -1511,29 +2110,38 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
// AMDIL DAG nodes
NODE_NAME_CASE(CALL);
NODE_NAME_CASE(UMUL);
- NODE_NAME_CASE(DIV_INF);
NODE_NAME_CASE(RET_FLAG);
NODE_NAME_CASE(BRANCH_COND);
// AMDGPU DAG nodes
NODE_NAME_CASE(DWORDADDR)
NODE_NAME_CASE(FRACT)
+ NODE_NAME_CASE(CLAMP)
NODE_NAME_CASE(FMAX)
NODE_NAME_CASE(SMAX)
NODE_NAME_CASE(UMAX)
NODE_NAME_CASE(FMIN)
NODE_NAME_CASE(SMIN)
NODE_NAME_CASE(UMIN)
+ NODE_NAME_CASE(URECIP)
+ NODE_NAME_CASE(DIV_SCALE)
+ NODE_NAME_CASE(DIV_FMAS)
+ NODE_NAME_CASE(DIV_FIXUP)
+ NODE_NAME_CASE(TRIG_PREOP)
+ NODE_NAME_CASE(RCP)
+ NODE_NAME_CASE(RSQ)
+ NODE_NAME_CASE(RSQ_LEGACY)
+ NODE_NAME_CASE(RSQ_CLAMPED)
+ NODE_NAME_CASE(DOT4)
NODE_NAME_CASE(BFE_U32)
NODE_NAME_CASE(BFE_I32)
NODE_NAME_CASE(BFI)
NODE_NAME_CASE(BFM)
+ NODE_NAME_CASE(BREV)
NODE_NAME_CASE(MUL_U24)
NODE_NAME_CASE(MUL_I24)
NODE_NAME_CASE(MAD_U24)
NODE_NAME_CASE(MAD_I24)
- NODE_NAME_CASE(URECIP)
- NODE_NAME_CASE(DOT4)
NODE_NAME_CASE(EXPORT)
NODE_NAME_CASE(CONST_ADDRESS)
NODE_NAME_CASE(REGISTER_LOAD)
@@ -1544,6 +2152,11 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(SAMPLEB)
NODE_NAME_CASE(SAMPLED)
NODE_NAME_CASE(SAMPLEL)
+ NODE_NAME_CASE(CVT_F32_UBYTE0)
+ NODE_NAME_CASE(CVT_F32_UBYTE1)
+ NODE_NAME_CASE(CVT_F32_UBYTE2)
+ NODE_NAME_CASE(CVT_F32_UBYTE3)
+ NODE_NAME_CASE(BUILD_VERTICAL_VECTOR)
NODE_NAME_CASE(STORE_MSKOR)
NODE_NAME_CASE(TBUFFER_STORE_FORMAT)
}
diff --git a/lib/Target/R600/AMDGPUISelLowering.h b/lib/Target/R600/AMDGPUISelLowering.h
index d5d821d..98a92ad 100644
--- a/lib/Target/R600/AMDGPUISelLowering.h
+++ b/lib/Target/R600/AMDGPUISelLowering.h
@@ -42,10 +42,33 @@ private:
SDValue MergeVectorStore(const SDValue &Op, SelectionDAG &DAG) const;
/// \brief Split a vector store into multiple scalar stores.
/// \returns The resulting chain.
+
+ SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSDIV24(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSDIV32(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSDIV64(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSREM(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSREM32(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSREM64(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerUDIVREM(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFCEIL(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFTRUNC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFRINT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFNEARBYINT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFFLOOR(SDValue Op, SelectionDAG &DAG) const;
+
SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
+ SDValue ExpandSIGN_EXTEND_INREG(SDValue Op,
+ unsigned BitsDiff,
+ SelectionDAG &DAG) const;
+ SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue performMulCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+
protected:
+ static EVT getEquivalentMemType(LLVMContext &Context, EVT VT);
+ static EVT getEquivalentLoadRegType(LLVMContext &Context, EVT VT);
/// \brief Helper function that adds Reg to the LiveIn list of the DAG's
/// MachineFunction.
@@ -61,6 +84,7 @@ protected:
SDValue SplitVectorStore(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSDIVREM(SDValue Op, SelectionDAG &DAG) const;
bool isHWTrueValue(SDValue Op) const;
bool isHWFalseValue(SDValue Op) const;
@@ -87,10 +111,16 @@ public:
bool isZExtFree(Type *Src, Type *Dest) const override;
bool isZExtFree(EVT Src, EVT Dest) const override;
+ bool isZExtFree(SDValue Val, EVT VT2) const override;
bool isNarrowingProfitable(EVT VT1, EVT VT2) const override;
MVT getVectorIdxTy() const override;
+ bool isSelectSupported(SelectSupportKind) const override;
+
+ bool isFPImmLegal(const APFloat &Imm, EVT VT) const override;
+ bool ShouldShrinkFPConstant(EVT VT) const override;
+
bool isLoadBitCastBeneficial(EVT, EVT) const override;
SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv,
bool isVarArg,
@@ -101,6 +131,7 @@ public:
SmallVectorImpl<SDValue> &InVals) const override;
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
+ SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
void ReplaceNodeResults(SDNode * N,
SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG) const override;
@@ -128,38 +159,6 @@ public:
SDValue Op,
const SelectionDAG &DAG,
unsigned Depth = 0) const override;
-
-// Functions defined in AMDILISelLowering.cpp
-public:
- bool getTgtMemIntrinsic(IntrinsicInfo &Info,
- const CallInst &I, unsigned Intrinsic) const override;
-
- /// We want to mark f32/f64 floating point values as legal.
- bool isFPImmLegal(const APFloat &Imm, EVT VT) const override;
-
- /// We don't want to shrink f64/f32 constants.
- bool ShouldShrinkFPConstant(EVT VT) const override;
-
- SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
-
-private:
- void InitAMDILLowering();
- SDValue LowerSREM(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerSREM8(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerSREM16(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerSREM32(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerSREM64(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerSDIV24(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerSDIV32(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerSDIV64(SDValue Op, SelectionDAG &DAG) const;
-
- SDValue ExpandSIGN_EXTEND_INREG(SDValue Op,
- unsigned BitsDiff,
- SelectionDAG &DAG) const;
- SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const;
- EVT genIntType(uint32_t size = 32, uint32_t numEle = 1) const;
- SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
};
namespace AMDGPUISD {
@@ -169,12 +168,15 @@ enum {
FIRST_NUMBER = ISD::BUILTIN_OP_END,
CALL, // Function call based on a single integer
UMUL, // 32bit unsigned multiplication
- DIV_INF, // Divide with infinity returned on zero divisor
RET_FLAG,
BRANCH_COND,
// End AMDIL ISD Opcodes
DWORDADDR,
FRACT,
+ CLAMP,
+
+ // SIN_HW, COS_HW - f32 for SI, 1 ULP max error, valid from -100 pi to 100 pi.
+ // Denormals handled on some parts.
COS_HW,
SIN_HW,
FMAX,
@@ -184,11 +186,23 @@ enum {
SMIN,
UMIN,
URECIP,
+ DIV_SCALE,
+ DIV_FMAS,
+ DIV_FIXUP,
+ TRIG_PREOP, // 1 ULP max error for f64
+
+ // RCP, RSQ - For f32, 1 ULP max error, no denormal handling.
+ // For f64, max error 2^29 ULP, handles denormals.
+ RCP,
+ RSQ,
+ RSQ_LEGACY,
+ RSQ_CLAMPED,
DOT4,
BFE_U32, // Extract range of bits with zero extension to 32-bits.
BFE_I32, // Extract range of bits with sign extension to 32-bits.
BFI, // (src0 & src1) | (~src0 & src2)
BFM, // Insert a range of bits into a 32-bit word.
+ BREV, // Reverse bits.
MUL_U24,
MUL_I24,
MAD_U24,
@@ -203,6 +217,21 @@ enum {
SAMPLEB,
SAMPLED,
SAMPLEL,
+
+ // These cvt_f32_ubyte* nodes need to remain consecutive and in order.
+ CVT_F32_UBYTE0,
+ CVT_F32_UBYTE1,
+ CVT_F32_UBYTE2,
+ CVT_F32_UBYTE3,
+ /// This node is for VLIW targets and it is used to represent a vector
+ /// that is stored in consecutive registers with the same channel.
+ /// For example:
+ /// |X |Y|Z|W|
+ /// T0|v.x| | | |
+ /// T1|v.y| | | |
+ /// T2|v.z| | | |
+ /// T3|v.w| | | |
+ BUILD_VERTICAL_VECTOR,
FIRST_MEM_OPCODE_NUMBER = ISD::FIRST_TARGET_MEMORY_OPCODE,
STORE_MSKOR,
LOAD_CONSTANT,
diff --git a/lib/Target/R600/AMDGPUInstrInfo.cpp b/lib/Target/R600/AMDGPUInstrInfo.cpp
index 1c3361a..fef5b8c 100644
--- a/lib/Target/R600/AMDGPUInstrInfo.cpp
+++ b/lib/Target/R600/AMDGPUInstrInfo.cpp
@@ -30,8 +30,8 @@ using namespace llvm;
// Pin the vtable to this file.
void AMDGPUInstrInfo::anchor() {}
-AMDGPUInstrInfo::AMDGPUInstrInfo(TargetMachine &tm)
- : AMDGPUGenInstrInfo(-1,-1), RI(tm), TM(tm) { }
+AMDGPUInstrInfo::AMDGPUInstrInfo(const AMDGPUSubtarget &st)
+ : AMDGPUGenInstrInfo(-1,-1), RI(st), ST(st) { }
const AMDGPURegisterInfo &AMDGPUInstrInfo::getRegisterInfo() const {
return RI;
@@ -320,33 +320,11 @@ int AMDGPUInstrInfo::getIndirectIndexEnd(const MachineFunction &MF) const {
return -1;
}
- Offset = TM.getFrameLowering()->getFrameIndexOffset(MF, -1);
+ Offset = MF.getTarget().getFrameLowering()->getFrameIndexOffset(MF, -1);
return getIndirectIndexBegin(MF) + Offset;
}
-
-void AMDGPUInstrInfo::convertToISA(MachineInstr & MI, MachineFunction &MF,
- DebugLoc DL) const {
- MachineRegisterInfo &MRI = MF.getRegInfo();
- const AMDGPURegisterInfo & RI = getRegisterInfo();
-
- for (unsigned i = 0; i < MI.getNumOperands(); i++) {
- MachineOperand &MO = MI.getOperand(i);
- // Convert dst regclass to one that is supported by the ISA
- if (MO.isReg() && MO.isDef()) {
- if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
- const TargetRegisterClass * oldRegClass = MRI.getRegClass(MO.getReg());
- const TargetRegisterClass * newRegClass = RI.getISARegClass(oldRegClass);
-
- assert(newRegClass);
-
- MRI.setRegClass(MO.getReg(), newRegClass);
- }
- }
- }
-}
-
int AMDGPUInstrInfo::getMaskedMIMGOp(uint16_t Opcode, unsigned Channels) const {
switch (Channels) {
default: return Opcode;
diff --git a/lib/Target/R600/AMDGPUInstrInfo.h b/lib/Target/R600/AMDGPUInstrInfo.h
index 74baf6b..95dc8c1 100644
--- a/lib/Target/R600/AMDGPUInstrInfo.h
+++ b/lib/Target/R600/AMDGPUInstrInfo.h
@@ -33,7 +33,7 @@
namespace llvm {
-class AMDGPUTargetMachine;
+class AMDGPUSubtarget;
class MachineFunction;
class MachineInstr;
class MachineInstrBuilder;
@@ -45,9 +45,9 @@ private:
MachineBasicBlock &MBB) const;
virtual void anchor();
protected:
- TargetMachine &TM;
+ const AMDGPUSubtarget &ST;
public:
- explicit AMDGPUInstrInfo(TargetMachine &tm);
+ explicit AMDGPUInstrInfo(const AMDGPUSubtarget &st);
virtual const AMDGPURegisterInfo &getRegisterInfo() const = 0;
@@ -137,14 +137,6 @@ public:
bool isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const override;
// Helper functions that check the opcode for status information
- bool isLoadInst(llvm::MachineInstr *MI) const;
- bool isExtLoadInst(llvm::MachineInstr *MI) const;
- bool isSWSExtLoadInst(llvm::MachineInstr *MI) const;
- bool isSExtLoadInst(llvm::MachineInstr *MI) const;
- bool isZExtLoadInst(llvm::MachineInstr *MI) const;
- bool isAExtLoadInst(llvm::MachineInstr *MI) const;
- bool isStoreInst(llvm::MachineInstr *MI) const;
- bool isTruncStoreInst(llvm::MachineInstr *MI) const;
bool isRegisterStore(const MachineInstr &MI) const;
bool isRegisterLoad(const MachineInstr &MI) const;
@@ -185,11 +177,6 @@ public:
unsigned ValueReg, unsigned Address,
unsigned OffsetReg) const = 0;
-
- /// \brief Convert the AMDIL MachineInstr to a supported ISA
- /// MachineInstr
- void convertToISA(MachineInstr & MI, MachineFunction &MF, DebugLoc DL) const;
-
/// \brief Build a MOV instruction.
virtual MachineInstr *buildMovInstr(MachineBasicBlock *MBB,
MachineBasicBlock::iterator I,
diff --git a/lib/Target/R600/AMDGPUInstrInfo.td b/lib/Target/R600/AMDGPUInstrInfo.td
index f96dbb4..934d59d 100644
--- a/lib/Target/R600/AMDGPUInstrInfo.td
+++ b/lib/Target/R600/AMDGPUInstrInfo.td
@@ -19,6 +19,14 @@ def AMDGPUDTIntTernaryOp : SDTypeProfile<1, 3, [
SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisInt<0>, SDTCisInt<3>
]>;
+def AMDGPUTrigPreOp : SDTypeProfile<1, 2,
+ [SDTCisSameAs<0, 1>, SDTCisFP<0>, SDTCisInt<2>]
+>;
+
+def AMDGPUDivScaleOp : SDTypeProfile<2, 3,
+ [SDTCisFP<0>, SDTCisInt<1>, SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>, SDTCisSameAs<0, 4>]
+>;
+
//===----------------------------------------------------------------------===//
// AMDGPU DAG Nodes
//
@@ -29,11 +37,25 @@ def AMDGPUdwordaddr : SDNode<"AMDGPUISD::DWORDADDR", SDTIntUnaryOp>;
// out = a - floor(a)
def AMDGPUfract : SDNode<"AMDGPUISD::FRACT", SDTFPUnaryOp>;
+// out = 1.0 / a
+def AMDGPUrcp : SDNode<"AMDGPUISD::RCP", SDTFPUnaryOp>;
+
+// out = 1.0 / sqrt(a)
+def AMDGPUrsq : SDNode<"AMDGPUISD::RSQ", SDTFPUnaryOp>;
+
+// out = 1.0 / sqrt(a)
+def AMDGPUrsq_legacy : SDNode<"AMDGPUISD::RSQ_LEGACY", SDTFPUnaryOp>;
+
+// out = 1.0 / sqrt(a) result clamped to +/- max_float.
+def AMDGPUrsq_clamped : SDNode<"AMDGPUISD::RSQ_CLAMPED", SDTFPUnaryOp>;
+
// out = max(a, b) a and b are floats
def AMDGPUfmax : SDNode<"AMDGPUISD::FMAX", SDTFPBinOp,
[SDNPCommutative, SDNPAssociative]
>;
+def AMDGPUclamp : SDNode<"AMDGPUISD::CLAMP", SDTFPTernaryOp, []>;
+
// out = max(a, b) a and b are signed ints
def AMDGPUsmax : SDNode<"AMDGPUISD::SMAX", SDTIntBinOp,
[SDNPCommutative, SDNPAssociative]
@@ -59,12 +81,38 @@ def AMDGPUumin : SDNode<"AMDGPUISD::UMIN", SDTIntBinOp,
[SDNPCommutative, SDNPAssociative]
>;
+
+def AMDGPUcvt_f32_ubyte0 : SDNode<"AMDGPUISD::CVT_F32_UBYTE0",
+ SDTIntToFPOp, []>;
+def AMDGPUcvt_f32_ubyte1 : SDNode<"AMDGPUISD::CVT_F32_UBYTE1",
+ SDTIntToFPOp, []>;
+def AMDGPUcvt_f32_ubyte2 : SDNode<"AMDGPUISD::CVT_F32_UBYTE2",
+ SDTIntToFPOp, []>;
+def AMDGPUcvt_f32_ubyte3 : SDNode<"AMDGPUISD::CVT_F32_UBYTE3",
+ SDTIntToFPOp, []>;
+
+
// urecip - This operation is a helper for integer division; it returns the
// result of 1 / a as a fractional unsigned integer.
// out = (2^32 / a) + e
// e is rounding error
def AMDGPUurecip : SDNode<"AMDGPUISD::URECIP", SDTIntUnaryOp>;
+// Special case divide preop and flags.
+def AMDGPUdiv_scale : SDNode<"AMDGPUISD::DIV_SCALE", AMDGPUDivScaleOp>;
+
+// Special case divide FMA with scale and flags (src0 = Quotient,
+// src1 = Denominator, src2 = Numerator).
+def AMDGPUdiv_fmas : SDNode<"AMDGPUISD::DIV_FMAS", SDTFPTernaryOp>;
+
+// Single or double precision division fixup.
+// Special case divide fixup and flags (src0 = Quotient, src1 =
+// Denominator, src2 = Numerator).
+def AMDGPUdiv_fixup : SDNode<"AMDGPUISD::DIV_FIXUP", SDTFPTernaryOp>;
+
+// Look up 2.0 / pi for src0 with segment select src1[4:0].
+def AMDGPUtrig_preop : SDNode<"AMDGPUISD::TRIG_PREOP", AMDGPUTrigPreOp>;
+
def AMDGPUregister_load : SDNode<"AMDGPUISD::REGISTER_LOAD",
SDTypeProfile<1, 2, [SDTCisPtrTy<1>, SDTCisInt<2>]>,
[SDNPHasChain, SDNPMayLoad]>;
@@ -92,6 +140,8 @@ def AMDGPUbfe_i32 : SDNode<"AMDGPUISD::BFE_I32", AMDGPUDTIntTernaryOp>;
def AMDGPUbfi : SDNode<"AMDGPUISD::BFI", AMDGPUDTIntTernaryOp>;
def AMDGPUbfm : SDNode<"AMDGPUISD::BFM", SDTIntBinOp>;
+def AMDGPUbrev : SDNode<"AMDGPUISD::BREV", SDTIntUnaryOp>;
+
// Signed and unsigned 24-bit multiply. The highest 8 bits are ignored when
// performing the multiply. The result is a 32-bit value.
def AMDGPUmul_u24 : SDNode<"AMDGPUISD::MUL_U24", SDTIntBinOp,
@@ -107,3 +157,22 @@ def AMDGPUmad_u24 : SDNode<"AMDGPUISD::MAD_U24", AMDGPUDTIntTernaryOp,
def AMDGPUmad_i24 : SDNode<"AMDGPUISD::MAD_I24", AMDGPUDTIntTernaryOp,
[]
>;
+
+//===----------------------------------------------------------------------===//
+// Flow Control Profile Types
+//===----------------------------------------------------------------------===//
+// Branch instruction where second and third are basic blocks
+def SDTIL_BRCond : SDTypeProfile<0, 2, [
+ SDTCisVT<0, OtherVT>
+ ]>;
+
+//===----------------------------------------------------------------------===//
+// Flow Control DAG Nodes
+//===----------------------------------------------------------------------===//
+def IL_brcond : SDNode<"AMDGPUISD::BRANCH_COND", SDTIL_BRCond, [SDNPHasChain]>;
+
+//===----------------------------------------------------------------------===//
+// Call/Return DAG Nodes
+//===----------------------------------------------------------------------===//
+def IL_retflag : SDNode<"AMDGPUISD::RET_FLAG", SDTNone,
+ [SDNPHasChain, SDNPOptInGlue]>;
diff --git a/lib/Target/R600/AMDGPUInstructions.td b/lib/Target/R600/AMDGPUInstructions.td
index 80bdf5b..b86b781 100644
--- a/lib/Target/R600/AMDGPUInstructions.td
+++ b/lib/Target/R600/AMDGPUInstructions.td
@@ -49,6 +49,11 @@ def u8imm : Operand<i8> {
let PrintMethod = "printU8ImmOperand";
}
+//===--------------------------------------------------------------------===//
+// Custom Operands
+//===--------------------------------------------------------------------===//
+def brtarget : Operand<OtherVT>;
+
//===----------------------------------------------------------------------===//
// PatLeafs for floating-point comparisons
//===----------------------------------------------------------------------===//
@@ -127,6 +132,21 @@ def COND_NULL : PatLeaf <
// Load/Store Pattern Fragments
//===----------------------------------------------------------------------===//
+def global_store : PatFrag<(ops node:$val, node:$ptr),
+ (store node:$val, node:$ptr), [{
+ return isGlobalStore(dyn_cast<StoreSDNode>(N));
+}]>;
+
+// Global address space loads
+def global_load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+ return isGlobalLoad(dyn_cast<LoadSDNode>(N));
+}]>;
+
+// Constant address space loads
+def constant_load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+ return isConstantLoad(dyn_cast<LoadSDNode>(N), -1);
+}]>;
+
def az_extload : PatFrag<(ops node:$ptr), (unindexedload node:$ptr), [{
LoadSDNode *L = cast<LoadSDNode>(N);
return L->getExtensionType() == ISD::ZEXTLOAD ||
@@ -232,26 +252,55 @@ def local_load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
return isLocalLoad(dyn_cast<LoadSDNode>(N));
}]>;
-def atomic_load_add_local : PatFrag<(ops node:$ptr, node:$value),
- (atomic_load_add node:$ptr, node:$value), [{
- return dyn_cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
-}]>;
-def atomic_load_sub_local : PatFrag<(ops node:$ptr, node:$value),
- (atomic_load_sub node:$ptr, node:$value), [{
- return dyn_cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
+class local_binary_atomic_op<SDNode atomic_op> :
+ PatFrag<(ops node:$ptr, node:$value),
+ (atomic_op node:$ptr, node:$value), [{
+ return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
}]>;
+
+def atomic_swap_local : local_binary_atomic_op<atomic_swap>;
+def atomic_load_add_local : local_binary_atomic_op<atomic_load_add>;
+def atomic_load_sub_local : local_binary_atomic_op<atomic_load_sub>;
+def atomic_load_and_local : local_binary_atomic_op<atomic_load_and>;
+def atomic_load_or_local : local_binary_atomic_op<atomic_load_or>;
+def atomic_load_xor_local : local_binary_atomic_op<atomic_load_xor>;
+def atomic_load_nand_local : local_binary_atomic_op<atomic_load_nand>;
+def atomic_load_min_local : local_binary_atomic_op<atomic_load_min>;
+def atomic_load_max_local : local_binary_atomic_op<atomic_load_max>;
+def atomic_load_umin_local : local_binary_atomic_op<atomic_load_umin>;
+def atomic_load_umax_local : local_binary_atomic_op<atomic_load_umax>;
+
def mskor_global : PatFrag<(ops node:$val, node:$ptr),
(AMDGPUstore_mskor node:$val, node:$ptr), [{
return dyn_cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
}]>;
+def atomic_cmp_swap_32_local :
+ PatFrag<(ops node:$ptr, node:$cmp, node:$swap),
+ (atomic_cmp_swap node:$ptr, node:$cmp, node:$swap), [{
+ AtomicSDNode *AN = cast<AtomicSDNode>(N);
+ return AN->getMemoryVT() == MVT::i32 &&
+ AN->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
+}]>;
+
+def atomic_cmp_swap_64_local :
+ PatFrag<(ops node:$ptr, node:$cmp, node:$swap),
+ (atomic_cmp_swap node:$ptr, node:$cmp, node:$swap), [{
+ AtomicSDNode *AN = cast<AtomicSDNode>(N);
+ return AN->getMemoryVT() == MVT::i64 &&
+ AN->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
+}]>;
+
+
class Constants {
int TWO_PI = 0x40c90fdb;
int PI = 0x40490fdb;
int TWO_PI_INV = 0x3e22f983;
int FP_UINT_MAX_PLUS_1 = 0x4f800000; // 1 << 32 in floating point encoding
+int FP32_NEG_ONE = 0xbf800000;
+int FP32_ONE = 0x3f800000;
}
def CONST : Constants;
@@ -273,7 +322,7 @@ class CLAMP <RegisterClass rc> : AMDGPUShaderInst <
(outs rc:$dst),
(ins rc:$src0),
"CLAMP $dst, $src0",
- [(set f32:$dst, (int_AMDIL_clamp f32:$src0, (f32 FP_ZERO), (f32 FP_ONE)))]
+ [(set f32:$dst, (AMDGPUclamp f32:$src0, (f32 FP_ZERO), (f32 FP_ONE)))]
>;
class FABS <RegisterClass rc> : AMDGPUShaderInst <
@@ -363,7 +412,7 @@ class DwordAddrPat<ValueType vt, RegisterClass rc> : Pat <
// BFI_INT patterns
-multiclass BFIPatterns <Instruction BFI_INT> {
+multiclass BFIPatterns <Instruction BFI_INT, Instruction LoadImm32> {
// Definition from ISA doc:
// (y & x) | (z & ~x)
@@ -379,6 +428,19 @@ multiclass BFIPatterns <Instruction BFI_INT> {
(BFI_INT $x, $y, $z)
>;
+ def : Pat <
+ (fcopysign f32:$src0, f32:$src1),
+ (BFI_INT (LoadImm32 0x7fffffff), $src0, $src1)
+ >;
+
+ def : Pat <
+ (f64 (fcopysign f64:$src0, f64:$src1)),
+ (INSERT_SUBREG (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
+ (i32 (EXTRACT_SUBREG $src0, sub0)), sub0),
+ (BFI_INT (LoadImm32 0x7fffffff),
+ (i32 (EXTRACT_SUBREG $src0, sub1)),
+ (i32 (EXTRACT_SUBREG $src1, sub1))), sub1)
+ >;
}
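The f32 pattern works because BFI(mask, x, y) computes (mask & x) | (~mask & y), so a 0x7fffffff mask takes the magnitude from x and the sign from y, which is exactly fcopysign. A bit-level C++ sketch:

#include <cstdint>
#include <cstring>

float copysign_via_bfi(float x, float y) {
  uint32_t xb, yb;
  std::memcpy(&xb, &x, 4);
  std::memcpy(&yb, &y, 4);
  uint32_t r = (0x7fffffffu & xb) | (~0x7fffffffu & yb);  // BFI(mask, x, y)
  float out;
  std::memcpy(&out, &r, 4);
  return out;
}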
// SHA-256 Ma patterns
@@ -457,6 +519,23 @@ multiclass Expand24UBitOps<Instruction MulInst, Instruction AddInst> {
>;
}
+class RcpPat<Instruction RcpInst, ValueType vt> : Pat <
+ (fdiv FP_ONE, vt:$src),
+ (RcpInst $src)
+>;
+
+multiclass RsqPat<Instruction RsqInst, ValueType vt> {
+ def : Pat <
+ (fdiv FP_ONE, (fsqrt vt:$src)),
+ (RsqInst $src)
+ >;
+
+ def : Pat <
+ (AMDGPUrcp (fsqrt vt:$src)),
+ (RsqInst $src)
+ >;
+}
+
include "R600Instructions.td"
include "R700Instructions.td"
include "EvergreenInstructions.td"
diff --git a/lib/Target/R600/AMDILIntrinsicInfo.cpp b/lib/Target/R600/AMDGPUIntrinsicInfo.cpp
index fab4a3b..58916a9 100644
--- a/lib/Target/R600/AMDILIntrinsicInfo.cpp
+++ b/lib/Target/R600/AMDGPUIntrinsicInfo.cpp
@@ -1,4 +1,4 @@
-//===- AMDILIntrinsicInfo.cpp - AMDGPU Intrinsic Information ------*- C++ -*-===//
+//===- AMDGPUIntrinsicInfo.cpp - AMDGPU Intrinsic Information ---*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -12,7 +12,7 @@
//
//===-----------------------------------------------------------------------===//
-#include "AMDILIntrinsicInfo.h"
+#include "AMDGPUIntrinsicInfo.h"
#include "AMDGPUSubtarget.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Intrinsics.h"
@@ -24,14 +24,12 @@ using namespace llvm;
#include "AMDGPUGenIntrinsics.inc"
#undef GET_LLVM_INTRINSIC_FOR_GCC_BUILTIN
-AMDGPUIntrinsicInfo::AMDGPUIntrinsicInfo(TargetMachine *tm)
- : TargetIntrinsicInfo() {
-}
+AMDGPUIntrinsicInfo::AMDGPUIntrinsicInfo(TargetMachine *tm)
+ : TargetIntrinsicInfo() {}
-std::string
-AMDGPUIntrinsicInfo::getName(unsigned int IntrID, Type **Tys,
- unsigned int numTys) const {
- static const char* const names[] = {
+std::string AMDGPUIntrinsicInfo::getName(unsigned IntrID, Type **Tys,
+ unsigned numTys) const {
+ static const char *const names[] = {
#define GET_INTRINSIC_NAME_TABLE
#include "AMDGPUGenIntrinsics.inc"
#undef GET_INTRINSIC_NAME_TABLE
@@ -40,23 +38,23 @@ AMDGPUIntrinsicInfo::getName(unsigned int IntrID, Type **Tys,
if (IntrID < Intrinsic::num_intrinsics) {
return nullptr;
}
- assert(IntrID < AMDGPUIntrinsic::num_AMDGPU_intrinsics
- && "Invalid intrinsic ID");
+ assert(IntrID < AMDGPUIntrinsic::num_AMDGPU_intrinsics &&
+ "Invalid intrinsic ID");
std::string Result(names[IntrID - Intrinsic::num_intrinsics]);
return Result;
}
-unsigned int
-AMDGPUIntrinsicInfo::lookupName(const char *Name, unsigned int Len) const {
+unsigned AMDGPUIntrinsicInfo::lookupName(const char *Name,
+ unsigned Len) const {
if (!StringRef(Name, Len).startswith("llvm."))
return 0; // All intrinsics start with 'llvm.'
#define GET_FUNCTION_RECOGNIZER
#include "AMDGPUGenIntrinsics.inc"
#undef GET_FUNCTION_RECOGNIZER
- AMDGPUIntrinsic::ID IntrinsicID
- = (AMDGPUIntrinsic::ID)Intrinsic::not_intrinsic;
+ AMDGPUIntrinsic::ID IntrinsicID =
+ (AMDGPUIntrinsic::ID)Intrinsic::not_intrinsic;
IntrinsicID = getIntrinsicForGCCBuiltin("AMDGPU", Name);
if (IntrinsicID != (AMDGPUIntrinsic::ID)Intrinsic::not_intrinsic) {
@@ -65,17 +63,15 @@ AMDGPUIntrinsicInfo::lookupName(const char *Name, unsigned int Len) const {
return 0;
}
-bool
-AMDGPUIntrinsicInfo::isOverloaded(unsigned id) const {
- // Overload Table
+bool AMDGPUIntrinsicInfo::isOverloaded(unsigned id) const {
+// Overload Table
#define GET_INTRINSIC_OVERLOAD_TABLE
#include "AMDGPUGenIntrinsics.inc"
#undef GET_INTRINSIC_OVERLOAD_TABLE
}
-Function*
-AMDGPUIntrinsicInfo::getDeclaration(Module *M, unsigned IntrID,
- Type **Tys,
- unsigned numTys) const {
+Function *AMDGPUIntrinsicInfo::getDeclaration(Module *M, unsigned IntrID,
+ Type **Tys,
+ unsigned numTys) const {
llvm_unreachable("Not implemented");
}
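One subtlety in the getName() implementation above: target intrinsic IDs are allocated after the core LLVM IDs, so the generated name table is indexed by IntrID - Intrinsic::num_intrinsics, and core IDs take the early return. (As written, that early return produces nullptr from a function returning std::string, which invokes undefined behavior in the std::string(const char *) constructor; an explicit empty string is the safe spelling.) A sketch of the indexing convention, with names standing in for the table emitted by AMDGPUGenIntrinsics.inc:

    // Sketch only: AMDGPU intrinsic IDs start at Intrinsic::num_intrinsics.
    std::string getTargetIntrinsicName(unsigned IntrID) {
      if (IntrID < Intrinsic::num_intrinsics)
        return std::string();  // a core LLVM intrinsic, not in this table
      assert(IntrID < AMDGPUIntrinsic::num_AMDGPU_intrinsics &&
             "Invalid intrinsic ID");
      return names[IntrID - Intrinsic::num_intrinsics];
    }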
diff --git a/lib/Target/R600/AMDILIntrinsicInfo.h b/lib/Target/R600/AMDGPUIntrinsicInfo.h
index 924275a..5be68a2 100644
--- a/lib/Target/R600/AMDILIntrinsicInfo.h
+++ b/lib/Target/R600/AMDGPUIntrinsicInfo.h
@@ -1,4 +1,4 @@
-//===- AMDILIntrinsicInfo.h - AMDGPU Intrinsic Information ------*- C++ -*-===//
+//===- AMDGPUIntrinsicInfo.h - AMDGPU Intrinsic Information -----*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -11,8 +11,8 @@
/// \brief Interface for the AMDGPU Implementation of the Intrinsic Info class.
//
//===-----------------------------------------------------------------------===//
-#ifndef AMDIL_INTRINSICS_H
-#define AMDIL_INTRINSICS_H
+#ifndef AMDGPU_INTRINSICINFO_H
+#define AMDGPU_INTRINSICINFO_H
#include "llvm/IR/Intrinsics.h"
#include "llvm/Target/TargetIntrinsicInfo.h"
@@ -34,16 +34,15 @@ enum ID {
class AMDGPUIntrinsicInfo : public TargetIntrinsicInfo {
public:
AMDGPUIntrinsicInfo(TargetMachine *tm);
- std::string getName(unsigned int IntrId, Type **Tys = nullptr,
- unsigned int numTys = 0) const override;
- unsigned int lookupName(const char *Name, unsigned int Len) const override;
- bool isOverloaded(unsigned int IID) const override;
- Function *getDeclaration(Module *M, unsigned int ID,
+ std::string getName(unsigned IntrId, Type **Tys = nullptr,
+ unsigned numTys = 0) const override;
+ unsigned lookupName(const char *Name, unsigned Len) const override;
+ bool isOverloaded(unsigned IID) const override;
+ Function *getDeclaration(Module *M, unsigned ID,
Type **Tys = nullptr,
- unsigned int numTys = 0) const override;
+ unsigned numTys = 0) const override;
};
} // end namespace llvm
-#endif // AMDIL_INTRINSICS_H
-
+#endif // AMDGPU_INTRINSICINFO_H
diff --git a/lib/Target/R600/AMDGPUIntrinsics.td b/lib/Target/R600/AMDGPUIntrinsics.td
index 9ad5e72..d934676 100644
--- a/lib/Target/R600/AMDGPUIntrinsics.td
+++ b/lib/Target/R600/AMDGPUIntrinsics.td
@@ -18,18 +18,26 @@ let TargetPrefix = "AMDGPU", isTarget = 1 in {
def int_AMDGPU_reserve_reg : Intrinsic<[], [llvm_i32_ty], [IntrNoMem]>;
def int_AMDGPU_store_output : Intrinsic<[], [llvm_float_ty, llvm_i32_ty], []>;
def int_AMDGPU_swizzle : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty], [IntrNoMem]>;
-
+ def int_AMDGPU_abs : Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>;
def int_AMDGPU_arl : Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
def int_AMDGPU_cndlt : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
def int_AMDGPU_div : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
+ def int_AMDGPU_fract : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;
+ def int_AMDGPU_clamp : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
+
+  // This is named backwards (instead of rsq_legacy) so we don't have
+  // to define it with the public builtin intrinsics. This is a
+  // workaround for how intrinsic names are parsed. If the name is
+  // llvm.AMDGPU.rsq.legacy, the parser assumes that you meant
+  // llvm.AMDGPU.rsq.{f32 | f64} and incorrectly mangles the name.
+ def int_AMDGPU_legacy_rsq : Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+
def int_AMDGPU_dp4 : Intrinsic<[llvm_float_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
def int_AMDGPU_kill : Intrinsic<[], [llvm_float_ty], []>;
def int_AMDGPU_kilp : Intrinsic<[], [], []>;
def int_AMDGPU_lrp : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
def int_AMDGPU_mul : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
def int_AMDGPU_pow : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
- def int_AMDGPU_rcp : Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
- def int_AMDGPU_rsq : Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
def int_AMDGPU_seq : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
def int_AMDGPU_sgt : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
def int_AMDGPU_sge : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
@@ -53,12 +61,27 @@ let TargetPrefix = "AMDGPU", isTarget = 1 in {
def int_AMDGPU_imul24 : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
def int_AMDGPU_imad24 : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
def int_AMDGPU_umad24 : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_AMDGPU_cvt_f32_ubyte0 : Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
+ def int_AMDGPU_cvt_f32_ubyte1 : Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
+ def int_AMDGPU_cvt_f32_ubyte2 : Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
+ def int_AMDGPU_cvt_f32_ubyte3 : Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
def int_AMDGPU_cube : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
def int_AMDGPU_bfi : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
def int_AMDGPU_bfe_i32 : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
def int_AMDGPU_bfe_u32 : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
def int_AMDGPU_bfm : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_AMDGPU_brev : Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>;
def int_AMDGPU_barrier_local : Intrinsic<[], [], []>;
+ def int_AMDGPU_barrier_global : Intrinsic<[], [], []>;
+}
+
+// Legacy names for compatibility.
+let TargetPrefix = "AMDIL", isTarget = 1 in {
+ def int_AMDIL_abs : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [IntrNoMem]>;
+ def int_AMDIL_fraction : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;
+ def int_AMDIL_clamp : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
+ def int_AMDIL_exp : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;
+ def int_AMDIL_round_nearest : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;
}
let TargetPrefix = "TGSI", isTarget = 1 in {
diff --git a/lib/Target/R600/AMDGPUMCInstLower.cpp b/lib/Target/R600/AMDGPUMCInstLower.cpp
index b759495..ac82e88 100644
--- a/lib/Target/R600/AMDGPUMCInstLower.cpp
+++ b/lib/Target/R600/AMDGPUMCInstLower.cpp
@@ -15,6 +15,7 @@
#include "AMDGPUMCInstLower.h"
#include "AMDGPUAsmPrinter.h"
+#include "AMDGPUTargetMachine.h"
#include "InstPrinter/AMDGPUInstPrinter.h"
#include "R600InstrInfo.h"
#include "SIInstrInfo.h"
diff --git a/lib/Target/R600/AMDGPUMCInstLower.h b/lib/Target/R600/AMDGPUMCInstLower.h
index 2b7f1e3..58fe34d 100644
--- a/lib/Target/R600/AMDGPUMCInstLower.h
+++ b/lib/Target/R600/AMDGPUMCInstLower.h
@@ -14,9 +14,9 @@
namespace llvm {
class AMDGPUSubtarget;
-class MCInst;
-class MCContext;
class MachineInstr;
+class MCContext;
+class MCInst;
class AMDGPUMCInstLower {
diff --git a/lib/Target/R600/AMDGPUPromoteAlloca.cpp b/lib/Target/R600/AMDGPUPromoteAlloca.cpp
new file mode 100644
index 0000000..218750d
--- /dev/null
+++ b/lib/Target/R600/AMDGPUPromoteAlloca.cpp
@@ -0,0 +1,387 @@
+//===-- AMDGPUPromoteAlloca.cpp - Promote Allocas -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass eliminates allocas by either converting them into vectors or
+// by migrating them to local address space.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "AMDGPUSubtarget.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstVisitor.h"
+#include "llvm/Support/Debug.h"
+
+#define DEBUG_TYPE "amdgpu-promote-alloca"
+
+using namespace llvm;
+
+namespace {
+
+class AMDGPUPromoteAlloca : public FunctionPass,
+ public InstVisitor<AMDGPUPromoteAlloca> {
+
+ static char ID;
+ Module *Mod;
+ const AMDGPUSubtarget &ST;
+ int LocalMemAvailable;
+
+public:
+ AMDGPUPromoteAlloca(const AMDGPUSubtarget &st) : FunctionPass(ID), ST(st),
+ LocalMemAvailable(0) { }
+ virtual bool doInitialization(Module &M);
+ virtual bool runOnFunction(Function &F);
+ virtual const char *getPassName() const {
+ return "AMDGPU Promote Alloca";
+ }
+ void visitAlloca(AllocaInst &I);
+};
+
+} // End anonymous namespace
+
+char AMDGPUPromoteAlloca::ID = 0;
+
+bool AMDGPUPromoteAlloca::doInitialization(Module &M) {
+ Mod = &M;
+ return false;
+}
+
+bool AMDGPUPromoteAlloca::runOnFunction(Function &F) {
+
+ const FunctionType *FTy = F.getFunctionType();
+
+ LocalMemAvailable = ST.getLocalMemorySize();
+
+ // If the function has any arguments in the local address space, then it's
+ // possible these arguments require the entire local memory space, so
+ // we cannot use local memory in the pass.
+ for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i) {
+ const Type *ParamTy = FTy->getParamType(i);
+ if (ParamTy->isPointerTy() &&
+ ParamTy->getPointerAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
+ LocalMemAvailable = 0;
+ DEBUG(dbgs() << "Function has local memory argument. Promoting to "
+ "local memory disabled.\n");
+ break;
+ }
+ }
+
+ if (LocalMemAvailable > 0) {
+ // Check how much local memory is being used by global objects
+ for (Module::global_iterator I = Mod->global_begin(),
+ E = Mod->global_end(); I != E; ++I) {
+ GlobalVariable *GV = I;
+ PointerType *GVTy = GV->getType();
+ if (GVTy->getAddressSpace() != AMDGPUAS::LOCAL_ADDRESS)
+ continue;
+ for (Value::use_iterator U = GV->use_begin(),
+ UE = GV->use_end(); U != UE; ++U) {
+ Instruction *Use = dyn_cast<Instruction>(*U);
+ if (!Use)
+ continue;
+ if (Use->getParent()->getParent() == &F)
+ LocalMemAvailable -=
+ Mod->getDataLayout()->getTypeAllocSize(GVTy->getElementType());
+ }
+ }
+ }
+
+ LocalMemAvailable = std::max(0, LocalMemAvailable);
+  DEBUG(dbgs() << LocalMemAvailable << " bytes free in local memory.\n");
+
+ visit(F);
+
+ return false;
+}
+
+static VectorType *arrayTypeToVecType(const Type *ArrayTy) {
+ return VectorType::get(ArrayTy->getArrayElementType(),
+ ArrayTy->getArrayNumElements());
+}
+
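+// Map a pointer that addresses the alloca to its element index in the
+// replacement vector: the alloca itself addresses element 0, and a GEP uses
+// the index recorded in GEPIdx by the caller.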
+static Value *calculateVectorIndex(Value *Ptr,
+                                   std::map<GetElementPtrInst*, Value*> &GEPIdx) {
+ if (isa<AllocaInst>(Ptr))
+ return Constant::getNullValue(Type::getInt32Ty(Ptr->getContext()));
+
+ GetElementPtrInst *GEP = cast<GetElementPtrInst>(Ptr);
+
+ return GEPIdx[GEP];
+}
+
+static Value *GEPToVectorIndex(GetElementPtrInst *GEP) {
+  // FIXME: We only support simple cases.
+  if (GEP->getNumOperands() != 3)
+    return nullptr;
+
+ ConstantInt *I0 = dyn_cast<ConstantInt>(GEP->getOperand(1));
+ if (!I0 || !I0->isZero())
+    return nullptr;
+
+ return GEP->getOperand(2);
+}
+
+// Returns true for the instructions that the rewrite below knows how to turn
+// into vector operations.
+//
+// TODO: Check isTriviallyVectorizable for calls and handle other
+// instructions.
+static bool canVectorizeInst(Instruction *Inst) {
+ switch (Inst->getOpcode()) {
+ case Instruction::Load:
+ case Instruction::Store:
+ case Instruction::BitCast:
+ case Instruction::AddrSpaceCast:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static bool tryPromoteAllocaToVector(AllocaInst *Alloca) {
+ Type *AllocaTy = Alloca->getAllocatedType();
+
+  DEBUG(dbgs() << "Alloca candidate for vectorization\n");
+
+ // FIXME: There is no reason why we can't support larger arrays, we
+ // are just being conservative for now.
+ if (!AllocaTy->isArrayTy() ||
+ AllocaTy->getArrayElementType()->isVectorTy() ||
+ AllocaTy->getArrayNumElements() > 4) {
+
+    DEBUG(dbgs() << "  Cannot convert type to vector\n");
+ return false;
+ }
+
+ std::map<GetElementPtrInst*, Value*> GEPVectorIdx;
+ std::vector<Value*> WorkList;
+ for (User *AllocaUser : Alloca->users()) {
+ GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(AllocaUser);
+ if (!GEP) {
+ if (!canVectorizeInst(cast<Instruction>(AllocaUser)))
+ return false;
+
+ WorkList.push_back(AllocaUser);
+ continue;
+ }
+
+ Value *Index = GEPToVectorIndex(GEP);
+
+ // If we can't compute a vector index from this GEP, then we can't
+ // promote this alloca to vector.
+ if (!Index) {
+ DEBUG(dbgs() << " Cannot compute vector index for GEP " << *GEP << '\n');
+ return false;
+ }
+
+ GEPVectorIdx[GEP] = Index;
+ for (User *GEPUser : AllocaUser->users()) {
+ if (!canVectorizeInst(cast<Instruction>(GEPUser)))
+ return false;
+
+ WorkList.push_back(GEPUser);
+ }
+ }
+
+ VectorType *VectorTy = arrayTypeToVecType(AllocaTy);
+
+ DEBUG(dbgs() << " Converting alloca to vector "
+ << *AllocaTy << " -> " << *VectorTy << '\n');
+
+ for (std::vector<Value*>::iterator I = WorkList.begin(),
+ E = WorkList.end(); I != E; ++I) {
+ Instruction *Inst = cast<Instruction>(*I);
+ IRBuilder<> Builder(Inst);
+ switch (Inst->getOpcode()) {
+ case Instruction::Load: {
+ Value *Ptr = Inst->getOperand(0);
+ Value *Index = calculateVectorIndex(Ptr, GEPVectorIdx);
+ Value *BitCast = Builder.CreateBitCast(Alloca, VectorTy->getPointerTo(0));
+ Value *VecValue = Builder.CreateLoad(BitCast);
+ Value *ExtractElement = Builder.CreateExtractElement(VecValue, Index);
+ Inst->replaceAllUsesWith(ExtractElement);
+ Inst->eraseFromParent();
+ break;
+ }
+ case Instruction::Store: {
+ Value *Ptr = Inst->getOperand(1);
+ Value *Index = calculateVectorIndex(Ptr, GEPVectorIdx);
+ Value *BitCast = Builder.CreateBitCast(Alloca, VectorTy->getPointerTo(0));
+ Value *VecValue = Builder.CreateLoad(BitCast);
+ Value *NewVecValue = Builder.CreateInsertElement(VecValue,
+ Inst->getOperand(0),
+ Index);
+ Builder.CreateStore(NewVecValue, BitCast);
+ Inst->eraseFromParent();
+ break;
+ }
+ case Instruction::BitCast:
+ case Instruction::AddrSpaceCast:
+ break;
+
+ default:
+ Inst->dump();
+ llvm_unreachable("Inconsistency in instructions promotable to vector");
+ }
+ }
+ return true;
+}
+
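+// Recursively collect every pointer-typed user of Val, plus any call that
+// takes it, so the caller can rewrite them for the local address space.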
+static void collectUsesWithPtrTypes(Value *Val, std::vector<Value*> &WorkList) {
+  for (User *U : Val->users()) {
+    if (std::find(WorkList.begin(), WorkList.end(), U) != WorkList.end())
+      continue;
+    if (isa<CallInst>(U)) {
+      WorkList.push_back(U);
+      continue;
+    }
+    if (!U->getType()->isPointerTy())
+      continue;
+    WorkList.push_back(U);
+    collectUsesWithPtrTypes(U, WorkList);
+  }
+}
+
+void AMDGPUPromoteAlloca::visitAlloca(AllocaInst &I) {
+ IRBuilder<> Builder(&I);
+
+ // First try to replace the alloca with a vector
+ Type *AllocaTy = I.getAllocatedType();
+
+ DEBUG(dbgs() << "Trying to promote " << I << '\n');
+
+ if (tryPromoteAllocaToVector(&I))
+ return;
+
+ DEBUG(dbgs() << " alloca is not a candidate for vectorization.\n");
+
+ // FIXME: This is the maximum work group size. We should try to get
+ // value from the reqd_work_group_size function attribute if it is
+ // available.
+ unsigned WorkGroupSize = 256;
+ int AllocaSize = WorkGroupSize *
+ Mod->getDataLayout()->getTypeAllocSize(AllocaTy);
+
+ if (AllocaSize > LocalMemAvailable) {
+ DEBUG(dbgs() << " Not enough local memory to promote alloca.\n");
+ return;
+ }
+
+ DEBUG(dbgs() << "Promoting alloca to local memory\n");
+ LocalMemAvailable -= AllocaSize;
+
+ GlobalVariable *GV = new GlobalVariable(
+ *Mod, ArrayType::get(I.getAllocatedType(), 256), false,
+ GlobalValue::ExternalLinkage, 0, I.getName(), 0,
+ GlobalVariable::NotThreadLocal, AMDGPUAS::LOCAL_ADDRESS);
+
+ FunctionType *FTy = FunctionType::get(
+ Type::getInt32Ty(Mod->getContext()), false);
+ AttributeSet AttrSet;
+  AttrSet = AttrSet.addAttribute(Mod->getContext(), 0, Attribute::ReadNone);
+
+ Value *ReadLocalSizeY = Mod->getOrInsertFunction(
+ "llvm.r600.read.local.size.y", FTy, AttrSet);
+ Value *ReadLocalSizeZ = Mod->getOrInsertFunction(
+ "llvm.r600.read.local.size.z", FTy, AttrSet);
+ Value *ReadTIDIGX = Mod->getOrInsertFunction(
+ "llvm.r600.read.tidig.x", FTy, AttrSet);
+ Value *ReadTIDIGY = Mod->getOrInsertFunction(
+ "llvm.r600.read.tidig.y", FTy, AttrSet);
+ Value *ReadTIDIGZ = Mod->getOrInsertFunction(
+ "llvm.r600.read.tidig.z", FTy, AttrSet);
+
+ Value *TCntY = Builder.CreateCall(ReadLocalSizeY);
+ Value *TCntZ = Builder.CreateCall(ReadLocalSizeZ);
+ Value *TIdX = Builder.CreateCall(ReadTIDIGX);
+ Value *TIdY = Builder.CreateCall(ReadTIDIGY);
+ Value *TIdZ = Builder.CreateCall(ReadTIDIGZ);
+
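+  // Flatten the 3-D work-item id: in a group of (cntX, cntY, cntZ) threads,
+  // item (x, y, z) gets the unique index x * cntY * cntZ + y * cntZ + z,
+  // which selects this thread's private slice of the LDS array below.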
+ Value *Tmp0 = Builder.CreateMul(TCntY, TCntZ);
+ Tmp0 = Builder.CreateMul(Tmp0, TIdX);
+ Value *Tmp1 = Builder.CreateMul(TIdY, TCntZ);
+ Value *TID = Builder.CreateAdd(Tmp0, Tmp1);
+ TID = Builder.CreateAdd(TID, TIdZ);
+
+ std::vector<Value*> Indices;
+ Indices.push_back(Constant::getNullValue(Type::getInt32Ty(Mod->getContext())));
+ Indices.push_back(TID);
+
+ Value *Offset = Builder.CreateGEP(GV, Indices);
+ I.mutateType(Offset->getType());
+ I.replaceAllUsesWith(Offset);
+ I.eraseFromParent();
+
+ std::vector<Value*> WorkList;
+
+ collectUsesWithPtrTypes(Offset, WorkList);
+
+ for (std::vector<Value*>::iterator i = WorkList.begin(),
+ e = WorkList.end(); i != e; ++i) {
+ Value *V = *i;
+ CallInst *Call = dyn_cast<CallInst>(V);
+ if (!Call) {
+ Type *EltTy = V->getType()->getPointerElementType();
+ PointerType *NewTy = PointerType::get(EltTy, AMDGPUAS::LOCAL_ADDRESS);
+ V->mutateType(NewTy);
+ continue;
+ }
+
+ IntrinsicInst *Intr = dyn_cast<IntrinsicInst>(Call);
+ if (!Intr) {
+ std::vector<Type*> ArgTypes;
+ for (unsigned ArgIdx = 0, ArgEnd = Call->getNumArgOperands();
+ ArgIdx != ArgEnd; ++ArgIdx) {
+ ArgTypes.push_back(Call->getArgOperand(ArgIdx)->getType());
+ }
+ Function *F = Call->getCalledFunction();
+ FunctionType *NewType = FunctionType::get(Call->getType(), ArgTypes,
+ F->isVarArg());
+    Constant *C = Mod->getOrInsertFunction(
+        StringRef(F->getName().str() + ".local"), NewType, F->getAttributes());
+ Function *NewF = cast<Function>(C);
+ Call->setCalledFunction(NewF);
+ continue;
+ }
+
+ Builder.SetInsertPoint(Intr);
+ switch (Intr->getIntrinsicID()) {
+ case Intrinsic::lifetime_start:
+ case Intrinsic::lifetime_end:
+ // These intrinsics are for address space 0 only
+ Intr->eraseFromParent();
+ continue;
+ case Intrinsic::memcpy: {
+ MemCpyInst *MemCpy = cast<MemCpyInst>(Intr);
+ Builder.CreateMemCpy(MemCpy->getRawDest(), MemCpy->getRawSource(),
+ MemCpy->getLength(), MemCpy->getAlignment(),
+ MemCpy->isVolatile());
+ Intr->eraseFromParent();
+ continue;
+ }
+ case Intrinsic::memset: {
+ MemSetInst *MemSet = cast<MemSetInst>(Intr);
+ Builder.CreateMemSet(MemSet->getRawDest(), MemSet->getValue(),
+ MemSet->getLength(), MemSet->getAlignment(),
+ MemSet->isVolatile());
+ Intr->eraseFromParent();
+ continue;
+ }
+ default:
+ Intr->dump();
+ llvm_unreachable("Don't know how to promote alloca intrinsic use.");
+ }
+ }
+}
+
+FunctionPass *llvm::createAMDGPUPromoteAlloca(const AMDGPUSubtarget &ST) {
+ return new AMDGPUPromoteAlloca(ST);
+}
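End to end, the local-memory path above turns each surviving alloca into one slice of a per-workgroup LDS array, indexed by the flattened thread id, and then retypes every pointer-typed user into the local address space. A compact model of the addressing (a sketch; WorkGroupSize mirrors the hard-coded 256 above, and all names are hypothetical):

    #include <cstddef>

    // The pass currently assumes the maximum work group size of 256.
    constexpr std::size_t WorkGroupSize = 256;
    float LDS[WorkGroupSize][4];  // stand-in for the LDS global GV

    // Corresponds to GEP(GV, {0, TID}): each work item gets its own slice,
    // so the promoted alloca still behaves like private storage.
    float *privateSlice(unsigned TID) { return LDS[TID]; }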
diff --git a/lib/Target/R600/AMDGPURegisterInfo.cpp b/lib/Target/R600/AMDGPURegisterInfo.cpp
index 19927fa..3433280 100644
--- a/lib/Target/R600/AMDGPURegisterInfo.cpp
+++ b/lib/Target/R600/AMDGPURegisterInfo.cpp
@@ -17,9 +17,9 @@
using namespace llvm;
-AMDGPURegisterInfo::AMDGPURegisterInfo(TargetMachine &tm)
+AMDGPURegisterInfo::AMDGPURegisterInfo(const AMDGPUSubtarget &st)
: AMDGPUGenRegisterInfo(0),
- TM(tm)
+ ST(st)
{ }
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/R600/AMDGPURegisterInfo.h b/lib/Target/R600/AMDGPURegisterInfo.h
index a7cba0d..4731595 100644
--- a/lib/Target/R600/AMDGPURegisterInfo.h
+++ b/lib/Target/R600/AMDGPURegisterInfo.h
@@ -25,27 +25,19 @@
namespace llvm {
-class AMDGPUTargetMachine;
+class AMDGPUSubtarget;
class TargetInstrInfo;
struct AMDGPURegisterInfo : public AMDGPUGenRegisterInfo {
- TargetMachine &TM;
static const MCPhysReg CalleeSavedReg;
+ const AMDGPUSubtarget &ST;
- AMDGPURegisterInfo(TargetMachine &tm);
+ AMDGPURegisterInfo(const AMDGPUSubtarget &st);
BitVector getReservedRegs(const MachineFunction &MF) const override {
assert(!"Unimplemented"); return BitVector();
}
- /// \param RC is an AMDIL reg class.
- ///
- /// \returns The ISA reg class that is equivalent to \p RC.
- virtual const TargetRegisterClass * getISARegClass(
- const TargetRegisterClass * RC) const {
- assert(!"Unimplemented"); return nullptr;
- }
-
virtual const TargetRegisterClass* getCFGStructurizerRegClass(MVT VT) const {
assert(!"Unimplemented"); return nullptr;
}
diff --git a/lib/Target/R600/AMDGPUSubtarget.cpp b/lib/Target/R600/AMDGPUSubtarget.cpp
index f3b9932..b83c290 100644
--- a/lib/Target/R600/AMDGPUSubtarget.cpp
+++ b/lib/Target/R600/AMDGPUSubtarget.cpp
@@ -13,6 +13,8 @@
//===----------------------------------------------------------------------===//
#include "AMDGPUSubtarget.h"
+#include "R600InstrInfo.h"
+#include "SIInstrInfo.h"
using namespace llvm;
@@ -23,90 +25,42 @@ using namespace llvm;
#define GET_SUBTARGETINFO_CTOR
#include "AMDGPUGenSubtargetInfo.inc"
-AMDGPUSubtarget::AMDGPUSubtarget(StringRef TT, StringRef CPU, StringRef FS) :
- AMDGPUGenSubtargetInfo(TT, CPU, FS), DumpCode(false) {
- InstrItins = getInstrItineraryForCPU(CPU);
-
- // Default card
- StringRef GPU = CPU;
- Is64bit = false;
- HasVertexCache = false;
- TexVTXClauseSize = 0;
- Gen = AMDGPUSubtarget::R600;
- FP64 = false;
- CaymanISA = false;
- EnableIRStructurizer = true;
- EnableIfCvt = true;
- WavefrontSize = 0;
- CFALUBug = false;
+AMDGPUSubtarget::AMDGPUSubtarget(StringRef TT, StringRef GPU, StringRef FS) :
+ AMDGPUGenSubtargetInfo(TT, GPU, FS),
+ DevName(GPU),
+ Is64bit(false),
+ DumpCode(false),
+ R600ALUInst(false),
+ HasVertexCache(false),
+ TexVTXClauseSize(0),
+ Gen(AMDGPUSubtarget::R600),
+ FP64(false),
+ CaymanISA(false),
+ EnableIRStructurizer(true),
+ EnableIfCvt(true),
+ WavefrontSize(0),
+ CFALUBug(false),
+ LocalMemorySize(0),
+ InstrItins(getInstrItineraryForCPU(GPU)) {
ParseSubtargetFeatures(GPU, FS);
- DevName = GPU;
-}
-bool
-AMDGPUSubtarget::is64bit() const {
- return Is64bit;
-}
-bool
-AMDGPUSubtarget::hasVertexCache() const {
- return HasVertexCache;
-}
-short
-AMDGPUSubtarget::getTexVTXClauseSize() const {
- return TexVTXClauseSize;
-}
-enum AMDGPUSubtarget::Generation
-AMDGPUSubtarget::getGeneration() const {
- return Gen;
-}
-bool
-AMDGPUSubtarget::hasHWFP64() const {
- return FP64;
-}
-bool
-AMDGPUSubtarget::hasCaymanISA() const {
- return CaymanISA;
-}
-bool
-AMDGPUSubtarget::IsIRStructurizerEnabled() const {
- return EnableIRStructurizer;
-}
-bool
-AMDGPUSubtarget::isIfCvtEnabled() const {
- return EnableIfCvt;
-}
-unsigned
-AMDGPUSubtarget::getWavefrontSize() const {
- return WavefrontSize;
+ if (getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
+ InstrInfo.reset(new R600InstrInfo(*this));
+ } else {
+ InstrInfo.reset(new SIInstrInfo(*this));
+ }
}
-unsigned
-AMDGPUSubtarget::getStackEntrySize() const {
+
+unsigned AMDGPUSubtarget::getStackEntrySize() const {
assert(getGeneration() <= NORTHERN_ISLANDS);
switch(getWavefrontSize()) {
case 16:
return 8;
case 32:
- if (hasCaymanISA())
- return 4;
- else
- return 8;
+ return hasCaymanISA() ? 4 : 8;
case 64:
return 4;
default:
llvm_unreachable("Illegal wavefront size.");
}
}
-bool
-AMDGPUSubtarget::hasCFAluBug() const {
- assert(getGeneration() <= NORTHERN_ISLANDS);
- return CFALUBug;
-}
-bool
-AMDGPUSubtarget::isTargetELF() const {
- return false;
-}
-
-std::string
-AMDGPUSubtarget::getDeviceName() const {
- return DevName;
-}
diff --git a/lib/Target/R600/AMDGPUSubtarget.h b/lib/Target/R600/AMDGPUSubtarget.h
index 1b041d6..0c388b3 100644
--- a/lib/Target/R600/AMDGPUSubtarget.h
+++ b/lib/Target/R600/AMDGPUSubtarget.h
@@ -15,6 +15,7 @@
#ifndef AMDGPUSUBTARGET_H
#define AMDGPUSUBTARGET_H
#include "AMDGPU.h"
+#include "AMDGPUInstrInfo.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Target/TargetSubtargetInfo.h"
@@ -27,6 +28,9 @@
namespace llvm {
class AMDGPUSubtarget : public AMDGPUGenSubtargetInfo {
+
+ std::unique_ptr<AMDGPUInstrInfo> InstrInfo;
+
public:
enum Generation {
R600 = 0,
@@ -40,42 +44,78 @@ public:
private:
std::string DevName;
bool Is64bit;
- bool Is32on64bit;
bool DumpCode;
bool R600ALUInst;
bool HasVertexCache;
short TexVTXClauseSize;
- enum Generation Gen;
+ Generation Gen;
bool FP64;
bool CaymanISA;
bool EnableIRStructurizer;
bool EnableIfCvt;
unsigned WavefrontSize;
bool CFALUBug;
+ int LocalMemorySize;
InstrItineraryData InstrItins;
public:
AMDGPUSubtarget(StringRef TT, StringRef CPU, StringRef FS);
- const InstrItineraryData &getInstrItineraryData() const { return InstrItins; }
+ const AMDGPUInstrInfo *getInstrInfo() const {
+ return InstrInfo.get();
+ }
+
+ const InstrItineraryData &getInstrItineraryData() const {
+ return InstrItins;
+ }
+
void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
- bool is64bit() const;
- bool hasVertexCache() const;
- short getTexVTXClauseSize() const;
- enum Generation getGeneration() const;
- bool hasHWFP64() const;
- bool hasCaymanISA() const;
+ bool is64bit() const {
+ return Is64bit;
+ }
+
+ bool hasVertexCache() const {
+ return HasVertexCache;
+ }
+
+ short getTexVTXClauseSize() const {
+ return TexVTXClauseSize;
+ }
+
+ Generation getGeneration() const {
+ return Gen;
+ }
+
+ bool hasHWFP64() const {
+ return FP64;
+ }
+
+ bool hasCaymanISA() const {
+ return CaymanISA;
+ }
bool hasBFE() const {
return (getGeneration() >= EVERGREEN);
}
+ bool hasBFI() const {
+ return (getGeneration() >= EVERGREEN);
+ }
+
bool hasBFM() const {
return hasBFE();
}
+ bool hasBCNT(unsigned Size) const {
+ if (Size == 32)
+ return (getGeneration() >= EVERGREEN);
+
+ assert(Size == 64);
+ return (getGeneration() >= SOUTHERN_ISLANDS);
+ }
+
bool hasMulU24() const {
return (getGeneration() >= EVERGREEN);
}
@@ -85,22 +125,48 @@ public:
hasCaymanISA());
}
- bool IsIRStructurizerEnabled() const;
- bool isIfCvtEnabled() const;
- unsigned getWavefrontSize() const;
+ bool IsIRStructurizerEnabled() const {
+ return EnableIRStructurizer;
+ }
+
+ bool isIfCvtEnabled() const {
+ return EnableIfCvt;
+ }
+
+ unsigned getWavefrontSize() const {
+ return WavefrontSize;
+ }
+
unsigned getStackEntrySize() const;
- bool hasCFAluBug() const;
+
+ bool hasCFAluBug() const {
+ assert(getGeneration() <= NORTHERN_ISLANDS);
+ return CFALUBug;
+ }
+
+ int getLocalMemorySize() const {
+ return LocalMemorySize;
+ }
bool enableMachineScheduler() const override {
return getGeneration() <= NORTHERN_ISLANDS;
}
// Helper functions to simplify if statements
- bool isTargetELF() const;
- std::string getDeviceName() const;
- bool dumpCode() const { return DumpCode; }
- bool r600ALUEncoding() const { return R600ALUInst; }
+ bool isTargetELF() const {
+ return false;
+ }
+ StringRef getDeviceName() const {
+ return DevName;
+ }
+
+ bool dumpCode() const {
+ return DumpCode;
+ }
+ bool r600ALUEncoding() const {
+ return R600ALUInst;
+ }
};
} // End namespace llvm
diff --git a/lib/Target/R600/AMDGPUTargetMachine.cpp b/lib/Target/R600/AMDGPUTargetMachine.cpp
index 174fdca..8aab944 100644
--- a/lib/Target/R600/AMDGPUTargetMachine.cpp
+++ b/lib/Target/R600/AMDGPUTargetMachine.cpp
@@ -80,10 +80,8 @@ AMDGPUTargetMachine::AMDGPUTargetMachine(const Target &T, StringRef TT,
InstrItins(&Subtarget.getInstrItineraryData()) {
// TLInfo uses InstrInfo so it must be initialized after.
if (Subtarget.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
- InstrInfo.reset(new R600InstrInfo(*this));
TLInfo.reset(new R600TargetLowering(*this));
} else {
- InstrInfo.reset(new SIInstrInfo(*this));
TLInfo.reset(new SITargetLowering(*this));
}
setRequiresStructuredCFG(true);
@@ -111,6 +109,7 @@ public:
return nullptr;
}
+ virtual void addCodeGenPrepare();
bool addPreISel() override;
bool addInstSelector() override;
bool addPreRegAlloc() override;
@@ -136,6 +135,13 @@ void AMDGPUTargetMachine::addAnalysisPasses(PassManagerBase &PM) {
PM.add(createAMDGPUTargetTransformInfoPass(this));
}
+void AMDGPUPassConfig::addCodeGenPrepare() {
+ const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
+ addPass(createAMDGPUPromoteAlloca(ST));
+ addPass(createSROAPass());
+ TargetPassConfig::addCodeGenPrepare();
+}
+
bool
AMDGPUPassConfig::addPreISel() {
const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
@@ -159,7 +165,6 @@ bool AMDGPUPassConfig::addInstSelector() {
}
bool AMDGPUPassConfig::addPreRegAlloc() {
- addPass(createAMDGPUConvertToISAPass(*TM));
const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
@@ -169,6 +174,8 @@ bool AMDGPUPassConfig::addPreRegAlloc() {
// SIFixSGPRCopies can generate a lot of duplicate instructions,
// so we need to run MachineCSE afterwards.
addPass(&MachineCSEID);
+ initializeSIFixSGPRLiveRangesPass(*PassRegistry::getPassRegistry());
+ insertPass(&RegisterCoalescerID, &SIFixSGPRLiveRangesID);
}
return false;
}
diff --git a/lib/Target/R600/AMDGPUTargetMachine.h b/lib/Target/R600/AMDGPUTargetMachine.h
index 1287e13..3bb15be 100644
--- a/lib/Target/R600/AMDGPUTargetMachine.h
+++ b/lib/Target/R600/AMDGPUTargetMachine.h
@@ -17,8 +17,8 @@
#include "AMDGPUFrameLowering.h"
#include "AMDGPUInstrInfo.h"
+#include "AMDGPUIntrinsicInfo.h"
#include "AMDGPUSubtarget.h"
-#include "AMDILIntrinsicInfo.h"
#include "R600ISelLowering.h"
#include "llvm/IR/DataLayout.h"
@@ -30,7 +30,6 @@ class AMDGPUTargetMachine : public LLVMTargetMachine {
const DataLayout Layout;
AMDGPUFrameLowering FrameLowering;
AMDGPUIntrinsicInfo IntrinsicInfo;
- std::unique_ptr<AMDGPUInstrInfo> InstrInfo;
std::unique_ptr<AMDGPUTargetLowering> TLInfo;
const InstrItineraryData *InstrItins;
@@ -46,13 +45,13 @@ public:
return &IntrinsicInfo;
}
const AMDGPUInstrInfo *getInstrInfo() const override {
- return InstrInfo.get();
+ return getSubtargetImpl()->getInstrInfo();
}
const AMDGPUSubtarget *getSubtargetImpl() const override {
return &Subtarget;
}
const AMDGPURegisterInfo *getRegisterInfo() const override {
- return &InstrInfo->getRegisterInfo();
+ return &getInstrInfo()->getRegisterInfo();
}
AMDGPUTargetLowering *getTargetLowering() const override {
return TLInfo.get();
diff --git a/lib/Target/R600/AMDILBase.td b/lib/Target/R600/AMDILBase.td
deleted file mode 100644
index 5dcd478..0000000
--- a/lib/Target/R600/AMDILBase.td
+++ /dev/null
@@ -1,25 +0,0 @@
-//===- AMDIL.td - AMDIL Target Machine -------------*- tablegen -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-// Target-independent interfaces which we are implementing
-//===----------------------------------------------------------------------===//
-
-include "llvm/Target/Target.td"
-
-// Dummy Instruction itineraries for pseudo instructions
-def ALU_NULL : FuncUnit;
-def NullALU : InstrItinClass;
-
-//===----------------------------------------------------------------------===//
-// Register File, Calling Conv, Instruction Descriptions
-//===----------------------------------------------------------------------===//
-
-
-include "AMDILRegisterInfo.td"
-include "AMDILInstrInfo.td"
-
diff --git a/lib/Target/R600/AMDILISelLowering.cpp b/lib/Target/R600/AMDILISelLowering.cpp
deleted file mode 100644
index 7cea803..0000000
--- a/lib/Target/R600/AMDILISelLowering.cpp
+++ /dev/null
@@ -1,560 +0,0 @@
-//===-- AMDILISelLowering.cpp - AMDIL DAG Lowering Implementation ---------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//==-----------------------------------------------------------------------===//
-//
-/// \file
-/// \brief TargetLowering functions borrowed from AMDIL.
-//
-//===----------------------------------------------------------------------===//
-
-#include "AMDGPUISelLowering.h"
-#include "AMDGPURegisterInfo.h"
-#include "AMDGPUSubtarget.h"
-#include "AMDILIntrinsicInfo.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/PseudoSourceValue.h"
-#include "llvm/CodeGen/SelectionDAG.h"
-#include "llvm/CodeGen/SelectionDAGNodes.h"
-#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
-#include "llvm/IR/CallingConv.h"
-#include "llvm/IR/DerivedTypes.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/Intrinsics.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetOptions.h"
-
-using namespace llvm;
-//===----------------------------------------------------------------------===//
-// TargetLowering Implementation Help Functions End
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// TargetLowering Class Implementation Begins
-//===----------------------------------------------------------------------===//
-void AMDGPUTargetLowering::InitAMDILLowering() {
- static const MVT::SimpleValueType types[] = {
- MVT::i8,
- MVT::i16,
- MVT::i32,
- MVT::f32,
- MVT::f64,
- MVT::i64,
- MVT::v2i8,
- MVT::v4i8,
- MVT::v2i16,
- MVT::v4i16,
- MVT::v4f32,
- MVT::v4i32,
- MVT::v2f32,
- MVT::v2i32,
- MVT::v2f64,
- MVT::v2i64
- };
-
- static const MVT::SimpleValueType IntTypes[] = {
- MVT::i8,
- MVT::i16,
- MVT::i32,
- MVT::i64
- };
-
- static const MVT::SimpleValueType FloatTypes[] = {
- MVT::f32,
- MVT::f64
- };
-
- static const MVT::SimpleValueType VectorTypes[] = {
- MVT::v2i8,
- MVT::v4i8,
- MVT::v2i16,
- MVT::v4i16,
- MVT::v4f32,
- MVT::v4i32,
- MVT::v2f32,
- MVT::v2i32,
- MVT::v2f64,
- MVT::v2i64
- };
-
- const AMDGPUSubtarget &STM = getTargetMachine().getSubtarget<AMDGPUSubtarget>();
- // These are the current register classes that are
- // supported
-
- for (MVT VT : types) {
- setOperationAction(ISD::SUBE, VT, Expand);
- setOperationAction(ISD::SUBC, VT, Expand);
- setOperationAction(ISD::ADDE, VT, Expand);
- setOperationAction(ISD::ADDC, VT, Expand);
- setOperationAction(ISD::BRCOND, VT, Custom);
- setOperationAction(ISD::BR_JT, VT, Expand);
- setOperationAction(ISD::BRIND, VT, Expand);
- // TODO: Implement custom UREM/SREM routines
- setOperationAction(ISD::SREM, VT, Expand);
- setOperationAction(ISD::SMUL_LOHI, VT, Expand);
- setOperationAction(ISD::UMUL_LOHI, VT, Expand);
- if (VT != MVT::i64 && VT != MVT::v2i64) {
- setOperationAction(ISD::SDIV, VT, Custom);
- }
- }
- for (MVT VT : FloatTypes) {
- // IL does not have these operations for floating point types
- setOperationAction(ISD::FP_ROUND_INREG, VT, Expand);
- setOperationAction(ISD::SETOLT, VT, Expand);
- setOperationAction(ISD::SETOGE, VT, Expand);
- setOperationAction(ISD::SETOGT, VT, Expand);
- setOperationAction(ISD::SETOLE, VT, Expand);
- setOperationAction(ISD::SETULT, VT, Expand);
- setOperationAction(ISD::SETUGE, VT, Expand);
- setOperationAction(ISD::SETUGT, VT, Expand);
- setOperationAction(ISD::SETULE, VT, Expand);
- }
-
- for (MVT VT : IntTypes) {
- // GPU also does not have divrem function for signed or unsigned
- setOperationAction(ISD::SDIVREM, VT, Expand);
-
- // GPU does not have [S|U]MUL_LOHI functions as a single instruction
- setOperationAction(ISD::SMUL_LOHI, VT, Expand);
- setOperationAction(ISD::UMUL_LOHI, VT, Expand);
-
- setOperationAction(ISD::BSWAP, VT, Expand);
-
- // GPU doesn't have any counting operators
- setOperationAction(ISD::CTPOP, VT, Expand);
- setOperationAction(ISD::CTTZ, VT, Expand);
- setOperationAction(ISD::CTLZ, VT, Expand);
- }
-
- for (MVT VT : VectorTypes) {
- setOperationAction(ISD::VECTOR_SHUFFLE, VT, Expand);
- setOperationAction(ISD::SDIVREM, VT, Expand);
- setOperationAction(ISD::SMUL_LOHI, VT, Expand);
- // setOperationAction(ISD::VSETCC, VT, Expand);
- setOperationAction(ISD::SELECT_CC, VT, Expand);
-
- }
- setOperationAction(ISD::MULHU, MVT::i64, Expand);
- setOperationAction(ISD::MULHU, MVT::v2i64, Expand);
- setOperationAction(ISD::MULHS, MVT::i64, Expand);
- setOperationAction(ISD::MULHS, MVT::v2i64, Expand);
- setOperationAction(ISD::ADD, MVT::v2i64, Expand);
- setOperationAction(ISD::SREM, MVT::v2i64, Expand);
- setOperationAction(ISD::Constant , MVT::i64 , Legal);
- setOperationAction(ISD::SDIV, MVT::v2i64, Expand);
- setOperationAction(ISD::TRUNCATE, MVT::v2i64, Expand);
- setOperationAction(ISD::SIGN_EXTEND, MVT::v2i64, Expand);
- setOperationAction(ISD::ZERO_EXTEND, MVT::v2i64, Expand);
- setOperationAction(ISD::ANY_EXTEND, MVT::v2i64, Expand);
- if (STM.hasHWFP64()) {
- // we support loading/storing v2f64 but not operations on the type
- setOperationAction(ISD::FADD, MVT::v2f64, Expand);
- setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
- setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
- setOperationAction(ISD::FP_ROUND_INREG, MVT::v2f64, Expand);
- setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand);
- setOperationAction(ISD::ConstantFP , MVT::f64 , Legal);
- // We want to expand vector conversions into their scalar
- // counterparts.
- setOperationAction(ISD::TRUNCATE, MVT::v2f64, Expand);
- setOperationAction(ISD::SIGN_EXTEND, MVT::v2f64, Expand);
- setOperationAction(ISD::ZERO_EXTEND, MVT::v2f64, Expand);
- setOperationAction(ISD::ANY_EXTEND, MVT::v2f64, Expand);
- setOperationAction(ISD::FABS, MVT::f64, Expand);
- setOperationAction(ISD::FABS, MVT::v2f64, Expand);
- }
- // TODO: Fix the UDIV24 algorithm so it works for these
- // types correctly. This needs vector comparisons
- // for this to work correctly.
- setOperationAction(ISD::UDIV, MVT::v2i8, Expand);
- setOperationAction(ISD::UDIV, MVT::v4i8, Expand);
- setOperationAction(ISD::UDIV, MVT::v2i16, Expand);
- setOperationAction(ISD::UDIV, MVT::v4i16, Expand);
- setOperationAction(ISD::SUBC, MVT::Other, Expand);
- setOperationAction(ISD::ADDE, MVT::Other, Expand);
- setOperationAction(ISD::ADDC, MVT::Other, Expand);
- setOperationAction(ISD::BRCOND, MVT::Other, Custom);
- setOperationAction(ISD::BR_JT, MVT::Other, Expand);
- setOperationAction(ISD::BRIND, MVT::Other, Expand);
-
-
- // Use the default implementation.
- setOperationAction(ISD::ConstantFP , MVT::f32 , Legal);
- setOperationAction(ISD::Constant , MVT::i32 , Legal);
-
- setSchedulingPreference(Sched::RegPressure);
- setPow2DivIsCheap(false);
- setSelectIsExpensive(true);
- setJumpIsExpensive(true);
-
- MaxStoresPerMemcpy = 4096;
- MaxStoresPerMemmove = 4096;
- MaxStoresPerMemset = 4096;
-
-}
-
-bool
-AMDGPUTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
- const CallInst &I, unsigned Intrinsic) const {
- return false;
-}
-
-// The backend supports 32 and 64 bit floating point immediates
-bool
-AMDGPUTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
- if (VT.getScalarType().getSimpleVT().SimpleTy == MVT::f32
- || VT.getScalarType().getSimpleVT().SimpleTy == MVT::f64) {
- return true;
- } else {
- return false;
- }
-}
-
-bool
-AMDGPUTargetLowering::ShouldShrinkFPConstant(EVT VT) const {
- if (VT.getScalarType().getSimpleVT().SimpleTy == MVT::f32
- || VT.getScalarType().getSimpleVT().SimpleTy == MVT::f64) {
- return false;
- } else {
- return true;
- }
-}
-
-
-// isMaskedValueZeroForTargetNode - Return true if 'Op & Mask' is known to
-// be zero. Op is expected to be a target specific node. Used by DAG
-// combiner.
-
-//===----------------------------------------------------------------------===//
-// Other Lowering Hooks
-//===----------------------------------------------------------------------===//
-
-SDValue
-AMDGPUTargetLowering::LowerSDIV(SDValue Op, SelectionDAG &DAG) const {
- EVT OVT = Op.getValueType();
- SDValue DST;
- if (OVT.getScalarType() == MVT::i64) {
- DST = LowerSDIV64(Op, DAG);
- } else if (OVT.getScalarType() == MVT::i32) {
- DST = LowerSDIV32(Op, DAG);
- } else if (OVT.getScalarType() == MVT::i16
- || OVT.getScalarType() == MVT::i8) {
- DST = LowerSDIV24(Op, DAG);
- } else {
- DST = SDValue(Op.getNode(), 0);
- }
- return DST;
-}
-
-SDValue
-AMDGPUTargetLowering::LowerSREM(SDValue Op, SelectionDAG &DAG) const {
- EVT OVT = Op.getValueType();
- SDValue DST;
- if (OVT.getScalarType() == MVT::i64) {
- DST = LowerSREM64(Op, DAG);
- } else if (OVT.getScalarType() == MVT::i32) {
- DST = LowerSREM32(Op, DAG);
- } else if (OVT.getScalarType() == MVT::i16) {
- DST = LowerSREM16(Op, DAG);
- } else if (OVT.getScalarType() == MVT::i8) {
- DST = LowerSREM8(Op, DAG);
- } else {
- DST = SDValue(Op.getNode(), 0);
- }
- return DST;
-}
-
-EVT
-AMDGPUTargetLowering::genIntType(uint32_t size, uint32_t numEle) const {
- int iSize = (size * numEle);
- int vEle = (iSize >> ((size == 64) ? 6 : 5));
- if (!vEle) {
- vEle = 1;
- }
- if (size == 64) {
- if (vEle == 1) {
- return EVT(MVT::i64);
- } else {
- return EVT(MVT::getVectorVT(MVT::i64, vEle));
- }
- } else {
- if (vEle == 1) {
- return EVT(MVT::i32);
- } else {
- return EVT(MVT::getVectorVT(MVT::i32, vEle));
- }
- }
-}
-
-SDValue
-AMDGPUTargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
- SDValue Chain = Op.getOperand(0);
- SDValue Cond = Op.getOperand(1);
- SDValue Jump = Op.getOperand(2);
- SDValue Result;
- Result = DAG.getNode(
- AMDGPUISD::BRANCH_COND,
- SDLoc(Op),
- Op.getValueType(),
- Chain, Jump, Cond);
- return Result;
-}
-
-SDValue
-AMDGPUTargetLowering::LowerSDIV24(SDValue Op, SelectionDAG &DAG) const {
- SDLoc DL(Op);
- EVT OVT = Op.getValueType();
- SDValue LHS = Op.getOperand(0);
- SDValue RHS = Op.getOperand(1);
- MVT INTTY;
- MVT FLTTY;
- if (!OVT.isVector()) {
- INTTY = MVT::i32;
- FLTTY = MVT::f32;
- } else if (OVT.getVectorNumElements() == 2) {
- INTTY = MVT::v2i32;
- FLTTY = MVT::v2f32;
- } else if (OVT.getVectorNumElements() == 4) {
- INTTY = MVT::v4i32;
- FLTTY = MVT::v4f32;
- }
- unsigned bitsize = OVT.getScalarType().getSizeInBits();
- // char|short jq = ia ^ ib;
- SDValue jq = DAG.getNode(ISD::XOR, DL, OVT, LHS, RHS);
-
- // jq = jq >> (bitsize - 2)
- jq = DAG.getNode(ISD::SRA, DL, OVT, jq, DAG.getConstant(bitsize - 2, OVT));
-
- // jq = jq | 0x1
- jq = DAG.getNode(ISD::OR, DL, OVT, jq, DAG.getConstant(1, OVT));
-
- // jq = (int)jq
- jq = DAG.getSExtOrTrunc(jq, DL, INTTY);
-
- // int ia = (int)LHS;
- SDValue ia = DAG.getSExtOrTrunc(LHS, DL, INTTY);
-
- // int ib, (int)RHS;
- SDValue ib = DAG.getSExtOrTrunc(RHS, DL, INTTY);
-
- // float fa = (float)ia;
- SDValue fa = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ia);
-
- // float fb = (float)ib;
- SDValue fb = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ib);
-
- // float fq = native_divide(fa, fb);
- SDValue fq = DAG.getNode(AMDGPUISD::DIV_INF, DL, FLTTY, fa, fb);
-
- // fq = trunc(fq);
- fq = DAG.getNode(ISD::FTRUNC, DL, FLTTY, fq);
-
- // float fqneg = -fq;
- SDValue fqneg = DAG.getNode(ISD::FNEG, DL, FLTTY, fq);
-
- // float fr = mad(fqneg, fb, fa);
- SDValue fr = DAG.getNode(ISD::FADD, DL, FLTTY,
- DAG.getNode(ISD::MUL, DL, FLTTY, fqneg, fb), fa);
-
- // int iq = (int)fq;
- SDValue iq = DAG.getNode(ISD::FP_TO_SINT, DL, INTTY, fq);
-
- // fr = fabs(fr);
- fr = DAG.getNode(ISD::FABS, DL, FLTTY, fr);
-
- // fb = fabs(fb);
- fb = DAG.getNode(ISD::FABS, DL, FLTTY, fb);
-
- // int cv = fr >= fb;
- SDValue cv;
- if (INTTY == MVT::i32) {
- cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE);
- } else {
- cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE);
- }
- // jq = (cv ? jq : 0);
- jq = DAG.getNode(ISD::SELECT, DL, OVT, cv, jq,
- DAG.getConstant(0, OVT));
- // dst = iq + jq;
- iq = DAG.getSExtOrTrunc(iq, DL, OVT);
- iq = DAG.getNode(ISD::ADD, DL, OVT, iq, jq);
- return iq;
-}
-
-SDValue
-AMDGPUTargetLowering::LowerSDIV32(SDValue Op, SelectionDAG &DAG) const {
- SDLoc DL(Op);
- EVT OVT = Op.getValueType();
- SDValue LHS = Op.getOperand(0);
- SDValue RHS = Op.getOperand(1);
- // The LowerSDIV32 function generates equivalent to the following IL.
- // mov r0, LHS
- // mov r1, RHS
- // ilt r10, r0, 0
- // ilt r11, r1, 0
- // iadd r0, r0, r10
- // iadd r1, r1, r11
- // ixor r0, r0, r10
- // ixor r1, r1, r11
- // udiv r0, r0, r1
- // ixor r10, r10, r11
- // iadd r0, r0, r10
- // ixor DST, r0, r10
-
- // mov r0, LHS
- SDValue r0 = LHS;
-
- // mov r1, RHS
- SDValue r1 = RHS;
-
- // ilt r10, r0, 0
- SDValue r10 = DAG.getSelectCC(DL,
- r0, DAG.getConstant(0, OVT),
- DAG.getConstant(-1, MVT::i32),
- DAG.getConstant(0, MVT::i32),
- ISD::SETLT);
-
- // ilt r11, r1, 0
- SDValue r11 = DAG.getSelectCC(DL,
- r1, DAG.getConstant(0, OVT),
- DAG.getConstant(-1, MVT::i32),
- DAG.getConstant(0, MVT::i32),
- ISD::SETLT);
-
- // iadd r0, r0, r10
- r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
-
- // iadd r1, r1, r11
- r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);
-
- // ixor r0, r0, r10
- r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
-
- // ixor r1, r1, r11
- r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);
-
- // udiv r0, r0, r1
- r0 = DAG.getNode(ISD::UDIV, DL, OVT, r0, r1);
-
- // ixor r10, r10, r11
- r10 = DAG.getNode(ISD::XOR, DL, OVT, r10, r11);
-
- // iadd r0, r0, r10
- r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
-
- // ixor DST, r0, r10
- SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
- return DST;
-}
-
-SDValue
-AMDGPUTargetLowering::LowerSDIV64(SDValue Op, SelectionDAG &DAG) const {
- return SDValue(Op.getNode(), 0);
-}
-
-SDValue
-AMDGPUTargetLowering::LowerSREM8(SDValue Op, SelectionDAG &DAG) const {
- SDLoc DL(Op);
- EVT OVT = Op.getValueType();
- MVT INTTY = MVT::i32;
- if (OVT == MVT::v2i8) {
- INTTY = MVT::v2i32;
- } else if (OVT == MVT::v4i8) {
- INTTY = MVT::v4i32;
- }
- SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY);
- SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY);
- LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS);
- LHS = DAG.getSExtOrTrunc(LHS, DL, OVT);
- return LHS;
-}
-
-SDValue
-AMDGPUTargetLowering::LowerSREM16(SDValue Op, SelectionDAG &DAG) const {
- SDLoc DL(Op);
- EVT OVT = Op.getValueType();
- MVT INTTY = MVT::i32;
- if (OVT == MVT::v2i16) {
- INTTY = MVT::v2i32;
- } else if (OVT == MVT::v4i16) {
- INTTY = MVT::v4i32;
- }
- SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY);
- SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY);
- LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS);
- LHS = DAG.getSExtOrTrunc(LHS, DL, OVT);
- return LHS;
-}
-
-SDValue
-AMDGPUTargetLowering::LowerSREM32(SDValue Op, SelectionDAG &DAG) const {
- SDLoc DL(Op);
- EVT OVT = Op.getValueType();
- SDValue LHS = Op.getOperand(0);
- SDValue RHS = Op.getOperand(1);
- // The LowerSREM32 function generates equivalent to the following IL.
- // mov r0, LHS
- // mov r1, RHS
- // ilt r10, r0, 0
- // ilt r11, r1, 0
- // iadd r0, r0, r10
- // iadd r1, r1, r11
- // ixor r0, r0, r10
- // ixor r1, r1, r11
- // udiv r20, r0, r1
- // umul r20, r20, r1
- // sub r0, r0, r20
- // iadd r0, r0, r10
- // ixor DST, r0, r10
-
- // mov r0, LHS
- SDValue r0 = LHS;
-
- // mov r1, RHS
- SDValue r1 = RHS;
-
- // ilt r10, r0, 0
- SDValue r10 = DAG.getSetCC(DL, OVT, r0, DAG.getConstant(0, OVT), ISD::SETLT);
-
- // ilt r11, r1, 0
- SDValue r11 = DAG.getSetCC(DL, OVT, r1, DAG.getConstant(0, OVT), ISD::SETLT);
-
- // iadd r0, r0, r10
- r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
-
- // iadd r1, r1, r11
- r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);
-
- // ixor r0, r0, r10
- r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
-
- // ixor r1, r1, r11
- r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);
-
- // udiv r20, r0, r1
- SDValue r20 = DAG.getNode(ISD::UREM, DL, OVT, r0, r1);
-
- // umul r20, r20, r1
- r20 = DAG.getNode(AMDGPUISD::UMUL, DL, OVT, r20, r1);
-
- // sub r0, r0, r20
- r0 = DAG.getNode(ISD::SUB, DL, OVT, r0, r20);
-
- // iadd r0, r0, r10
- r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
-
- // ixor DST, r0, r10
- SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
- return DST;
-}
-
-SDValue
-AMDGPUTargetLowering::LowerSREM64(SDValue Op, SelectionDAG &DAG) const {
- return SDValue(Op.getNode(), 0);
-}
diff --git a/lib/Target/R600/AMDILInstrInfo.td b/lib/Target/R600/AMDILInstrInfo.td
deleted file mode 100644
index 0f0c88d..0000000
--- a/lib/Target/R600/AMDILInstrInfo.td
+++ /dev/null
@@ -1,150 +0,0 @@
-//===------------ AMDILInstrInfo.td - AMDIL Target ------*-tablegen-*------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//==-----------------------------------------------------------------------===//
-//
-// This file describes the AMDIL instructions in TableGen format.
-//
-//===----------------------------------------------------------------------===//
-//===--------------------------------------------------------------------===//
-// Custom Operands
-//===--------------------------------------------------------------------===//
-def brtarget : Operand<OtherVT>;
-
-//===--------------------------------------------------------------------===//
-// Custom Selection DAG Type Profiles
-//===--------------------------------------------------------------------===//
-//===----------------------------------------------------------------------===//
-// Generic Profile Types
-//===----------------------------------------------------------------------===//
-
-def SDTIL_GenBinaryOp : SDTypeProfile<1, 2, [
- SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>
- ]>;
-def SDTIL_GenTernaryOp : SDTypeProfile<1, 3, [
- SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>, SDTCisSameAs<2, 3>
- ]>;
-def SDTIL_GenVecBuild : SDTypeProfile<1, 1, [
- SDTCisEltOfVec<1, 0>
- ]>;
-
-//===----------------------------------------------------------------------===//
-// Flow Control Profile Types
-//===----------------------------------------------------------------------===//
-// Branch instruction where second and third are basic blocks
-def SDTIL_BRCond : SDTypeProfile<0, 2, [
- SDTCisVT<0, OtherVT>
- ]>;
-
-//===--------------------------------------------------------------------===//
-// Custom Selection DAG Nodes
-//===--------------------------------------------------------------------===//
-//===----------------------------------------------------------------------===//
-// Flow Control DAG Nodes
-//===----------------------------------------------------------------------===//
-def IL_brcond : SDNode<"AMDGPUISD::BRANCH_COND", SDTIL_BRCond, [SDNPHasChain]>;
-
-//===----------------------------------------------------------------------===//
-// Call/Return DAG Nodes
-//===----------------------------------------------------------------------===//
-def IL_retflag : SDNode<"AMDGPUISD::RET_FLAG", SDTNone,
- [SDNPHasChain, SDNPOptInGlue]>;
-
-//===--------------------------------------------------------------------===//
-// Instructions
-//===--------------------------------------------------------------------===//
-// Floating point math functions
-def IL_div_inf : SDNode<"AMDGPUISD::DIV_INF", SDTIL_GenBinaryOp>;
-
-//===----------------------------------------------------------------------===//
-// Integer functions
-//===----------------------------------------------------------------------===//
-def IL_umul : SDNode<"AMDGPUISD::UMUL" , SDTIntBinOp,
- [SDNPCommutative, SDNPAssociative]>;
-
-//===--------------------------------------------------------------------===//
-// Custom Pattern DAG Nodes
-//===--------------------------------------------------------------------===//
-def global_store : PatFrag<(ops node:$val, node:$ptr),
- (store node:$val, node:$ptr), [{
- return isGlobalStore(dyn_cast<StoreSDNode>(N));
-}]>;
-
-//===----------------------------------------------------------------------===//
-// Load pattern fragments
-//===----------------------------------------------------------------------===//
-// Global address space loads
-def global_load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
- return isGlobalLoad(dyn_cast<LoadSDNode>(N));
-}]>;
-// Constant address space loads
-def constant_load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
- return isConstantLoad(dyn_cast<LoadSDNode>(N), -1);
-}]>;
-
-//===----------------------------------------------------------------------===//
-// Complex addressing mode patterns
-//===----------------------------------------------------------------------===//
-def ADDR : ComplexPattern<i32, 2, "SelectADDR", [], []>;
-def ADDRF : ComplexPattern<i32, 2, "SelectADDR", [frameindex], []>;
-def ADDR64 : ComplexPattern<i64, 2, "SelectADDR64", [], []>;
-def ADDR64F : ComplexPattern<i64, 2, "SelectADDR64", [frameindex], []>;
-
-//===----------------------------------------------------------------------===//
-// Instruction format classes
-//===----------------------------------------------------------------------===//
-class ILFormat<dag outs, dag ins, string asmstr, list<dag> pattern>
-: Instruction {
-
- let Namespace = "AMDGPU";
- dag OutOperandList = outs;
- dag InOperandList = ins;
- let Pattern = pattern;
- let AsmString = !strconcat(asmstr, "\n");
- let isPseudo = 1;
- let Itinerary = NullALU;
- bit hasIEEEFlag = 0;
- bit hasZeroOpFlag = 0;
- let mayLoad = 0;
- let mayStore = 0;
- let hasSideEffects = 0;
-}
-
-//===--------------------------------------------------------------------===//
-// Multiclass Instruction formats
-//===--------------------------------------------------------------------===//
-// Multiclass that handles branch instructions
-multiclass BranchConditional<SDNode Op, RegisterClass rci, RegisterClass rcf> {
- def _i32 : ILFormat<(outs),
- (ins brtarget:$target, rci:$src0),
- "; i32 Pseudo branch instruction",
- [(Op bb:$target, (i32 rci:$src0))]>;
- def _f32 : ILFormat<(outs),
- (ins brtarget:$target, rcf:$src0),
- "; f32 Pseudo branch instruction",
- [(Op bb:$target, (f32 rcf:$src0))]>;
-}
-
-// Only scalar types should generate flow control
-multiclass BranchInstr<string name> {
- def _i32 : ILFormat<(outs), (ins GPRI32:$src),
- !strconcat(name, " $src"), []>;
- def _f32 : ILFormat<(outs), (ins GPRF32:$src),
- !strconcat(name, " $src"), []>;
-}
-// Only scalar types should generate flow control
-multiclass BranchInstr2<string name> {
- def _i32 : ILFormat<(outs), (ins GPRI32:$src0, GPRI32:$src1),
- !strconcat(name, " $src0, $src1"), []>;
- def _f32 : ILFormat<(outs), (ins GPRF32:$src0, GPRF32:$src1),
- !strconcat(name, " $src0, $src1"), []>;
-}
-
-//===--------------------------------------------------------------------===//
-// Intrinsics support
-//===--------------------------------------------------------------------===//
-include "AMDILIntrinsics.td"
diff --git a/lib/Target/R600/AMDILIntrinsics.td b/lib/Target/R600/AMDILIntrinsics.td
deleted file mode 100644
index 4a3e02e..0000000
--- a/lib/Target/R600/AMDILIntrinsics.td
+++ /dev/null
@@ -1,224 +0,0 @@
-//===- AMDILIntrinsics.td - Defines AMDIL Intrinscs -*- tablegen -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//==-----------------------------------------------------------------------===//
-//
-// This file defines all of the amdil-specific intrinsics
-//
-//===---------------------------------------------------------------===//
-//===--------------------------------------------------------------------===//
-// Intrinsic classes
-// Generic versions of the above classes but for Target specific intrinsics
-// instead of SDNode patterns.
-//===--------------------------------------------------------------------===//
-let TargetPrefix = "AMDIL", isTarget = 1 in {
- class VoidIntLong :
- Intrinsic<[llvm_i64_ty], [], []>;
- class VoidIntInt :
- Intrinsic<[llvm_i32_ty], [], []>;
- class VoidIntBool :
- Intrinsic<[llvm_i32_ty], [], []>;
- class UnaryIntInt :
- Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [IntrNoMem]>;
- class UnaryIntFloat :
- Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;
- class ConvertIntFTOI :
- Intrinsic<[llvm_anyint_ty], [llvm_anyfloat_ty], [IntrNoMem]>;
- class ConvertIntITOF :
- Intrinsic<[llvm_anyfloat_ty], [llvm_anyint_ty], [IntrNoMem]>;
- class UnaryIntNoRetInt :
- Intrinsic<[], [llvm_anyint_ty], []>;
- class UnaryIntNoRetFloat :
- Intrinsic<[], [llvm_anyfloat_ty], []>;
- class BinaryIntInt :
- Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
- class BinaryIntFloat :
- Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
- class BinaryIntNoRetInt :
- Intrinsic<[], [llvm_anyint_ty, LLVMMatchType<0>], []>;
- class BinaryIntNoRetFloat :
- Intrinsic<[], [llvm_anyfloat_ty, LLVMMatchType<0>], []>;
- class TernaryIntInt :
- Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>,
- LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
- class TernaryIntFloat :
- Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>,
- LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
- class QuaternaryIntInt :
- Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>,
- LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
- class UnaryAtomicInt :
- Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty], [IntrReadWriteArgMem]>;
- class BinaryAtomicInt :
- Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], [IntrReadWriteArgMem]>;
- class TernaryAtomicInt :
- Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty]>;
- class UnaryAtomicIntNoRet :
- Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty], [IntrReadWriteArgMem]>;
- class BinaryAtomicIntNoRet :
- Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], [IntrReadWriteArgMem]>;
- class TernaryAtomicIntNoRet :
- Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrReadWriteArgMem]>;
-}
-
-let TargetPrefix = "AMDIL", isTarget = 1 in {
- def int_AMDIL_abs : GCCBuiltin<"__amdil_abs">, UnaryIntInt;
-
- def int_AMDIL_bit_reverse_u32 : GCCBuiltin<"__amdil_ubit_reverse">,
- UnaryIntInt;
- def int_AMDIL_bit_count_i32 : GCCBuiltin<"__amdil_count_bits">,
- UnaryIntInt;
- def int_AMDIL_bit_find_first_lo : GCCBuiltin<"__amdil_ffb_lo">,
- UnaryIntInt;
- def int_AMDIL_bit_find_first_hi : GCCBuiltin<"__amdil_ffb_hi">,
- UnaryIntInt;
- def int_AMDIL_bit_find_first_sgn : GCCBuiltin<"__amdil_ffb_signed">,
- UnaryIntInt;
- def int_AMDIL_media_bitalign : GCCBuiltin<"__amdil_bitalign">,
- TernaryIntInt;
- def int_AMDIL_media_bytealign : GCCBuiltin<"__amdil_bytealign">,
- TernaryIntInt;
- def int_AMDIL_bit_insert_u32 : GCCBuiltin<"__amdil_ubit_insert">,
- QuaternaryIntInt;
- def int_AMDIL_bfi : GCCBuiltin<"__amdil_bfi">,
- TernaryIntInt;
- def int_AMDIL_bfm : GCCBuiltin<"__amdil_bfm">,
- BinaryIntInt;
- def int_AMDIL_mulhi_i32 : GCCBuiltin<"__amdil_imul_high">,
- BinaryIntInt;
- def int_AMDIL_mulhi_u32 : GCCBuiltin<"__amdil_umul_high">,
- BinaryIntInt;
- def int_AMDIL_mulhi24_i32 : GCCBuiltin<"__amdil_imul24_high">,
- BinaryIntInt;
- def int_AMDIL_mulhi24_u32 : GCCBuiltin<"__amdil_umul24_high">,
- BinaryIntInt;
- def int_AMDIL_carry_i32 : GCCBuiltin<"__amdil_carry">,
- BinaryIntInt;
- def int_AMDIL_borrow_i32 : GCCBuiltin<"__amdil_borrow">,
- BinaryIntInt;
- def int_AMDIL_min_i32 : GCCBuiltin<"__amdil_imin">,
- BinaryIntInt;
- def int_AMDIL_min_u32 : GCCBuiltin<"__amdil_umin">,
- BinaryIntInt;
- def int_AMDIL_min : GCCBuiltin<"__amdil_min">,
- BinaryIntFloat;
- def int_AMDIL_max_i32 : GCCBuiltin<"__amdil_imax">,
- BinaryIntInt;
- def int_AMDIL_max_u32 : GCCBuiltin<"__amdil_umax">,
- BinaryIntInt;
- def int_AMDIL_max : GCCBuiltin<"__amdil_max">,
- BinaryIntFloat;
- def int_AMDIL_media_lerp_u4 : GCCBuiltin<"__amdil_u4lerp">,
- TernaryIntInt;
- def int_AMDIL_media_sad : GCCBuiltin<"__amdil_sad">,
- TernaryIntInt;
- def int_AMDIL_media_sad_hi : GCCBuiltin<"__amdil_sadhi">,
- TernaryIntInt;
- def int_AMDIL_fraction : GCCBuiltin<"__amdil_fraction">,
- UnaryIntFloat;
- def int_AMDIL_clamp : GCCBuiltin<"__amdil_clamp">,
- TernaryIntFloat;
- def int_AMDIL_pireduce : GCCBuiltin<"__amdil_pireduce">,
- UnaryIntFloat;
- def int_AMDIL_round_nearest : GCCBuiltin<"__amdil_round_nearest">,
- UnaryIntFloat;
- def int_AMDIL_round_neginf : GCCBuiltin<"__amdil_round_neginf">,
- UnaryIntFloat;
- def int_AMDIL_round_zero : GCCBuiltin<"__amdil_round_zero">,
- UnaryIntFloat;
- def int_AMDIL_acos : GCCBuiltin<"__amdil_acos">,
- UnaryIntFloat;
- def int_AMDIL_atan : GCCBuiltin<"__amdil_atan">,
- UnaryIntFloat;
- def int_AMDIL_asin : GCCBuiltin<"__amdil_asin">,
- UnaryIntFloat;
- def int_AMDIL_cos : GCCBuiltin<"__amdil_cos">,
- UnaryIntFloat;
- def int_AMDIL_cos_vec : GCCBuiltin<"__amdil_cos_vec">,
- UnaryIntFloat;
- def int_AMDIL_tan : GCCBuiltin<"__amdil_tan">,
- UnaryIntFloat;
- def int_AMDIL_sin : GCCBuiltin<"__amdil_sin">,
- UnaryIntFloat;
- def int_AMDIL_sin_vec : GCCBuiltin<"__amdil_sin_vec">,
- UnaryIntFloat;
- def int_AMDIL_pow : GCCBuiltin<"__amdil_pow">, BinaryIntFloat;
- def int_AMDIL_div : GCCBuiltin<"__amdil_div">, BinaryIntFloat;
- def int_AMDIL_udiv : GCCBuiltin<"__amdil_udiv">, BinaryIntInt;
- def int_AMDIL_sqrt: GCCBuiltin<"__amdil_sqrt">,
- UnaryIntFloat;
- def int_AMDIL_sqrt_vec: GCCBuiltin<"__amdil_sqrt_vec">,
- UnaryIntFloat;
- def int_AMDIL_exp : GCCBuiltin<"__amdil_exp">,
- UnaryIntFloat;
- def int_AMDIL_exp_vec : GCCBuiltin<"__amdil_exp_vec">,
- UnaryIntFloat;
- def int_AMDIL_exn : GCCBuiltin<"__amdil_exn">,
- UnaryIntFloat;
- def int_AMDIL_log_vec : GCCBuiltin<"__amdil_log_vec">,
- UnaryIntFloat;
- def int_AMDIL_ln : GCCBuiltin<"__amdil_ln">,
- UnaryIntFloat;
- def int_AMDIL_sign: GCCBuiltin<"__amdil_sign">,
- UnaryIntFloat;
- def int_AMDIL_fma: GCCBuiltin<"__amdil_fma">,
- TernaryIntFloat;
- def int_AMDIL_rsq : GCCBuiltin<"__amdil_rsq">,
- UnaryIntFloat;
- def int_AMDIL_rsq_vec : GCCBuiltin<"__amdil_rsq_vec">,
- UnaryIntFloat;
- def int_AMDIL_length : GCCBuiltin<"__amdil_length">,
- UnaryIntFloat;
- def int_AMDIL_lerp : GCCBuiltin<"__amdil_lerp">,
- TernaryIntFloat;
- def int_AMDIL_media_sad4 : GCCBuiltin<"__amdil_sad4">,
- Intrinsic<[llvm_i32_ty], [llvm_v4i32_ty,
- llvm_v4i32_ty, llvm_i32_ty], []>;
-
- def int_AMDIL_frexp_f64 : GCCBuiltin<"__amdil_frexp">,
- Intrinsic<[llvm_v2i64_ty], [llvm_double_ty], []>;
- def int_AMDIL_ldexp : GCCBuiltin<"__amdil_ldexp">,
- Intrinsic<[llvm_anyfloat_ty], [llvm_anyfloat_ty, llvm_anyint_ty], []>;
- def int_AMDIL_drcp : GCCBuiltin<"__amdil_rcp">,
- Intrinsic<[llvm_double_ty], [llvm_double_ty], []>;
- def int_AMDIL_convert_f16_f32 : GCCBuiltin<"__amdil_half_to_float">,
- ConvertIntITOF;
- def int_AMDIL_convert_f32_f16 : GCCBuiltin<"__amdil_float_to_half">,
- ConvertIntFTOI;
- def int_AMDIL_convert_f32_i32_rpi : GCCBuiltin<"__amdil_float_to_int_rpi">,
- ConvertIntFTOI;
- def int_AMDIL_convert_f32_i32_flr : GCCBuiltin<"__amdil_float_to_int_flr">,
- ConvertIntFTOI;
- def int_AMDIL_convert_f32_f16_near : GCCBuiltin<"__amdil_float_to_half_near">,
- ConvertIntFTOI;
- def int_AMDIL_convert_f32_f16_neg_inf : GCCBuiltin<"__amdil_float_to_half_neg_inf">,
- ConvertIntFTOI;
- def int_AMDIL_convert_f32_f16_plus_inf : GCCBuiltin<"__amdil_float_to_half_plus_inf">,
- ConvertIntFTOI;
- def int_AMDIL_media_convert_f2v4u8 : GCCBuiltin<"__amdil_f_2_u4">,
- Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty], []>;
- def int_AMDIL_media_unpack_byte_0 : GCCBuiltin<"__amdil_unpack_0">,
- ConvertIntITOF;
- def int_AMDIL_media_unpack_byte_1 : GCCBuiltin<"__amdil_unpack_1">,
- ConvertIntITOF;
- def int_AMDIL_media_unpack_byte_2 : GCCBuiltin<"__amdil_unpack_2">,
- ConvertIntITOF;
- def int_AMDIL_media_unpack_byte_3 : GCCBuiltin<"__amdil_unpack_3">,
- ConvertIntITOF;
- def int_AMDIL_dp2_add : GCCBuiltin<"__amdil_dp2_add">,
- Intrinsic<[llvm_float_ty], [llvm_v2f32_ty,
- llvm_v2f32_ty, llvm_float_ty], []>;
- def int_AMDIL_dp2 : GCCBuiltin<"__amdil_dp2">,
- Intrinsic<[llvm_float_ty], [llvm_v2f32_ty,
- llvm_v2f32_ty], []>;
- def int_AMDIL_dp3 : GCCBuiltin<"__amdil_dp3">,
- Intrinsic<[llvm_float_ty], [llvm_v4f32_ty,
- llvm_v4f32_ty], []>;
- def int_AMDIL_dp4 : GCCBuiltin<"__amdil_dp4">,
- Intrinsic<[llvm_float_ty], [llvm_v4f32_ty,
- llvm_v4f32_ty], []>;
-}
diff --git a/lib/Target/R600/AMDILRegisterInfo.td b/lib/Target/R600/AMDILRegisterInfo.td
deleted file mode 100644
index b9d0334..0000000
--- a/lib/Target/R600/AMDILRegisterInfo.td
+++ /dev/null
@@ -1,107 +0,0 @@
-//===- AMDILRegisterInfo.td - AMDIL Register defs ----------*- tablegen -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//==-----------------------------------------------------------------------===//
-//
-// Declarations that describe the AMDIL register file
-//
-//===----------------------------------------------------------------------===//
-
-class AMDILReg<bits<16> num, string n> : Register<n> {
- field bits<16> Value;
- let Value = num;
- let Namespace = "AMDGPU";
-}
-
-// We will start with 8 registers for each class before expanding to more
-// Since the swizzle is added based on the register class, we can leave it
-// off here and just specify different registers for different register classes
-def R1 : AMDILReg<1, "r1">, DwarfRegNum<[1]>;
-def R2 : AMDILReg<2, "r2">, DwarfRegNum<[2]>;
-def R3 : AMDILReg<3, "r3">, DwarfRegNum<[3]>;
-def R4 : AMDILReg<4, "r4">, DwarfRegNum<[4]>;
-def R5 : AMDILReg<5, "r5">, DwarfRegNum<[5]>;
-def R6 : AMDILReg<6, "r6">, DwarfRegNum<[6]>;
-def R7 : AMDILReg<7, "r7">, DwarfRegNum<[7]>;
-def R8 : AMDILReg<8, "r8">, DwarfRegNum<[8]>;
-def R9 : AMDILReg<9, "r9">, DwarfRegNum<[9]>;
-def R10 : AMDILReg<10, "r10">, DwarfRegNum<[10]>;
-def R11 : AMDILReg<11, "r11">, DwarfRegNum<[11]>;
-def R12 : AMDILReg<12, "r12">, DwarfRegNum<[12]>;
-def R13 : AMDILReg<13, "r13">, DwarfRegNum<[13]>;
-def R14 : AMDILReg<14, "r14">, DwarfRegNum<[14]>;
-def R15 : AMDILReg<15, "r15">, DwarfRegNum<[15]>;
-def R16 : AMDILReg<16, "r16">, DwarfRegNum<[16]>;
-def R17 : AMDILReg<17, "r17">, DwarfRegNum<[17]>;
-def R18 : AMDILReg<18, "r18">, DwarfRegNum<[18]>;
-def R19 : AMDILReg<19, "r19">, DwarfRegNum<[19]>;
-def R20 : AMDILReg<20, "r20">, DwarfRegNum<[20]>;
-
-// All registers between 1000 and 1024 are reserved and cannot be used
-// unless commented in this section
-// r1021-r1025 are used to dynamically calculate the local/group/thread/region/region_local ID's
-// r1020 is used to hold the frame index for local arrays
-// r1019 is used to hold the dynamic stack allocation pointer
-// r1018 is used as a temporary register for handwritten code
-// r1017 is used as a temporary register for handwritten code
-// r1016 is used as a temporary register for load/store code
-// r1015 is used as a temporary register for data segment offset
-// r1014 is used as a temporary register for store code
-// r1013 is used as the section data pointer register
-// r1012-r1010 and r1001-r1008 are used for temporary I/O registers
-// r1009 is used as the frame pointer register
-// r999 is used as the mem register.
-// r998 is used as the return address register.
-//def R1025 : AMDILReg<1025, "r1025">, DwarfRegNum<[1025]>;
-//def R1024 : AMDILReg<1024, "r1024">, DwarfRegNum<[1024]>;
-//def R1023 : AMDILReg<1023, "r1023">, DwarfRegNum<[1023]>;
-//def R1022 : AMDILReg<1022, "r1022">, DwarfRegNum<[1022]>;
-//def R1021 : AMDILReg<1021, "r1021">, DwarfRegNum<[1021]>;
-//def R1020 : AMDILReg<1020, "r1020">, DwarfRegNum<[1020]>;
-def SP : AMDILReg<1019, "r1019">, DwarfRegNum<[1019]>;
-def T1 : AMDILReg<1018, "r1018">, DwarfRegNum<[1018]>;
-def T2 : AMDILReg<1017, "r1017">, DwarfRegNum<[1017]>;
-def T3 : AMDILReg<1016, "r1016">, DwarfRegNum<[1016]>;
-def T4 : AMDILReg<1015, "r1015">, DwarfRegNum<[1015]>;
-def T5 : AMDILReg<1014, "r1014">, DwarfRegNum<[1014]>;
-def SDP : AMDILReg<1013, "r1013">, DwarfRegNum<[1013]>;
-def R1012: AMDILReg<1012, "r1012">, DwarfRegNum<[1012]>;
-def R1011: AMDILReg<1011, "r1011">, DwarfRegNum<[1011]>;
-def R1010: AMDILReg<1010, "r1010">, DwarfRegNum<[1010]>;
-def DFP : AMDILReg<1009, "r1009">, DwarfRegNum<[1009]>;
-def R1008: AMDILReg<1008, "r1008">, DwarfRegNum<[1008]>;
-def R1007: AMDILReg<1007, "r1007">, DwarfRegNum<[1007]>;
-def R1006: AMDILReg<1006, "r1006">, DwarfRegNum<[1006]>;
-def R1005: AMDILReg<1005, "r1005">, DwarfRegNum<[1005]>;
-def R1004: AMDILReg<1004, "r1004">, DwarfRegNum<[1004]>;
-def R1003: AMDILReg<1003, "r1003">, DwarfRegNum<[1003]>;
-def R1002: AMDILReg<1002, "r1002">, DwarfRegNum<[1002]>;
-def R1001: AMDILReg<1001, "r1001">, DwarfRegNum<[1001]>;
-def MEM : AMDILReg<999, "mem">, DwarfRegNum<[999]>;
-def RA : AMDILReg<998, "r998">, DwarfRegNum<[998]>;
-def FP : AMDILReg<997, "r997">, DwarfRegNum<[997]>;
-def GPRI16 : RegisterClass<"AMDGPU", [i16], 16,
- (add (sequence "R%u", 1, 20), RA, SP, T1, T2, T3, T4, T5, SDP, R1010, R1011, R1001, R1002, R1003, R1004, R1005, R1006, R1007, R1008, MEM, R1012)> {
- let AltOrders = [(add (sequence "R%u", 1, 20))];
- let AltOrderSelect = [{
- return 1;
- }];
- }
-def GPRI32 : RegisterClass<"AMDGPU", [i32], 32,
- (add (sequence "R%u", 1, 20), RA, SP, T1, T2, T3, T4, T5, SDP, R1010, R1011, R1001, R1002, R1003, R1004, R1005, R1006, R1007, R1008, MEM, R1012)> {
- let AltOrders = [(add (sequence "R%u", 1, 20))];
- let AltOrderSelect = [{
- return 1;
- }];
- }
-def GPRF32 : RegisterClass<"AMDGPU", [f32], 32,
- (add (sequence "R%u", 1, 20), RA, SP, T1, T2, T3, T4, T5, SDP, R1010, R1011, R1001, R1002, R1003, R1004, R1005, R1006, R1007, R1008, MEM, R1012)> {
- let AltOrders = [(add (sequence "R%u", 1, 20))];
- let AltOrderSelect = [{
- return 1;
- }];
- }
diff --git a/lib/Target/R600/CMakeLists.txt b/lib/Target/R600/CMakeLists.txt
index 3c6fa5a..4d16082 100644
--- a/lib/Target/R600/CMakeLists.txt
+++ b/lib/Target/R600/CMakeLists.txt
@@ -13,10 +13,9 @@ add_public_tablegen_target(AMDGPUCommonTableGen)
add_llvm_target(R600CodeGen
AMDILCFGStructurizer.cpp
- AMDILIntrinsicInfo.cpp
- AMDILISelLowering.cpp
AMDGPUAsmPrinter.cpp
AMDGPUFrameLowering.cpp
+ AMDGPUIntrinsicInfo.cpp
AMDGPUISelDAGToDAG.cpp
AMDGPUMCInstLower.cpp
AMDGPUMachineFunction.cpp
@@ -24,8 +23,8 @@ add_llvm_target(R600CodeGen
AMDGPUTargetMachine.cpp
AMDGPUTargetTransformInfo.cpp
AMDGPUISelLowering.cpp
- AMDGPUConvertToISA.cpp
AMDGPUInstrInfo.cpp
+ AMDGPUPromoteAlloca.cpp
AMDGPURegisterInfo.cpp
R600ClauseMergePass.cpp
R600ControlFlowFinalizer.cpp
@@ -41,6 +40,7 @@ add_llvm_target(R600CodeGen
R600TextureIntrinsicsReplacer.cpp
SIAnnotateControlFlow.cpp
SIFixSGPRCopies.cpp
+ SIFixSGPRLiveRanges.cpp
SIInsertWaits.cpp
SIInstrInfo.cpp
SIISelLowering.cpp
diff --git a/lib/Target/R600/EvergreenInstructions.td b/lib/Target/R600/EvergreenInstructions.td
index 2065441..dcb7e98 100644
--- a/lib/Target/R600/EvergreenInstructions.td
+++ b/lib/Target/R600/EvergreenInstructions.td
@@ -295,7 +295,7 @@ def : Pat<(i32 (sext_inreg i32:$src, i8)),
def : Pat<(i32 (sext_inreg i32:$src, i16)),
(BFE_INT_eg i32:$src, (i32 ZERO), (MOV_IMM_I32 16))>;
-defm : BFIPatterns <BFI_INT_eg>;
+defm : BFIPatterns <BFI_INT_eg, MOV_IMM_I32>;
def BFM_INT_eg : R600_2OP <0xA0, "BFM_INT",
[(set i32:$dst, (AMDGPUbfm i32:$src0, i32:$src1))],
@@ -326,6 +326,8 @@ def MUL_UINT24_eg : R600_2OP <0xB5, "MUL_UINT24",
def DOT4_eg : DOT4_Common<0xBE>;
defm CUBE_eg : CUBE_Common<0xC0>;
+def BCNT_INT : R600_1OP_Helper <0xAA, "BCNT_INT", ctpop, VecALU>;
+
let hasSideEffects = 1 in {
def MOVA_INT_eg : R600_1OP <0xCC, "MOVA_INT", [], VecALU>;
}
@@ -346,7 +348,7 @@ def FLT_TO_UINT_eg : FLT_TO_UINT_Common<0x9A> {
def UINT_TO_FLT_eg : UINT_TO_FLT_Common<0x9C>;
def GROUP_BARRIER : InstR600 <
- (outs), (ins), " GROUP_BARRIER", [(int_AMDGPU_barrier_local)], AnyALU>,
+ (outs), (ins), " GROUP_BARRIER", [(int_AMDGPU_barrier_local), (int_AMDGPU_barrier_global)], AnyALU>,
R600ALU_Word0,
R600ALU_Word1_OP2 <0x54> {
@@ -375,6 +377,11 @@ def GROUP_BARRIER : InstR600 <
let ALUInst = 1;
}
+def : Pat <
+ (int_AMDGPU_barrier_global),
+ (GROUP_BARRIER)
+>;
+
//===----------------------------------------------------------------------===//
// LDS Instructions
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp
index 11ae091..0927040 100644
--- a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp
+++ b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp
@@ -99,9 +99,9 @@ void AMDGPUInstPrinter::printRegOperand(unsigned reg, raw_ostream &O) {
return;
}
- // The low 8 bits encoding value is the register index, for both VGPRs and
- // SGPRs.
- unsigned RegIdx = MRI.getEncodingValue(reg) & ((1 << 8) - 1);
+ // The low 8 bits of the encoding value are the register index, for both VGPRs
+ // and SGPRs.
+ unsigned RegIdx = MRI.getEncodingValue(reg) & ((1 << 8) - 1);
if (NumRegs == 1) {
O << Type << RegIdx;
return;
@@ -216,13 +216,8 @@ void AMDGPUInstPrinter::printClamp(const MCInst *MI, unsigned OpNo,
void AMDGPUInstPrinter::printLiteral(const MCInst *MI, unsigned OpNo,
raw_ostream &O) {
- union Literal {
- float f;
- int32_t i;
- } L;
-
- L.i = MI->getOperand(OpNo).getImm();
- O << L.i << "(" << L.f << ")";
+ int32_t Imm = MI->getOperand(OpNo).getImm();
+ O << Imm << '(' << BitsToFloat(Imm) << ')';
}
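
The union type-pun removed here is formally undefined behavior in C++ (though
common compilers tolerate it); llvm::BitsToFloat behaves like this memcpy-based
sketch (helper name hypothetical, not part of the patch):

#include <cstdint>
#include <cstring>

static float bitsToFloat(int32_t Bits) {
  float F;
  static_assert(sizeof(F) == sizeof(Bits), "bit widths must match");
  std::memcpy(&F, &Bits, sizeof(F));  // well-defined bit reinterpretation
  return F;
}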
void AMDGPUInstPrinter::printLast(const MCInst *MI, unsigned OpNo,
diff --git a/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp b/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp
index 5e7cefe..dc1344f 100644
--- a/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp
+++ b/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp
@@ -172,17 +172,13 @@ uint64_t R600MCCodeEmitter::getMachineOpValue(const MCInst &MI,
SmallVectorImpl<MCFixup> &Fixup,
const MCSubtargetInfo &STI) const {
if (MO.isReg()) {
- if (HAS_NATIVE_OPERANDS(MCII.get(MI.getOpcode()).TSFlags)) {
+ if (HAS_NATIVE_OPERANDS(MCII.get(MI.getOpcode()).TSFlags))
return MRI.getEncodingValue(MO.getReg());
- } else {
- return getHWReg(MO.getReg());
- }
- } else if (MO.isImm()) {
- return MO.getImm();
- } else {
- assert(0);
- return 0;
+ return getHWReg(MO.getReg());
}
+
+ assert(MO.isImm());
+ return MO.getImm();
}
#include "AMDGPUGenMCCodeEmitter.inc"
diff --git a/lib/Target/R600/R600ControlFlowFinalizer.cpp b/lib/Target/R600/R600ControlFlowFinalizer.cpp
index d255e96..d98a6db 100644
--- a/lib/Target/R600/R600ControlFlowFinalizer.cpp
+++ b/lib/Target/R600/R600ControlFlowFinalizer.cpp
@@ -14,6 +14,7 @@
#include "llvm/Support/Debug.h"
#include "AMDGPU.h"
+#include "AMDGPUSubtarget.h"
#include "R600Defines.h"
#include "R600InstrInfo.h"
#include "R600MachineFunctionInfo.h"
diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp
index d6c6830..7f3560a 100644
--- a/lib/Target/R600/R600ISelLowering.cpp
+++ b/lib/Target/R600/R600ISelLowering.cpp
@@ -13,6 +13,9 @@
//===----------------------------------------------------------------------===//
#include "R600ISelLowering.h"
+#include "AMDGPUFrameLowering.h"
+#include "AMDGPUIntrinsicInfo.h"
+#include "AMDGPUSubtarget.h"
#include "R600Defines.h"
#include "R600InstrInfo.h"
#include "R600MachineFunctionInfo.h"
@@ -65,6 +68,7 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
setOperationAction(ISD::BR_CC, MVT::i32, Expand);
setOperationAction(ISD::BR_CC, MVT::f32, Expand);
+ setOperationAction(ISD::BRCOND, MVT::Other, Custom);
setOperationAction(ISD::FSUB, MVT::f32, Expand);
@@ -133,19 +137,47 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
setOperationAction(ISD::FrameIndex, MVT::i32, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i32, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f32, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
+
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i32, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f32, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
+
setTargetDAGCombine(ISD::FP_ROUND);
setTargetDAGCombine(ISD::FP_TO_SINT);
setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
setTargetDAGCombine(ISD::SELECT_CC);
setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
+ setOperationAction(ISD::SUB, MVT::i64, Expand);
+
// These should be replaced by UDIVREM, but this does not happen automatically
// during Type Legalization
setOperationAction(ISD::UDIV, MVT::i64, Custom);
setOperationAction(ISD::UREM, MVT::i64, Custom);
+ setOperationAction(ISD::SDIV, MVT::i64, Custom);
+ setOperationAction(ISD::SREM, MVT::i64, Custom);
+
+ // We don't have 64-bit shifts. Thus we need either SHX i64 or SHX_PARTS i32
+ // to be Legal/Custom in order to avoid library calls.
+ setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
+ setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
+ setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
+ const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
+ for (MVT VT : ScalarIntVTs) {
+ setOperationAction(ISD::ADDC, VT, Expand);
+ setOperationAction(ISD::SUBC, VT, Expand);
+ setOperationAction(ISD::ADDE, VT, Expand);
+ setOperationAction(ISD::SUBE, VT, Expand);
+ }
+
setBooleanContents(ZeroOrNegativeOneBooleanContent);
setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
setSchedulingPreference(Sched::Source);
@@ -537,11 +569,24 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
switch (Op.getOpcode()) {
default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
+ case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
+ case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
+ case ISD::SHL_PARTS: return LowerSHLParts(Op, DAG);
+ case ISD::SRA_PARTS:
+ case ISD::SRL_PARTS: return LowerSRXParts(Op, DAG);
case ISD::FCOS:
case ISD::FSIN: return LowerTrig(Op, DAG);
case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
case ISD::STORE: return LowerSTORE(Op, DAG);
- case ISD::LOAD: return LowerLOAD(Op, DAG);
+ case ISD::LOAD: {
+ SDValue Result = LowerLOAD(Op, DAG);
+ assert((!Result.getNode() ||
+ Result.getNode()->getNumValues() == 2) &&
+ "Load should return a value and a chain");
+ return Result;
+ }
+
+ case ISD::BRCOND: return LowerBRCOND(Op, DAG);
case ISD::GlobalAddress: return LowerGlobalAddress(MFI, Op, DAG);
case ISD::INTRINSIC_VOID: {
SDValue Chain = Op.getOperand(0);
@@ -776,6 +821,9 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
case Intrinsic::r600_read_tidig_z:
return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
AMDGPU::T0_Z, VT);
+ case Intrinsic::AMDGPU_rsq:
+ // XXX - I'm assuming SI's RSQ_LEGACY matches R600's behavior.
+ return DAG.getNode(AMDGPUISD::RSQ_LEGACY, DL, VT, Op.getOperand(1));
}
// break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
break;
@@ -793,20 +841,172 @@ void R600TargetLowering::ReplaceNodeResults(SDNode *N,
return;
case ISD::FP_TO_UINT: Results.push_back(LowerFPTOUINT(N->getOperand(0), DAG));
return;
- case ISD::LOAD: {
- SDNode *Node = LowerLOAD(SDValue(N, 0), DAG).getNode();
- Results.push_back(SDValue(Node, 0));
- Results.push_back(SDValue(Node, 1));
- // XXX: LLVM seems not to replace Chain Value inside CustomWidenLowerNode
- // function
- DAG.ReplaceAllUsesOfValueWith(SDValue(N,1), SDValue(Node, 1));
- return;
+ case ISD::UDIV: {
+ SDValue Op = SDValue(N, 0);
+ SDLoc DL(Op);
+ EVT VT = Op.getValueType();
+ SDValue UDIVREM = DAG.getNode(ISD::UDIVREM, DL, DAG.getVTList(VT, VT),
+ N->getOperand(0), N->getOperand(1));
+ Results.push_back(UDIVREM);
+ break;
}
- case ISD::STORE:
- SDNode *Node = LowerSTORE(SDValue(N, 0), DAG).getNode();
- Results.push_back(SDValue(Node, 0));
- return;
+ case ISD::UREM: {
+ SDValue Op = SDValue(N, 0);
+ SDLoc DL(Op);
+ EVT VT = Op.getValueType();
+ SDValue UDIVREM = DAG.getNode(ISD::UDIVREM, DL, DAG.getVTList(VT, VT),
+ N->getOperand(0), N->getOperand(1));
+ Results.push_back(UDIVREM.getValue(1));
+ break;
+ }
+ case ISD::SDIV: {
+ SDValue Op = SDValue(N, 0);
+ SDLoc DL(Op);
+ EVT VT = Op.getValueType();
+ SDValue SDIVREM = DAG.getNode(ISD::SDIVREM, DL, DAG.getVTList(VT, VT),
+ N->getOperand(0), N->getOperand(1));
+ Results.push_back(SDIVREM);
+ break;
+ }
+ case ISD::SREM: {
+ SDValue Op = SDValue(N, 0);
+ SDLoc DL(Op);
+ EVT VT = Op.getValueType();
+ SDValue SDIVREM = DAG.getNode(ISD::SDIVREM, DL, DAG.getVTList(VT, VT),
+ N->getOperand(0), N->getOperand(1));
+ Results.push_back(SDIVREM.getValue(1));
+ break;
+ }
+ case ISD::SDIVREM: {
+ SDValue Op = SDValue(N, 1);
+ SDValue RES = LowerSDIVREM(Op, DAG);
+ Results.push_back(RES);
+ Results.push_back(RES.getValue(1));
+ break;
+ }
+ case ISD::UDIVREM: {
+ SDValue Op = SDValue(N, 0);
+ SDLoc DL(Op);
+ EVT VT = Op.getValueType();
+ EVT HalfVT = VT.getHalfSizedIntegerVT(*DAG.getContext());
+
+ SDValue one = DAG.getConstant(1, HalfVT);
+ SDValue zero = DAG.getConstant(0, HalfVT);
+
+ //HiLo split
+ SDValue LHS = N->getOperand(0);
+ SDValue LHS_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, LHS, zero);
+ SDValue LHS_Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, LHS, one);
+
+ SDValue RHS = N->getOperand(1);
+ SDValue RHS_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, RHS, zero);
+ SDValue RHS_Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, RHS, one);
+
+ // Get Speculative values
+ SDValue DIV_Part = DAG.getNode(ISD::UDIV, DL, HalfVT, LHS_Hi, RHS_Lo);
+ SDValue REM_Part = DAG.getNode(ISD::UREM, DL, HalfVT, LHS_Hi, RHS_Lo);
+
+ SDValue REM_Hi = zero;
+ SDValue REM_Lo = DAG.getSelectCC(DL, RHS_Hi, zero, REM_Part, LHS_Hi, ISD::SETEQ);
+
+ SDValue DIV_Hi = DAG.getSelectCC(DL, RHS_Hi, zero, DIV_Part, zero, ISD::SETEQ);
+ SDValue DIV_Lo = zero;
+
+ const unsigned halfBitWidth = HalfVT.getSizeInBits();
+
+ for (unsigned i = 0; i < halfBitWidth; ++i) {
+ SDValue POS = DAG.getConstant(halfBitWidth - i - 1, HalfVT);
+ // Get Value of high bit
+ SDValue HBit;
+ if (halfBitWidth == 32 && Subtarget->hasBFE()) {
+ HBit = DAG.getNode(AMDGPUISD::BFE_U32, DL, HalfVT, LHS_Lo, POS, one);
+ } else {
+ HBit = DAG.getNode(ISD::SRL, DL, HalfVT, LHS_Lo, POS);
+ HBit = DAG.getNode(ISD::AND, DL, HalfVT, HBit, one);
+ }
+
+ SDValue Carry = DAG.getNode(ISD::SRL, DL, HalfVT, REM_Lo,
+ DAG.getConstant(halfBitWidth - 1, HalfVT));
+ REM_Hi = DAG.getNode(ISD::SHL, DL, HalfVT, REM_Hi, one);
+ REM_Hi = DAG.getNode(ISD::OR, DL, HalfVT, REM_Hi, Carry);
+
+ REM_Lo = DAG.getNode(ISD::SHL, DL, HalfVT, REM_Lo, one);
+ REM_Lo = DAG.getNode(ISD::OR, DL, HalfVT, REM_Lo, HBit);
+
+
+ SDValue REM = DAG.getNode(ISD::BUILD_PAIR, DL, VT, REM_Lo, REM_Hi);
+
+ SDValue BIT = DAG.getConstant(1 << (halfBitWidth - i - 1), HalfVT);
+ SDValue realBIT = DAG.getSelectCC(DL, REM, RHS, BIT, zero, ISD::SETGE);
+
+ DIV_Lo = DAG.getNode(ISD::OR, DL, HalfVT, DIV_Lo, realBIT);
+
+ // Update REM
+
+ SDValue REM_sub = DAG.getNode(ISD::SUB, DL, VT, REM, RHS);
+
+ REM = DAG.getSelectCC(DL, REM, RHS, REM_sub, REM, ISD::SETGE);
+ REM_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, REM, zero);
+ REM_Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, REM, one);
+ }
+
+ SDValue REM = DAG.getNode(ISD::BUILD_PAIR, DL, VT, REM_Lo, REM_Hi);
+ SDValue DIV = DAG.getNode(ISD::BUILD_PAIR, DL, VT, DIV_Lo, DIV_Hi);
+ Results.push_back(DIV);
+ Results.push_back(REM);
+ break;
}
+ }
+}
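
A minimal scalar model of what this ISD::UDIVREM expansion computes, using
plain 64-bit integers split into 32-bit halves (illustrative sketch only;
udivrem64 is a hypothetical name, and a 65th-bit carry is tracked so the
host-integer model stays correct even when RHS > 2^63):

// Bit-serial restoring division: seed the remainder from the high half,
// then shift in one bit of the low half per iteration and subtract RHS
// whenever the running remainder reaches it.
#include <cstdint>
#include <utility>

static std::pair<uint64_t, uint64_t> udivrem64(uint64_t LHS, uint64_t RHS) {
  uint32_t LHS_Lo = uint32_t(LHS), LHS_Hi = uint32_t(LHS >> 32);
  uint32_t RHS_Lo = uint32_t(RHS), RHS_Hi = uint32_t(RHS >> 32);

  // Speculative values: when RHS fits in 32 bits, the high half can be
  // divided directly (the DIV_Part/REM_Part selects above).
  uint64_t REM = RHS_Hi == 0 ? LHS_Hi % RHS_Lo : LHS_Hi;
  uint64_t DIV_Hi = RHS_Hi == 0 ? LHS_Hi / RHS_Lo : 0;
  uint64_t DIV_Lo = 0;

  for (unsigned i = 0; i < 32; ++i) {
    unsigned Pos = 32 - i - 1;
    uint64_t HBit = (LHS_Lo >> Pos) & 1;  // next dividend bit (BFE/SRL+AND)
    uint64_t Carry = REM >> 63;           // bit shifted out of the remainder
    REM = (REM << 1) | HBit;
    if (Carry || REM >= RHS) {            // the SETGE select on REM vs RHS
      DIV_Lo |= uint64_t(1) << Pos;       // the realBIT OR into DIV_Lo
      REM -= RHS;                         // the REM_sub select
    }
  }
  return {(DIV_Hi << 32) | DIV_Lo, REM};  // {quotient, remainder}
}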
+
+SDValue R600TargetLowering::vectorToVerticalVector(SelectionDAG &DAG,
+ SDValue Vector) const {
+
+ SDLoc DL(Vector);
+ EVT VecVT = Vector.getValueType();
+ EVT EltVT = VecVT.getVectorElementType();
+ SmallVector<SDValue, 8> Args;
+
+ for (unsigned i = 0, e = VecVT.getVectorNumElements();
+ i != e; ++i) {
+ Args.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT,
+ Vector, DAG.getConstant(i, getVectorIdxTy())));
+ }
+
+ return DAG.getNode(AMDGPUISD::BUILD_VERTICAL_VECTOR, DL, VecVT, Args);
+}
+
+SDValue R600TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
+ SelectionDAG &DAG) const {
+
+ SDLoc DL(Op);
+ SDValue Vector = Op.getOperand(0);
+ SDValue Index = Op.getOperand(1);
+
+ if (isa<ConstantSDNode>(Index) ||
+ Vector.getOpcode() == AMDGPUISD::BUILD_VERTICAL_VECTOR)
+ return Op;
+
+ Vector = vectorToVerticalVector(DAG, Vector);
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Op.getValueType(),
+ Vector, Index);
+}
+
+SDValue R600TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ SDValue Vector = Op.getOperand(0);
+ SDValue Value = Op.getOperand(1);
+ SDValue Index = Op.getOperand(2);
+
+ if (isa<ConstantSDNode>(Index) ||
+ Vector.getOpcode() == AMDGPUISD::BUILD_VERTICAL_VECTOR)
+ return Op;
+
+ Vector = vectorToVerticalVector(DAG, Vector);
+ SDValue Insert = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, Op.getValueType(),
+ Vector, Value, Index);
+ return vectorToVerticalVector(DAG, Insert);
}
SDValue R600TargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const {
@@ -840,6 +1040,80 @@ SDValue R600TargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const {
DAG.getConstantFP(3.14159265359, MVT::f32));
}
+SDValue R600TargetLowering::LowerSHLParts(SDValue Op, SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ EVT VT = Op.getValueType();
+
+ SDValue Lo = Op.getOperand(0);
+ SDValue Hi = Op.getOperand(1);
+ SDValue Shift = Op.getOperand(2);
+ SDValue Zero = DAG.getConstant(0, VT);
+ SDValue One = DAG.getConstant(1, VT);
+
+ SDValue Width = DAG.getConstant(VT.getSizeInBits(), VT);
+ SDValue Width1 = DAG.getConstant(VT.getSizeInBits() - 1, VT);
+ SDValue BigShift = DAG.getNode(ISD::SUB, DL, VT, Shift, Width);
+ SDValue CompShift = DAG.getNode(ISD::SUB, DL, VT, Width1, Shift);
+
+ // The dance around Width1 is necessary for the Shift == 0 special case.
+ // Without it, CompShift could be 32, producing incorrect results in
+ // Overflow. So we do the shift in two steps; the alternative would be to
+ // add a conditional to filter out the special case.
+
+ SDValue Overflow = DAG.getNode(ISD::SRL, DL, VT, Lo, CompShift);
+ Overflow = DAG.getNode(ISD::SRL, DL, VT, Overflow, One);
+
+ SDValue HiSmall = DAG.getNode(ISD::SHL, DL, VT, Hi, Shift);
+ HiSmall = DAG.getNode(ISD::OR, DL, VT, HiSmall, Overflow);
+ SDValue LoSmall = DAG.getNode(ISD::SHL, DL, VT, Lo, Shift);
+
+ SDValue HiBig = DAG.getNode(ISD::SHL, DL, VT, Lo, BigShift);
+ SDValue LoBig = Zero;
+
+ Hi = DAG.getSelectCC(DL, Shift, Width, HiSmall, HiBig, ISD::SETULT);
+ Lo = DAG.getSelectCC(DL, Shift, Width, LoSmall, LoBig, ISD::SETULT);
+
+ return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT,VT), Lo, Hi);
+}
+
+SDValue R600TargetLowering::LowerSRXParts(SDValue Op, SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ EVT VT = Op.getValueType();
+
+ SDValue Lo = Op.getOperand(0);
+ SDValue Hi = Op.getOperand(1);
+ SDValue Shift = Op.getOperand(2);
+ SDValue Zero = DAG.getConstant(0, VT);
+ SDValue One = DAG.getConstant(1, VT);
+
+ const bool SRA = Op.getOpcode() == ISD::SRA_PARTS;
+
+ SDValue Width = DAG.getConstant(VT.getSizeInBits(), VT);
+ SDValue Width1 = DAG.getConstant(VT.getSizeInBits() - 1, VT);
+ SDValue BigShift = DAG.getNode(ISD::SUB, DL, VT, Shift, Width);
+ SDValue CompShift = DAG.getNode(ISD::SUB, DL, VT, Width1, Shift);
+
+ // The dance around Width1 is necessary for the Shift == 0 special case.
+ // Without it, CompShift could be 32, producing incorrect results in
+ // Overflow. So we do the shift in two steps; the alternative would be to
+ // add a conditional to filter out the special case.
+
+ SDValue Overflow = DAG.getNode(ISD::SHL, DL, VT, Hi, CompShift);
+ Overflow = DAG.getNode(ISD::SHL, DL, VT, Overflow, One);
+
+ SDValue HiSmall = DAG.getNode(SRA ? ISD::SRA : ISD::SRL, DL, VT, Hi, Shift);
+ SDValue LoSmall = DAG.getNode(ISD::SRL, DL, VT, Lo, Shift);
+ LoSmall = DAG.getNode(ISD::OR, DL, VT, LoSmall, Overflow);
+
+ SDValue LoBig = DAG.getNode(SRA ? ISD::SRA : ISD::SRL, DL, VT, Hi, BigShift);
+ SDValue HiBig = SRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, Width1) : Zero;
+
+ Hi = DAG.getSelectCC(DL, Shift, Width, HiSmall, HiBig, ISD::SETULT);
+ Lo = DAG.getSelectCC(DL, Shift, Width, LoSmall, LoBig, ISD::SETULT);
+
+ return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT,VT), Lo, Hi);
+}
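
A scalar sketch of the SHL_PARTS computation above, on 32-bit halves
(illustrative only; shl64parts is a hypothetical name). The two-step Overflow
shift is the "dance around Width1": at Shift == 0 it evaluates to
(Lo >> 31) >> 1 == 0, where the single-step Lo >> (32 - Shift) would be an
out-of-range shift by 32:

#include <cstdint>

// Computes (Hi:Lo) << Shift for Shift in [0, 63], returning both halves.
static void shl64parts(uint32_t Lo, uint32_t Hi, uint32_t Shift,
                       uint32_t &OutLo, uint32_t &OutHi) {
  if (Shift < 32) {
    uint32_t Overflow = (Lo >> (31 - Shift)) >> 1;  // Lo bits entering Hi
    OutHi = (Hi << Shift) | Overflow;               // HiSmall
    OutLo = Lo << Shift;                            // LoSmall
  } else {
    OutHi = Lo << (Shift - 32);                     // HiBig
    OutLo = 0;                                      // LoBig
  }
}

The DAG form computes both arms unconditionally and picks one with the SETULT
selects; the host sketch needs the branch because shifting a 32-bit value by
32 or more is undefined in C++. LowerSRXParts mirrors this with SRL/SRA and a
sign-filled HiBig for the arithmetic case.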
+
SDValue R600TargetLowering::LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const {
return DAG.getNode(
ISD::SETCC,
@@ -1369,6 +1643,15 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
return DAG.getMergeValues(Ops, DL);
}
+SDValue R600TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
+ SDValue Chain = Op.getOperand(0);
+ SDValue Cond = Op.getOperand(1);
+ SDValue Jump = Op.getOperand(2);
+
+ return DAG.getNode(AMDGPUISD::BRANCH_COND, SDLoc(Op), Op.getValueType(),
+ Chain, Jump, Cond);
+}
+
/// XXX Only kernel functions are supported, so we can assume for now that
/// every function is a kernel function, but in the future we should use
/// separate calling conventions for kernel and non-kernel functions.
@@ -1902,9 +2185,8 @@ SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node,
SDValue FakeOp;
std::vector<SDValue> Ops;
- for(SDNode::op_iterator I = Node->op_begin(), E = Node->op_end();
- I != E; ++I)
- Ops.push_back(*I);
+ for (const SDUse &I : Node->ops())
+ Ops.push_back(I);
if (Opcode == AMDGPU::DOT_4) {
int OperandIdx[] = {
diff --git a/lib/Target/R600/R600ISelLowering.h b/lib/Target/R600/R600ISelLowering.h
index a8a464f..d22c8c9 100644
--- a/lib/Target/R600/R600ISelLowering.h
+++ b/lib/Target/R600/R600ISelLowering.h
@@ -51,15 +51,18 @@ private:
void lowerImplicitParameter(MachineInstr *MI, MachineBasicBlock &BB,
MachineRegisterInfo & MRI, unsigned dword_offset) const;
SDValue OptimizeSwizzle(SDValue BuildVector, SDValue Swz[], SelectionDAG &DAG) const;
+ SDValue vectorToVerticalVector(SelectionDAG &DAG, SDValue Vector) const;
- /// \brief Lower ROTL opcode to BITALIGN
- SDValue LowerROTL(SDValue Op, SelectionDAG &DAG) const;
-
+ SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerTrig(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSHLParts(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSRXParts(SDValue Op, SelectionDAG &DAG) const;
SDValue stackPtrToRegIndex(SDValue Ptr, unsigned StackWidth,
SelectionDAG &DAG) const;
diff --git a/lib/Target/R600/R600InstrInfo.cpp b/lib/Target/R600/R600InstrInfo.cpp
index b0d9ae3..3972e2f 100644
--- a/lib/Target/R600/R600InstrInfo.cpp
+++ b/lib/Target/R600/R600InstrInfo.cpp
@@ -28,10 +28,9 @@ using namespace llvm;
#define GET_INSTRINFO_CTOR_DTOR
#include "AMDGPUGenDFAPacketizer.inc"
-R600InstrInfo::R600InstrInfo(AMDGPUTargetMachine &tm)
- : AMDGPUInstrInfo(tm),
- RI(tm),
- ST(tm.getSubtarget<AMDGPUSubtarget>())
+R600InstrInfo::R600InstrInfo(const AMDGPUSubtarget &st)
+ : AMDGPUInstrInfo(st),
+ RI(st)
{ }
const R600RegisterInfo &R600InstrInfo::getRegisterInfo() const {
@@ -52,11 +51,15 @@ R600InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
unsigned DestReg, unsigned SrcReg,
bool KillSrc) const {
unsigned VectorComponents = 0;
- if (AMDGPU::R600_Reg128RegClass.contains(DestReg) &&
- AMDGPU::R600_Reg128RegClass.contains(SrcReg)) {
+ if ((AMDGPU::R600_Reg128RegClass.contains(DestReg) ||
+ AMDGPU::R600_Reg128VerticalRegClass.contains(DestReg)) &&
+ (AMDGPU::R600_Reg128RegClass.contains(SrcReg) ||
+ AMDGPU::R600_Reg128VerticalRegClass.contains(SrcReg))) {
VectorComponents = 4;
- } else if(AMDGPU::R600_Reg64RegClass.contains(DestReg) &&
- AMDGPU::R600_Reg64RegClass.contains(SrcReg)) {
+ } else if((AMDGPU::R600_Reg64RegClass.contains(DestReg) ||
+ AMDGPU::R600_Reg64VerticalRegClass.contains(DestReg)) &&
+ (AMDGPU::R600_Reg64RegClass.contains(SrcReg) ||
+ AMDGPU::R600_Reg64VerticalRegClass.contains(SrcReg))) {
VectorComponents = 2;
}
@@ -768,16 +771,6 @@ R600InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
return true;
}
-int R600InstrInfo::getBranchInstr(const MachineOperand &op) const {
- const MachineInstr *MI = op.getParent();
-
- switch (MI->getDesc().OpInfo->RegClass) {
- default: // FIXME: fallthrough??
- case AMDGPU::GPRI32RegClassID: return AMDGPU::BRANCH_COND_i32;
- case AMDGPU::GPRF32RegClassID: return AMDGPU::BRANCH_COND_f32;
- };
-}
-
static
MachineBasicBlock::iterator FindLastAluClause(MachineBasicBlock &MBB) {
for (MachineBasicBlock::reverse_iterator It = MBB.rbegin(), E = MBB.rend();
@@ -1064,10 +1057,34 @@ unsigned int R600InstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
return 2;
}
+bool R600InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
+
+ switch(MI->getOpcode()) {
+ default: return AMDGPUInstrInfo::expandPostRAPseudo(MI);
+ case AMDGPU::R600_EXTRACT_ELT_V2:
+ case AMDGPU::R600_EXTRACT_ELT_V4:
+ buildIndirectRead(MI->getParent(), MI, MI->getOperand(0).getReg(),
+ RI.getHWRegIndex(MI->getOperand(1).getReg()), // Address
+ MI->getOperand(2).getReg(),
+ RI.getHWRegChan(MI->getOperand(1).getReg()));
+ break;
+ case AMDGPU::R600_INSERT_ELT_V2:
+ case AMDGPU::R600_INSERT_ELT_V4:
+ buildIndirectWrite(MI->getParent(), MI, MI->getOperand(2).getReg(), // Value
+ RI.getHWRegIndex(MI->getOperand(1).getReg()), // Address
+ MI->getOperand(3).getReg(), // Offset
+ RI.getHWRegChan(MI->getOperand(1).getReg())); // Channel
+ break;
+ }
+ MI->eraseFromParent();
+ return true;
+}
+
void R600InstrInfo::reserveIndirectRegisters(BitVector &Reserved,
const MachineFunction &MF) const {
const AMDGPUFrameLowering *TFL =
- static_cast<const AMDGPUFrameLowering*>(TM.getFrameLowering());
+ static_cast<const AMDGPUFrameLowering*>(
+ MF.getTarget().getFrameLowering());
unsigned StackWidth = TFL->getStackWidth(MF);
int End = getIndirectIndexEnd(MF);
@@ -1100,7 +1117,22 @@ MachineInstrBuilder R600InstrInfo::buildIndirectWrite(MachineBasicBlock *MBB,
MachineBasicBlock::iterator I,
unsigned ValueReg, unsigned Address,
unsigned OffsetReg) const {
- unsigned AddrReg = AMDGPU::R600_AddrRegClass.getRegister(Address);
+ return buildIndirectWrite(MBB, I, ValueReg, Address, OffsetReg, 0);
+}
+
+MachineInstrBuilder R600InstrInfo::buildIndirectWrite(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator I,
+ unsigned ValueReg, unsigned Address,
+ unsigned OffsetReg,
+ unsigned AddrChan) const {
+ unsigned AddrReg;
+ switch (AddrChan) {
+ default: llvm_unreachable("Invalid Channel");
+ case 0: AddrReg = AMDGPU::R600_AddrRegClass.getRegister(Address); break;
+ case 1: AddrReg = AMDGPU::R600_Addr_YRegClass.getRegister(Address); break;
+ case 2: AddrReg = AMDGPU::R600_Addr_ZRegClass.getRegister(Address); break;
+ case 3: AddrReg = AMDGPU::R600_Addr_WRegClass.getRegister(Address); break;
+ }
MachineInstr *MOVA = buildDefaultInstruction(*MBB, I, AMDGPU::MOVA_INT_eg,
AMDGPU::AR_X, OffsetReg);
setImmOperand(MOVA, AMDGPU::OpName::write, 0);
@@ -1117,7 +1149,22 @@ MachineInstrBuilder R600InstrInfo::buildIndirectRead(MachineBasicBlock *MBB,
MachineBasicBlock::iterator I,
unsigned ValueReg, unsigned Address,
unsigned OffsetReg) const {
- unsigned AddrReg = AMDGPU::R600_AddrRegClass.getRegister(Address);
+ return buildIndirectRead(MBB, I, ValueReg, Address, OffsetReg, 0);
+}
+
+MachineInstrBuilder R600InstrInfo::buildIndirectRead(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator I,
+ unsigned ValueReg, unsigned Address,
+ unsigned OffsetReg,
+ unsigned AddrChan) const {
+ unsigned AddrReg;
+ switch (AddrChan) {
+ default: llvm_unreachable("Invalid Channel");
+ case 0: AddrReg = AMDGPU::R600_AddrRegClass.getRegister(Address); break;
+ case 1: AddrReg = AMDGPU::R600_Addr_YRegClass.getRegister(Address); break;
+ case 2: AddrReg = AMDGPU::R600_Addr_ZRegClass.getRegister(Address); break;
+ case 3: AddrReg = AMDGPU::R600_Addr_WRegClass.getRegister(Address); break;
+ }
MachineInstr *MOVA = buildDefaultInstruction(*MBB, I, AMDGPU::MOVA_INT_eg,
AMDGPU::AR_X,
OffsetReg);
@@ -1220,7 +1267,6 @@ MachineInstr *R600InstrInfo::buildSlotOfVectorInstruction(
const {
assert (MI->getOpcode() == AMDGPU::DOT_4 && "Not Implemented");
unsigned Opcode;
- const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
if (ST.getGeneration() <= AMDGPUSubtarget::R700)
Opcode = AMDGPU::DOT4_r600;
else
diff --git a/lib/Target/R600/R600InstrInfo.h b/lib/Target/R600/R600InstrInfo.h
index b5304a0..45a57d3 100644
--- a/lib/Target/R600/R600InstrInfo.h
+++ b/lib/Target/R600/R600InstrInfo.h
@@ -32,12 +32,22 @@ namespace llvm {
class R600InstrInfo : public AMDGPUInstrInfo {
private:
const R600RegisterInfo RI;
- const AMDGPUSubtarget &ST;
- int getBranchInstr(const MachineOperand &op) const;
std::vector<std::pair<int, unsigned> >
ExtractSrcs(MachineInstr *MI, const DenseMap<unsigned, unsigned> &PV, unsigned &ConstCount) const;
+
+ MachineInstrBuilder buildIndirectRead(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator I,
+ unsigned ValueReg, unsigned Address,
+ unsigned OffsetReg,
+ unsigned AddrChan) const;
+
+ MachineInstrBuilder buildIndirectWrite(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator I,
+ unsigned ValueReg, unsigned Address,
+ unsigned OffsetReg,
+ unsigned AddrChan) const;
public:
enum BankSwizzle {
ALU_VEC_012_SCL_210 = 0,
@@ -48,7 +58,7 @@ namespace llvm {
ALU_VEC_210
};
- explicit R600InstrInfo(AMDGPUTargetMachine &tm);
+ explicit R600InstrInfo(const AMDGPUSubtarget &st);
const R600RegisterInfo &getRegisterInfo() const override;
void copyPhysReg(MachineBasicBlock &MBB,
@@ -197,6 +207,8 @@ namespace llvm {
int getInstrLatency(const InstrItineraryData *ItinData,
SDNode *Node) const override { return 1;}
+ virtual bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const;
+
/// \brief Reserve the registers that may be accessed using indirect addressing.
void reserveIndirectRegisters(BitVector &Reserved,
const MachineFunction &MF) const;
diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td
index 590fde2..73fa345 100644
--- a/lib/Target/R600/R600Instructions.td
+++ b/lib/Target/R600/R600Instructions.td
@@ -125,7 +125,7 @@ class R600_1OP <bits<11> inst, string opName, list<dag> pattern,
class R600_1OP_Helper <bits<11> inst, string opName, SDPatternOperator node,
InstrItinClass itin = AnyALU> :
R600_1OP <inst, opName,
- [(set R600_Reg32:$dst, (node R600_Reg32:$src0))]
+ [(set R600_Reg32:$dst, (node R600_Reg32:$src0))], itin
>;
// If you add or change the operands for R600_2OP instructions, you must
@@ -161,10 +161,10 @@ class R600_2OP <bits<11> inst, string opName, list<dag> pattern,
}
class R600_2OP_Helper <bits<11> inst, string opName, SDPatternOperator node,
- InstrItinClass itim = AnyALU> :
+ InstrItinClass itin = AnyALU> :
R600_2OP <inst, opName,
[(set R600_Reg32:$dst, (node R600_Reg32:$src0,
- R600_Reg32:$src1))]
+ R600_Reg32:$src1))], itin
>;
// If you add or change the operands for R600_3OP instructions, you must
@@ -721,14 +721,11 @@ def SETNE_DX10 : R600_2OP <
>;
def FRACT : R600_1OP_Helper <0x10, "FRACT", AMDGPUfract>;
-def TRUNC : R600_1OP_Helper <0x11, "TRUNC", int_AMDGPU_trunc>;
+def TRUNC : R600_1OP_Helper <0x11, "TRUNC", ftrunc>;
def CEIL : R600_1OP_Helper <0x12, "CEIL", fceil>;
def RNDNE : R600_1OP_Helper <0x13, "RNDNE", frint>;
def FLOOR : R600_1OP_Helper <0x14, "FLOOR", ffloor>;
-// Add also ftrunc intrinsic pattern
-def : Pat<(ftrunc f32:$src0), (TRUNC $src0)>;
-
def MOV : R600_1OP <0x19, "MOV", []>;
let isPseudo = 1, isCodeGenOnly = 1, usesCustomInserter = 1 in {
@@ -1082,18 +1079,21 @@ class RECIP_UINT_Common <bits<11> inst> : R600_1OP_Helper <
let Itinerary = TransALU;
}
+// Clamped to maximum.
class RECIPSQRT_CLAMPED_Common <bits<11> inst> : R600_1OP_Helper <
- inst, "RECIPSQRT_CLAMPED", int_AMDGPU_rsq
+ inst, "RECIPSQRT_CLAMPED", AMDGPUrsq_clamped
> {
let Itinerary = TransALU;
}
-class RECIPSQRT_IEEE_Common <bits<11> inst> : R600_1OP <
- inst, "RECIPSQRT_IEEE", []
+class RECIPSQRT_IEEE_Common <bits<11> inst> : R600_1OP_Helper <
+ inst, "RECIPSQRT_IEEE", AMDGPUrsq_legacy
> {
let Itinerary = TransALU;
}
+// TODO: There is also RECIPSQRT_FF which clamps to zero.
+
class SIN_Common <bits<11> inst> : R600_1OP <
inst, "SIN", [(set f32:$dst, (SIN_HW f32:$src0))]>{
let Trig = 1;
@@ -1266,13 +1266,6 @@ defm R600_ : RegisterLoadStore <R600_Reg32, FRAMEri, ADDRIndirect>;
//===----------------------------------------------------------------------===//
-// Branch Instructions
-//===----------------------------------------------------------------------===//
-
-def IF_PREDICATE_SET : ILFormat<(outs), (ins GPRI32:$src),
- "IF_PREDICATE_SET $src", []>;
-
-//===----------------------------------------------------------------------===//
// Pseudo instructions
//===----------------------------------------------------------------------===//
@@ -1345,15 +1338,6 @@ def TXD_SHADOW: InstR600 <
} // End isPseudo = 1
} // End usesCustomInserter = 1
-//===---------------------------------------------------------------------===//
-// Return instruction
-//===---------------------------------------------------------------------===//
-let isTerminator = 1, isReturn = 1, hasCtrlDep = 1,
- usesCustomInserter = 1 in {
- def RETURN : ILFormat<(outs), (ins variable_ops),
- "RETURN", [(IL_retflag)]>;
-}
-
//===----------------------------------------------------------------------===//
// Constant Buffer Addressing Support
@@ -1480,11 +1464,52 @@ let Inst{63-32} = Word1;
let VTXInst = 1;
}
+//===---------------------------------------------------------------------===//
+// Flow and Program control Instructions
+//===---------------------------------------------------------------------===//
+class ILFormat<dag outs, dag ins, string asmstr, list<dag> pattern>
+: Instruction {
+
+ let Namespace = "AMDGPU";
+ dag OutOperandList = outs;
+ dag InOperandList = ins;
+ let Pattern = pattern;
+ let AsmString = !strconcat(asmstr, "\n");
+ let isPseudo = 1;
+ let Itinerary = NullALU;
+ bit hasIEEEFlag = 0;
+ bit hasZeroOpFlag = 0;
+ let mayLoad = 0;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+}
+
+multiclass BranchConditional<SDNode Op, RegisterClass rci, RegisterClass rcf> {
+ def _i32 : ILFormat<(outs),
+ (ins brtarget:$target, rci:$src0),
+ "; i32 Pseudo branch instruction",
+ [(Op bb:$target, (i32 rci:$src0))]>;
+ def _f32 : ILFormat<(outs),
+ (ins brtarget:$target, rcf:$src0),
+ "; f32 Pseudo branch instruction",
+ [(Op bb:$target, (f32 rcf:$src0))]>;
+}
+
+// Only scalar types should generate flow control
+multiclass BranchInstr<string name> {
+ def _i32 : ILFormat<(outs), (ins R600_Reg32:$src),
+ !strconcat(name, " $src"), []>;
+ def _f32 : ILFormat<(outs), (ins R600_Reg32:$src),
+ !strconcat(name, " $src"), []>;
+}
+// Only scalar types should generate flow control
+multiclass BranchInstr2<string name> {
+ def _i32 : ILFormat<(outs), (ins R600_Reg32:$src0, R600_Reg32:$src1),
+ !strconcat(name, " $src0, $src1"), []>;
+ def _f32 : ILFormat<(outs), (ins R600_Reg32:$src0, R600_Reg32:$src1),
+ !strconcat(name, " $src0, $src1"), []>;
+}
-
-//===--------------------------------------------------------------------===//
-// Instructions support
-//===--------------------------------------------------------------------===//
//===---------------------------------------------------------------------===//
// Custom Inserter for Branches and returns, this eventually will be a
// separate pass
@@ -1497,13 +1522,22 @@ let isTerminator = 1, usesCustomInserter = 1, isBranch = 1, isBarrier = 1 in {
}
//===---------------------------------------------------------------------===//
-// Flow and Program control Instructions
+// Return instruction
//===---------------------------------------------------------------------===//
+let isTerminator = 1, isReturn = 1, hasCtrlDep = 1,
+ usesCustomInserter = 1 in {
+ def RETURN : ILFormat<(outs), (ins variable_ops),
+ "RETURN", [(IL_retflag)]>;
+}
+
+//===----------------------------------------------------------------------===//
+// Branch Instructions
+//===----------------------------------------------------------------------===//
+
+def IF_PREDICATE_SET : ILFormat<(outs), (ins R600_Reg32:$src),
+ "IF_PREDICATE_SET $src", []>;
+
let isTerminator=1 in {
- def SWITCH : ILFormat< (outs), (ins GPRI32:$src),
- !strconcat("SWITCH", " $src"), []>;
- def CASE : ILFormat< (outs), (ins GPRI32:$src),
- !strconcat("CASE", " $src"), []>;
def BREAK : ILFormat< (outs), (ins),
"BREAK", []>;
def CONTINUE : ILFormat< (outs), (ins),
@@ -1548,6 +1582,60 @@ let isTerminator=1 in {
}
//===----------------------------------------------------------------------===//
+// Indirect addressing pseudo instructions
+//===----------------------------------------------------------------------===//
+
+let isPseudo = 1 in {
+
+class ExtractVertical <RegisterClass vec_rc> : InstR600 <
+ (outs R600_Reg32:$dst),
+ (ins vec_rc:$vec, R600_Reg32:$index), "",
+ [],
+ AnyALU
+>;
+
+let Constraints = "$dst = $vec" in {
+
+class InsertVertical <RegisterClass vec_rc> : InstR600 <
+ (outs vec_rc:$dst),
+ (ins vec_rc:$vec, R600_Reg32:$value, R600_Reg32:$index), "",
+ [],
+ AnyALU
+>;
+
+} // End Constraints = "$dst = $vec"
+
+} // End isPseudo = 1
+
+def R600_EXTRACT_ELT_V2 : ExtractVertical <R600_Reg64Vertical>;
+def R600_EXTRACT_ELT_V4 : ExtractVertical <R600_Reg128Vertical>;
+
+def R600_INSERT_ELT_V2 : InsertVertical <R600_Reg64Vertical>;
+def R600_INSERT_ELT_V4 : InsertVertical <R600_Reg128Vertical>;
+
+class ExtractVerticalPat <Instruction inst, ValueType vec_ty,
+ ValueType scalar_ty> : Pat <
+ (scalar_ty (extractelt vec_ty:$vec, i32:$index)),
+ (inst $vec, $index)
+>;
+
+def : ExtractVerticalPat <R600_EXTRACT_ELT_V2, v2i32, i32>;
+def : ExtractVerticalPat <R600_EXTRACT_ELT_V2, v2f32, f32>;
+def : ExtractVerticalPat <R600_EXTRACT_ELT_V4, v4i32, i32>;
+def : ExtractVerticalPat <R600_EXTRACT_ELT_V4, v4f32, f32>;
+
+class InsertVerticalPat <Instruction inst, ValueType vec_ty,
+ ValueType scalar_ty> : Pat <
+ (vec_ty (insertelt vec_ty:$vec, scalar_ty:$value, i32:$index)),
+ (inst $vec, $value, $index)
+>;
+
+def : InsertVerticalPat <R600_INSERT_ELT_V2, v2i32, i32>;
+def : InsertVerticalPat <R600_INSERT_ELT_V2, v2f32, f32>;
+def : InsertVerticalPat <R600_INSERT_ELT_V4, v4i32, i32>;
+def : InsertVerticalPat <R600_INSERT_ELT_V4, v4f32, f32>;
+
+//===----------------------------------------------------------------------===//
// ISel Patterns
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/R600/R600MachineScheduler.cpp b/lib/Target/R600/R600MachineScheduler.cpp
index d1655d1..7ea654c 100644
--- a/lib/Target/R600/R600MachineScheduler.cpp
+++ b/lib/Target/R600/R600MachineScheduler.cpp
@@ -13,6 +13,7 @@
//===----------------------------------------------------------------------===//
#include "R600MachineScheduler.h"
+#include "AMDGPUSubtarget.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Pass.h"
diff --git a/lib/Target/R600/R600Packetizer.cpp b/lib/Target/R600/R600Packetizer.cpp
index c2f6c03..74cf309 100644
--- a/lib/Target/R600/R600Packetizer.cpp
+++ b/lib/Target/R600/R600Packetizer.cpp
@@ -16,6 +16,7 @@
#include "llvm/Support/Debug.h"
#include "AMDGPU.h"
+#include "AMDGPUSubtarget.h"
#include "R600InstrInfo.h"
#include "llvm/CodeGen/DFAPacketizer.h"
#include "llvm/CodeGen/MachineDominators.h"
diff --git a/lib/Target/R600/R600RegisterInfo.cpp b/lib/Target/R600/R600RegisterInfo.cpp
index f3bb88b..dc95675 100644
--- a/lib/Target/R600/R600RegisterInfo.cpp
+++ b/lib/Target/R600/R600RegisterInfo.cpp
@@ -20,15 +20,14 @@
using namespace llvm;
-R600RegisterInfo::R600RegisterInfo(AMDGPUTargetMachine &tm)
-: AMDGPURegisterInfo(tm),
- TM(tm)
+R600RegisterInfo::R600RegisterInfo(const AMDGPUSubtarget &st)
+: AMDGPURegisterInfo(st)
{ RCW.RegWeight = 0; RCW.WeightLimit = 0;}
BitVector R600RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
BitVector Reserved(getNumRegs());
- const R600InstrInfo *TII = static_cast<const R600InstrInfo*>(TM.getInstrInfo());
+ const R600InstrInfo *TII = static_cast<const R600InstrInfo*>(ST.getInstrInfo());
Reserved.set(AMDGPU::ZERO);
Reserved.set(AMDGPU::HALF);
@@ -55,16 +54,6 @@ BitVector R600RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
return Reserved;
}
-const TargetRegisterClass *
-R600RegisterInfo::getISARegClass(const TargetRegisterClass * rc) const {
- switch (rc->getID()) {
- case AMDGPU::GPRF32RegClassID:
- case AMDGPU::GPRI32RegClassID:
- return &AMDGPU::R600_Reg32RegClass;
- default: return rc;
- }
-}
-
unsigned R600RegisterInfo::getHWRegChan(unsigned reg) const {
return this->getEncodingValue(reg) >> HW_CHAN_SHIFT;
}
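
Assuming the R600RegisterInfo.td change below, which packs the register index
into HWEncoding bits 8-0 and the new chan_encoding field into bits 10-9,
HW_CHAN_SHIFT here is presumably 9. A sketch of the unpacking (struct and
helper names hypothetical):

#include <cstdint>

struct R600RegFields {
  unsigned Index; // HWEncoding{8-0}: hardware register index
  unsigned Chan;  // HWEncoding{10-9}: channel (X=0, Y=1, Z=2, W=3)
};

static R600RegFields decodeHWEncoding(uint16_t Encoding) {
  return { Encoding & 0x1FFu, (Encoding >> 9) & 0x3u };
}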
diff --git a/lib/Target/R600/R600RegisterInfo.h b/lib/Target/R600/R600RegisterInfo.h
index 52e1a4b..247808b 100644
--- a/lib/Target/R600/R600RegisterInfo.h
+++ b/lib/Target/R600/R600RegisterInfo.h
@@ -16,26 +16,18 @@
#define R600REGISTERINFO_H_
#include "AMDGPURegisterInfo.h"
-#include "AMDGPUTargetMachine.h"
namespace llvm {
-class R600TargetMachine;
+class AMDGPUSubtarget;
struct R600RegisterInfo : public AMDGPURegisterInfo {
- AMDGPUTargetMachine &TM;
RegClassWeight RCW;
- R600RegisterInfo(AMDGPUTargetMachine &tm);
+ R600RegisterInfo(const AMDGPUSubtarget &st);
BitVector getReservedRegs(const MachineFunction &MF) const override;
- /// \param RC is an AMDIL reg class.
- ///
- /// \returns the R600 reg class that is equivalent to \p RC.
- const TargetRegisterClass *getISARegClass(
- const TargetRegisterClass *RC) const override;
-
/// \brief get the HW encoding for a register's channel.
unsigned getHWRegChan(unsigned reg) const;
diff --git a/lib/Target/R600/R600RegisterInfo.td b/lib/Target/R600/R600RegisterInfo.td
index 68bcd20..cc667d9 100644
--- a/lib/Target/R600/R600RegisterInfo.td
+++ b/lib/Target/R600/R600RegisterInfo.td
@@ -18,18 +18,28 @@ class R600RegWithChan <string name, bits<9> sel, string chan> :
class R600Reg_128<string n, list<Register> subregs, bits<16> encoding> :
RegisterWithSubRegs<n, subregs> {
+ field bits<2> chan_encoding = 0;
let Namespace = "AMDGPU";
let SubRegIndices = [sub0, sub1, sub2, sub3];
- let HWEncoding = encoding;
+ let HWEncoding{8-0} = encoding{8-0};
+ let HWEncoding{10-9} = chan_encoding;
}
class R600Reg_64<string n, list<Register> subregs, bits<16> encoding> :
RegisterWithSubRegs<n, subregs> {
+ field bits<2> chan_encoding = 0;
let Namespace = "AMDGPU";
let SubRegIndices = [sub0, sub1];
let HWEncoding = encoding;
+ let HWEncoding{8-0} = encoding{8-0};
+ let HWEncoding{10-9} = chan_encoding;
}
+class R600Reg_64Vertical<int lo, int hi, string chan> : R600Reg_64 <
+ "V"#lo#hi#"_"#chan,
+ [!cast<Register>("T"#lo#"_"#chan), !cast<Register>("T"#hi#"_"#chan)],
+ lo
+>;
foreach Index = 0-127 in {
foreach Chan = [ "X", "Y", "Z", "W" ] in {
@@ -54,6 +64,24 @@ foreach Index = 0-127 in {
Index>;
}
+foreach Chan = [ "X", "Y", "Z", "W"] in {
+
+ let chan_encoding = !if(!eq(Chan, "X"), 0,
+ !if(!eq(Chan, "Y"), 1,
+ !if(!eq(Chan, "Z"), 2,
+ !if(!eq(Chan, "W"), 3, 0)))) in {
+ def V0123_#Chan : R600Reg_128 <"V0123_"#Chan,
+ [!cast<Register>("T0_"#Chan),
+ !cast<Register>("T1_"#Chan),
+ !cast<Register>("T2_"#Chan),
+ !cast<Register>("T3_"#Chan)],
+ 0>;
+ def V01_#Chan : R600Reg_64Vertical<0, 1, Chan>;
+ def V23_#Chan : R600Reg_64Vertical<2, 3, Chan>;
+ }
+}
+
+
// KCACHE_BANK0
foreach Index = 159-128 in {
foreach Chan = [ "X", "Y", "Z", "W" ] in {
@@ -130,8 +158,14 @@ def ALU_PARAM : R600Reg<"Param", 0>;
let isAllocatable = 0 in {
-// XXX: Only use the X channel, until we support wider stack widths
-def R600_Addr : RegisterClass <"AMDGPU", [i32], 127, (add (sequence "Addr%u_X", 0, 127))>;
+def R600_Addr : RegisterClass <"AMDGPU", [i32], 32, (add (sequence "Addr%u_X", 0, 127))>;
+
+// We only use Addr_[YZW] for vertical vectors.
+// FIXME: if we add more vertical vector registers we will need to add more
+// registers to these classes.
+def R600_Addr_Y : RegisterClass <"AMDGPU", [i32], 32, (add Addr0_Y)>;
+def R600_Addr_Z : RegisterClass <"AMDGPU", [i32], 32, (add Addr0_Z)>;
+def R600_Addr_W : RegisterClass <"AMDGPU", [i32], 32, (add Addr0_W)>;
def R600_LDS_SRC_REG : RegisterClass<"AMDGPU", [i32], 32,
(add OQA, OQB, OQAP, OQBP, LDS_DIRECT_A, LDS_DIRECT_B)>;
@@ -206,5 +240,13 @@ def R600_Reg128 : RegisterClass<"AMDGPU", [v4f32, v4i32], 128,
let CopyCost = -1;
}
+def R600_Reg128Vertical : RegisterClass<"AMDGPU", [v4f32, v4i32], 128,
+ (add V0123_W, V0123_Z, V0123_Y, V0123_X)
+>;
+
def R600_Reg64 : RegisterClass<"AMDGPU", [v2f32, v2i32], 64,
(add (sequence "T%u_XY", 0, 63))>;
+
+def R600_Reg64Vertical : RegisterClass<"AMDGPU", [v2f32, v2i32], 64,
+ (add V01_X, V01_Y, V01_Z, V01_W,
+ V23_X, V23_Y, V23_Z, V23_W)>;
diff --git a/lib/Target/R600/SIAnnotateControlFlow.cpp b/lib/Target/R600/SIAnnotateControlFlow.cpp
index d6e4451..91eb60b 100644
--- a/lib/Target/R600/SIAnnotateControlFlow.cpp
+++ b/lib/Target/R600/SIAnnotateControlFlow.cpp
@@ -65,7 +65,6 @@ class SIAnnotateControlFlow : public FunctionPass {
DominatorTree *DT;
StackVector Stack;
- SSAUpdater PhiInserter;
bool isTopOfStack(BasicBlock *BB);
@@ -81,7 +80,7 @@ class SIAnnotateControlFlow : public FunctionPass {
void insertElse(BranchInst *Term);
- void handleLoopCondition(Value *Cond);
+ Value *handleLoopCondition(Value *Cond, PHINode *Broken);
void handleLoop(BranchInst *Term);
@@ -177,7 +176,7 @@ bool SIAnnotateControlFlow::isElse(PHINode *Phi) {
} else {
if (Phi->getIncomingValue(i) != BoolFalse)
return false;
-
+
}
}
return true;
@@ -204,20 +203,26 @@ void SIAnnotateControlFlow::insertElse(BranchInst *Term) {
}
/// \brief Recursively handle the condition leading to a loop
-void SIAnnotateControlFlow::handleLoopCondition(Value *Cond) {
+Value *SIAnnotateControlFlow::handleLoopCondition(Value *Cond, PHINode *Broken) {
if (PHINode *Phi = dyn_cast<PHINode>(Cond)) {
+ BasicBlock *Parent = Phi->getParent();
+ PHINode *NewPhi = PHINode::Create(Int64, 0, "", &Parent->front());
+ Value *Ret = NewPhi;
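+    // NewPhi mirrors the condition Phi: constant incoming values reuse the
+    // Broken value directly, everything else is handled recursively below.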
// Handle all non-constant incoming values first
for (unsigned i = 0, e = Phi->getNumIncomingValues(); i != e; ++i) {
Value *Incoming = Phi->getIncomingValue(i);
- if (isa<ConstantInt>(Incoming))
+ BasicBlock *From = Phi->getIncomingBlock(i);
+ if (isa<ConstantInt>(Incoming)) {
+ NewPhi->addIncoming(Broken, From);
continue;
+ }
Phi->setIncomingValue(i, BoolFalse);
- handleLoopCondition(Incoming);
+ Value *PhiArg = handleLoopCondition(Incoming, Broken);
+ NewPhi->addIncoming(PhiArg, From);
}
- BasicBlock *Parent = Phi->getParent();
BasicBlock *IDom = DT->getNode(Parent)->getIDom()->getBlock();
for (unsigned i = 0, e = Phi->getNumIncomingValues(); i != e; ++i) {
@@ -230,33 +235,28 @@ void SIAnnotateControlFlow::handleLoopCondition(Value *Cond) {
if (From == IDom) {
CallInst *OldEnd = dyn_cast<CallInst>(Parent->getFirstInsertionPt());
if (OldEnd && OldEnd->getCalledFunction() == EndCf) {
- Value *Args[] = {
- OldEnd->getArgOperand(0),
- PhiInserter.GetValueAtEndOfBlock(Parent)
- };
- Value *Ret = CallInst::Create(ElseBreak, Args, "", OldEnd);
- PhiInserter.AddAvailableValue(Parent, Ret);
+ Value *Args[] = { OldEnd->getArgOperand(0), NewPhi };
+ Ret = CallInst::Create(ElseBreak, Args, "", OldEnd);
continue;
}
}
-
TerminatorInst *Insert = From->getTerminator();
- Value *Arg = PhiInserter.GetValueAtEndOfBlock(From);
- Value *Ret = CallInst::Create(Break, Arg, "", Insert);
- PhiInserter.AddAvailableValue(From, Ret);
+ Value *PhiArg = CallInst::Create(Break, Broken, "", Insert);
+ NewPhi->setIncomingValue(i, PhiArg);
}
eraseIfUnused(Phi);
+ return Ret;
} else if (Instruction *Inst = dyn_cast<Instruction>(Cond)) {
BasicBlock *Parent = Inst->getParent();
TerminatorInst *Insert = Parent->getTerminator();
- Value *Args[] = { Cond, PhiInserter.GetValueAtEndOfBlock(Parent) };
- Value *Ret = CallInst::Create(IfBreak, Args, "", Insert);
- PhiInserter.AddAvailableValue(Parent, Ret);
+ Value *Args[] = { Cond, Broken };
+ return CallInst::Create(IfBreak, Args, "", Insert);
} else {
llvm_unreachable("Unhandled loop condition!");
}
+  return nullptr;
}
/// \brief Handle a back edge (loop)
@@ -264,15 +264,11 @@ void SIAnnotateControlFlow::handleLoop(BranchInst *Term) {
BasicBlock *Target = Term->getSuccessor(1);
PHINode *Broken = PHINode::Create(Int64, 0, "", &Target->front());
- PhiInserter.Initialize(Int64, "");
- PhiInserter.AddAvailableValue(Target, Broken);
-
Value *Cond = Term->getCondition();
Term->setCondition(BoolTrue);
- handleLoopCondition(Cond);
+ Value *Arg = handleLoopCondition(Cond, Broken);
BasicBlock *BB = Term->getParent();
- Value *Arg = PhiInserter.GetValueAtEndOfBlock(BB);
for (pred_iterator PI = pred_begin(Target), PE = pred_end(Target);
PI != PE; ++PI) {
diff --git a/lib/Target/R600/SIDefines.h b/lib/Target/R600/SIDefines.h
index 2cbce28..4d31a11 100644
--- a/lib/Target/R600/SIDefines.h
+++ b/lib/Target/R600/SIDefines.h
@@ -35,4 +35,54 @@ enum {
#define S_00B84C_LDS_SIZE(x) (((x) & 0x1FF) << 15)
#define R_0286CC_SPI_PS_INPUT_ENA 0x0286CC
+
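+// For each field below, S_*(x) shifts a value into position, G_*(x) extracts
+// it from a packed word, and C_* is the mask that clears it.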
+#define R_00B848_COMPUTE_PGM_RSRC1 0x00B848
+#define S_00B848_VGPRS(x) (((x) & 0x3F) << 0)
+#define G_00B848_VGPRS(x) (((x) >> 0) & 0x3F)
+#define C_00B848_VGPRS 0xFFFFFFC0
+#define S_00B848_SGPRS(x) (((x) & 0x0F) << 6)
+#define G_00B848_SGPRS(x) (((x) >> 6) & 0x0F)
+#define C_00B848_SGPRS 0xFFFFFC3F
+#define S_00B848_PRIORITY(x) (((x) & 0x03) << 10)
+#define G_00B848_PRIORITY(x) (((x) >> 10) & 0x03)
+#define C_00B848_PRIORITY 0xFFFFF3FF
+#define S_00B848_FLOAT_MODE(x) (((x) & 0xFF) << 12)
+#define G_00B848_FLOAT_MODE(x) (((x) >> 12) & 0xFF)
+#define C_00B848_FLOAT_MODE 0xFFF00FFF
+#define S_00B848_PRIV(x) (((x) & 0x1) << 20)
+#define G_00B848_PRIV(x) (((x) >> 20) & 0x1)
+#define C_00B848_PRIV 0xFFEFFFFF
+#define S_00B848_DX10_CLAMP(x) (((x) & 0x1) << 21)
+#define G_00B848_DX10_CLAMP(x) (((x) >> 21) & 0x1)
+#define C_00B848_DX10_CLAMP 0xFFDFFFFF
+#define S_00B848_DEBUG_MODE(x) (((x) & 0x1) << 22)
+#define G_00B848_DEBUG_MODE(x) (((x) >> 22) & 0x1)
+#define C_00B848_DEBUG_MODE 0xFFBFFFFF
+#define S_00B848_IEEE_MODE(x) (((x) & 0x1) << 23)
+#define G_00B848_IEEE_MODE(x) (((x) >> 23) & 0x1)
+#define C_00B848_IEEE_MODE 0xFF7FFFFF
+
+
+// Helpers for setting FLOAT_MODE
+#define FP_ROUND_ROUND_TO_NEAREST 0
+#define FP_ROUND_ROUND_TO_INF 1
+#define FP_ROUND_ROUND_TO_NEGINF 2
+#define FP_ROUND_ROUND_TO_ZERO 3
+
+// Bits 3:0 control rounding mode. 1:0 control single precision, 3:2 double
+// precision.
+#define FP_ROUND_MODE_SP(x) ((x) & 0x3)
+#define FP_ROUND_MODE_DP(x) (((x) & 0x3) << 2)
+
+#define FP_DENORM_FLUSH_IN_FLUSH_OUT 0
+#define FP_DENORM_FLUSH_OUT 1
+#define FP_DENORM_FLUSH_IN 2
+#define FP_DENORM_FLUSH_NONE 3
+
+
+// Bits 7:4 control denormal handling. 5:4 control single precision, 7:6 double
+// precision.
+#define FP_DENORM_MODE_SP(x) (((x) & 0x3) << 4)
+#define FP_DENORM_MODE_DP(x) (((x) & 0x3) << 6)
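+
+// For example, a FLOAT_MODE that rounds to nearest in both precisions and
+// flushes no denormals would be:
+//   S_00B848_FLOAT_MODE(FP_ROUND_MODE_SP(FP_ROUND_ROUND_TO_NEAREST) |
+//                       FP_ROUND_MODE_DP(FP_ROUND_ROUND_TO_NEAREST) |
+//                       FP_DENORM_MODE_SP(FP_DENORM_FLUSH_NONE) |
+//                       FP_DENORM_MODE_DP(FP_DENORM_FLUSH_NONE))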
+
#endif // SIDEFINES_H_
diff --git a/lib/Target/R600/SIFixSGPRLiveRanges.cpp b/lib/Target/R600/SIFixSGPRLiveRanges.cpp
new file mode 100644
index 0000000..7d116ee
--- /dev/null
+++ b/lib/Target/R600/SIFixSGPRLiveRanges.cpp
@@ -0,0 +1,110 @@
+//===-- SIFixSGPRLiveRanges.cpp - Fix SGPR live ranges ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// SALU instructions ignore control flow, so we need to modify the live ranges
+/// of the registers they define.
+///
+/// The strategy is to view the entire program as if it were a single basic
+/// block and calculate the intervals accordingly. We implement this
+/// by walking the list of segments for each LiveRange and setting the
+/// end of each segment equal to the start of the segment that immediately
+/// follows it.
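+///
+/// For example, a LiveRange with segments [a,b) and [c,d) becomes [a,c) and
+/// [c,d), so the value is also treated as live between b and c.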
+
+#include "AMDGPU.h"
+#include "SIRegisterInfo.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetMachine.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "si-fix-sgpr-live-ranges"
+
+namespace {
+
+class SIFixSGPRLiveRanges : public MachineFunctionPass {
+public:
+ static char ID;
+
+public:
+ SIFixSGPRLiveRanges() : MachineFunctionPass(ID) {
+ initializeSIFixSGPRLiveRangesPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF) override;
+
+ virtual const char *getPassName() const override {
+ return "SI Fix SGPR live ranges";
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<LiveIntervals>();
+ AU.addPreserved<LiveIntervals>();
+ AU.addPreserved<SlotIndexes>();
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+};
+
+} // End anonymous namespace.
+
+INITIALIZE_PASS_BEGIN(SIFixSGPRLiveRanges, DEBUG_TYPE,
+ "SI Fix SGPR Live Ranges", false, false)
+INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
+INITIALIZE_PASS_END(SIFixSGPRLiveRanges, DEBUG_TYPE,
+ "SI Fix SGPR Live Ranges", false, false)
+
+char SIFixSGPRLiveRanges::ID = 0;
+
+char &llvm::SIFixSGPRLiveRangesID = SIFixSGPRLiveRanges::ID;
+
+FunctionPass *llvm::createSIFixSGPRLiveRangesPass() {
+ return new SIFixSGPRLiveRanges();
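+// MUBUFAddr64 selects an i64 address into the three MUBUF ADDR64 operands:
+// the resource descriptor (srsrc), the 64-bit VGPR address (vaddr), and the
+// immediate offset.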
+}
+
+bool SIFixSGPRLiveRanges::runOnMachineFunction(MachineFunction &MF) {
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ const SIRegisterInfo *TRI = static_cast<const SIRegisterInfo *>(
+ MF.getTarget().getRegisterInfo());
+ LiveIntervals *LIS = &getAnalysis<LiveIntervals>();
+
+ for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
+ BI != BE; ++BI) {
+
+ MachineBasicBlock &MBB = *BI;
+ for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
+ I != E; ++I) {
+ MachineInstr &MI = *I;
+ MachineOperand *ExecUse = MI.findRegisterUseOperand(AMDGPU::EXEC);
+ if (ExecUse)
+ continue;
+
+ for (const MachineOperand &Def : MI.operands()) {
+      if (!Def.isReg() || !Def.isDef() ||
+          !TargetRegisterInfo::isVirtualRegister(Def.getReg()))
+ continue;
+
+ const TargetRegisterClass *RC = MRI.getRegClass(Def.getReg());
+
+ if (!TRI->isSGPRClass(RC))
+ continue;
+ LiveInterval &LI = LIS->getInterval(Def.getReg());
+ for (unsigned i = 0, e = LI.size() - 1; i != e; ++i) {
+ LiveRange::Segment &Seg = LI.segments[i];
+ LiveRange::Segment &Next = LI.segments[i + 1];
+ Seg.end = Next.start;
+ }
+ }
+ }
+ }
+
+ return false;
+}
diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp
index c9e247c..b13c3b8 100644
--- a/lib/Target/R600/SIISelLowering.cpp
+++ b/lib/Target/R600/SIISelLowering.cpp
@@ -14,8 +14,8 @@
#include "SIISelLowering.h"
#include "AMDGPU.h"
+#include "AMDGPUIntrinsicInfo.h"
#include "AMDGPUSubtarget.h"
-#include "AMDILIntrinsicInfo.h"
#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
#include "SIRegisterInfo.h"
@@ -24,6 +24,7 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/IR/Function.h"
+#include "llvm/ADT/SmallString.h"
using namespace llvm;
@@ -76,6 +77,8 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
setOperationAction(ISD::ADD, MVT::i32, Legal);
setOperationAction(ISD::ADDC, MVT::i32, Legal);
setOperationAction(ISD::ADDE, MVT::i32, Legal);
+ setOperationAction(ISD::SUBC, MVT::i32, Legal);
+ setOperationAction(ISD::SUBE, MVT::i32, Legal);
// We need to custom lower vector stores from local memory
setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
@@ -88,14 +91,12 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
// We need to custom lower loads/stores from private memory
setOperationAction(ISD::LOAD, MVT::i32, Custom);
- setOperationAction(ISD::LOAD, MVT::i64, Custom);
setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
setOperationAction(ISD::LOAD, MVT::v8i32, Custom);
setOperationAction(ISD::STORE, MVT::i1, Custom);
setOperationAction(ISD::STORE, MVT::i32, Custom);
- setOperationAction(ISD::STORE, MVT::i64, Custom);
setOperationAction(ISD::STORE, MVT::v2i32, Custom);
setOperationAction(ISD::STORE, MVT::v4i32, Custom);
@@ -105,18 +106,14 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
setOperationAction(ISD::SELECT, MVT::f64, Promote);
AddPromotedToType(ISD::SELECT, MVT::f64, MVT::i64);
- setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
- setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
-
- setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
+ setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
+ setOperationAction(ISD::SELECT_CC, MVT::i32, Expand);
+ setOperationAction(ISD::SELECT_CC, MVT::i64, Expand);
+ setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
setOperationAction(ISD::SETCC, MVT::v2i1, Expand);
setOperationAction(ISD::SETCC, MVT::v4i1, Expand);
- setOperationAction(ISD::ANY_EXTEND, MVT::i64, Custom);
- setOperationAction(ISD::SIGN_EXTEND, MVT::i64, Custom);
- setOperationAction(ISD::ZERO_EXTEND, MVT::i64, Custom);
-
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Legal);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i1, Custom);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i1, Custom);
@@ -139,6 +136,7 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v4f32, Custom);
setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
+ setOperationAction(ISD::BRCOND, MVT::Other, Custom);
setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Custom);
@@ -215,9 +213,16 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
setOperationAction(ISD::FRINT, MVT::f64, Legal);
}
+  // FIXME: These should be removed and handled the same way as f32 fneg. Source
+ // modifiers also work for the double instructions.
+ setOperationAction(ISD::FNEG, MVT::f64, Expand);
+ setOperationAction(ISD::FABS, MVT::f64, Expand);
+
setTargetDAGCombine(ISD::SELECT_CC);
setTargetDAGCombine(ISD::SETCC);
+ setTargetDAGCombine(ISD::UINT_TO_FP);
+
setSchedulingPreference(Sched::RegPressure);
}
@@ -265,8 +270,12 @@ bool SITargetLowering::allowsUnalignedMemoryAccesses(EVT VT,
return VT.bitsGT(MVT::i32);
}
-bool SITargetLowering::shouldSplitVectorType(EVT VT) const {
- return VT.getScalarType().bitsLE(MVT::i16);
+TargetLoweringBase::LegalizeTypeAction
+SITargetLowering::getPreferredVectorAction(EVT VT) const {
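+  // Split vectors of small (<= 16-bit) elements instead of promoting them;
+  // single-element vectors keep the default action.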
+ if (VT.getVectorNumElements() != 1 && VT.getScalarType().bitsLE(MVT::i16))
+ return TypeSplitVector;
+
+ return TargetLoweringBase::getPreferredVectorAction(VT);
}
bool SITargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
@@ -482,19 +491,20 @@ MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter(
MI->eraseFromParent();
break;
}
- case AMDGPU::V_SUB_F64:
- BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::V_ADD_F64),
- MI->getOperand(0).getReg())
- .addReg(MI->getOperand(1).getReg())
- .addReg(MI->getOperand(2).getReg())
- .addImm(0) /* src2 */
- .addImm(0) /* ABS */
- .addImm(0) /* CLAMP */
- .addImm(0) /* OMOD */
- .addImm(2); /* NEG */
+ case AMDGPU::V_SUB_F64: {
+ unsigned DestReg = MI->getOperand(0).getReg();
+ BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::V_ADD_F64), DestReg)
+ .addImm(0) // SRC0 modifiers
+ .addReg(MI->getOperand(1).getReg())
+ .addImm(1) // SRC1 modifiers
+ .addReg(MI->getOperand(2).getReg())
+ .addImm(0) // SRC2 modifiers
+ .addImm(0) // src2
+ .addImm(0) // CLAMP
+ .addImm(0); // OMOD
MI->eraseFromParent();
break;
-
+ }
case AMDGPU::SI_RegisterStorePseudo: {
MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
unsigned Reg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
@@ -595,27 +605,31 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::BRCOND: return LowerBRCOND(Op, DAG);
case ISD::LOAD: {
LoadSDNode *Load = dyn_cast<LoadSDNode>(Op);
+ EVT VT = Op.getValueType();
+
+ // These loads are legal.
+ if (Load->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS &&
+ VT.isVector() && VT.getVectorNumElements() == 2 &&
+ VT.getVectorElementType() == MVT::i32)
+ return SDValue();
+
if (Op.getValueType().isVector() &&
(Load->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ||
Load->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS ||
(Load->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS &&
Op.getValueType().getVectorNumElements() > 4))) {
- SDValue MergedValues[2] = {
- SplitVectorLoad(Op, DAG),
- Load->getChain()
- };
- return DAG.getMergeValues(MergedValues, SDLoc(Op));
+ return SplitVectorLoad(Op, DAG);
} else {
- return LowerLOAD(Op, DAG);
+ SDValue Result = LowerLOAD(Op, DAG);
+ assert((!Result.getNode() ||
+ Result.getNode()->getNumValues() == 2) &&
+ "Load should return a value and a chain");
+ return Result;
}
}
case ISD::SELECT: return LowerSELECT(Op, DAG);
- case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
- case ISD::SIGN_EXTEND: return LowerSIGN_EXTEND(Op, DAG);
case ISD::STORE: return LowerSTORE(Op, DAG);
- case ISD::ANY_EXTEND: // Fall-through
- case ISD::ZERO_EXTEND: return LowerZERO_EXTEND(Op, DAG);
case ISD::GlobalAddress: return LowerGlobalAddress(MFI, Op, DAG);
case ISD::INTRINSIC_WO_CHAIN: {
unsigned IntrinsicID =
@@ -827,13 +841,9 @@ SDValue SITargetLowering::LowerBRCOND(SDValue BRCOND,
SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
SDLoc DL(Op);
LoadSDNode *Load = cast<LoadSDNode>(Op);
- SDValue Ret = AMDGPUTargetLowering::LowerLOAD(Op, DAG);
- SDValue MergedValues[2];
- MergedValues[1] = Load->getChain();
- if (Ret.getNode()) {
- MergedValues[0] = Ret;
- return DAG.getMergeValues(MergedValues, DL);
- }
+ SDValue Lowered = AMDGPUTargetLowering::LowerLOAD(Op, DAG);
+ if (Lowered.getNode())
+ return Lowered;
if (Load->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
return SDValue();
@@ -846,25 +856,38 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
SDValue Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, Load->getBasePtr(),
DAG.getConstant(2, MVT::i32));
- Ret = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, MVT::i32,
- Load->getChain(), Ptr,
- DAG.getTargetConstant(0, MVT::i32),
- Op.getOperand(2));
+
+ // FIXME: REGISTER_LOAD should probably have a chain result.
+ SDValue Chain = Load->getChain();
+ SDValue LoLoad = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, MVT::i32,
+ Chain, Ptr,
+ DAG.getTargetConstant(0, MVT::i32),
+ Op.getOperand(2));
+
+ SDValue Ret = LoLoad.getValue(0);
if (MemVT.getSizeInBits() == 64) {
+ // TODO: This needs a test to make sure the right thing is happening with
+ // the chain. That is hard without general function support.
+
SDValue IncPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
DAG.getConstant(1, MVT::i32));
- SDValue LoadUpper = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, MVT::i32,
- Load->getChain(), IncPtr,
- DAG.getTargetConstant(0, MVT::i32),
- Op.getOperand(2));
+ SDValue HiLoad = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, MVT::i32,
+ Chain, IncPtr,
+ DAG.getTargetConstant(0, MVT::i32),
+ Op.getOperand(2));
- Ret = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Ret, LoadUpper);
+ Ret = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, LoLoad, HiLoad);
+ // Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
+ // LoLoad.getValue(1), HiLoad.getValue(1));
}
- MergedValues[0] = Ret;
- return DAG.getMergeValues(MergedValues, DL);
+ SDValue Ops[] = {
+ Ret,
+ Chain
+ };
+ return DAG.getMergeValues(Ops, DL);
}
SDValue SITargetLowering::LowerSampleIntrinsic(unsigned Opcode,
@@ -903,39 +926,17 @@ SDValue SITargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
return DAG.getNode(ISD::BITCAST, DL, MVT::i64, Res);
}
-SDValue SITargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
- SDValue LHS = Op.getOperand(0);
- SDValue RHS = Op.getOperand(1);
- SDValue True = Op.getOperand(2);
- SDValue False = Op.getOperand(3);
- SDValue CC = Op.getOperand(4);
- EVT VT = Op.getValueType();
- SDLoc DL(Op);
-
- SDValue Cond = DAG.getNode(ISD::SETCC, DL, MVT::i1, LHS, RHS, CC);
- return DAG.getNode(ISD::SELECT, DL, VT, Cond, True, False);
-}
-
-SDValue SITargetLowering::LowerSIGN_EXTEND(SDValue Op,
- SelectionDAG &DAG) const {
- EVT VT = Op.getValueType();
- SDLoc DL(Op);
-
- if (VT != MVT::i64) {
- return SDValue();
- }
-
- SDValue Hi = DAG.getNode(ISD::SRA, DL, MVT::i32, Op.getOperand(0),
- DAG.getConstant(31, MVT::i32));
-
- return DAG.getNode(ISD::BUILD_PAIR, DL, VT, Op.getOperand(0), Hi);
-}
-
SDValue SITargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
SDLoc DL(Op);
StoreSDNode *Store = cast<StoreSDNode>(Op);
EVT VT = Store->getMemoryVT();
+ // These stores are legal.
+ if (Store->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS &&
+ VT.isVector() && VT.getVectorNumElements() == 2 &&
+ VT.getVectorElementType() == MVT::i32)
+ return SDValue();
+
SDValue Ret = AMDGPUTargetLowering::LowerSTORE(Op, DAG);
if (Ret.getNode())
return Ret;
@@ -1011,27 +1012,99 @@ SDValue SITargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
return Chain;
}
+//===----------------------------------------------------------------------===//
+// Custom DAG optimizations
+//===----------------------------------------------------------------------===//
+
+SDValue SITargetLowering::performUCharToFloatCombine(SDNode *N,
+ DAGCombinerInfo &DCI) {
+ EVT VT = N->getValueType(0);
+ EVT ScalarVT = VT.getScalarType();
+ if (ScalarVT != MVT::f32)
+ return SDValue();
-SDValue SITargetLowering::LowerZERO_EXTEND(SDValue Op,
- SelectionDAG &DAG) const {
- EVT VT = Op.getValueType();
- SDLoc DL(Op);
+ SelectionDAG &DAG = DCI.DAG;
+ SDLoc DL(N);
- if (VT != MVT::i64) {
+ SDValue Src = N->getOperand(0);
+ EVT SrcVT = Src.getValueType();
+
+ // TODO: We could try to match extracting the higher bytes, which would be
+ // easier if i8 vectors weren't promoted to i32 vectors, particularly after
+ // types are legalized. v4i8 -> v4f32 is probably the only case to worry
+ // about in practice.
+ if (DCI.isAfterLegalizeVectorOps() && SrcVT == MVT::i32) {
+ if (DAG.MaskedValueIsZero(Src, APInt::getHighBitsSet(32, 24))) {
+ SDValue Cvt = DAG.getNode(AMDGPUISD::CVT_F32_UBYTE0, DL, VT, Src);
+ DCI.AddToWorklist(Cvt.getNode());
+ return Cvt;
+ }
+ }
+
+ // We are primarily trying to catch operations on illegal vector types
+ // before they are expanded.
+ // For scalars, we can use the more flexible method of checking masked bits
+ // after legalization.
+ if (!DCI.isBeforeLegalize() ||
+ !SrcVT.isVector() ||
+ SrcVT.getVectorElementType() != MVT::i8) {
return SDValue();
}
- SDValue Src = Op.getOperand(0);
- if (Src.getValueType() != MVT::i32)
- Src = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, Src);
+ assert(DCI.isBeforeLegalize() && "Unexpected legal type");
- SDValue Zero = DAG.getConstant(0, MVT::i32);
- return DAG.getNode(ISD::BUILD_PAIR, DL, VT, Src, Zero);
-}
+ // Weird sized vectors are a pain to handle, but we know 3 is really the same
+ // size as 4.
+ unsigned NElts = SrcVT.getVectorNumElements();
+ if (!SrcVT.isSimple() && NElts != 3)
+ return SDValue();
-//===----------------------------------------------------------------------===//
-// Custom DAG optimizations
-//===----------------------------------------------------------------------===//
+ // Handle v4i8 -> v4f32 extload. Replace the v4i8 with a legal i32 load to
+ // prevent a mess from expanding to v4i32 and repacking.
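+  // i.e. (v4f32 (uint_to_fp (v4i8 (load)))) becomes a single i32 zextload
+  // feeding one CVT_F32_UBYTE0..3 per byte.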
+ if (ISD::isNormalLoad(Src.getNode()) && Src.hasOneUse()) {
+ EVT LoadVT = getEquivalentMemType(*DAG.getContext(), SrcVT);
+ EVT RegVT = getEquivalentLoadRegType(*DAG.getContext(), SrcVT);
+ EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f32, NElts);
+
+ LoadSDNode *Load = cast<LoadSDNode>(Src);
+ SDValue NewLoad = DAG.getExtLoad(ISD::ZEXTLOAD, DL, RegVT,
+ Load->getChain(),
+ Load->getBasePtr(),
+ LoadVT,
+ Load->getMemOperand());
+
+ // Make sure successors of the original load stay after it by updating
+ // them to use the new Chain.
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), NewLoad.getValue(1));
+
+ SmallVector<SDValue, 4> Elts;
+ if (RegVT.isVector())
+ DAG.ExtractVectorElements(NewLoad, Elts);
+ else
+ Elts.push_back(NewLoad);
+
+ SmallVector<SDValue, 4> Ops;
+
+ unsigned EltIdx = 0;
+ for (SDValue Elt : Elts) {
+ unsigned ComponentsInElt = std::min(4u, NElts - 4 * EltIdx);
+ for (unsigned I = 0; I < ComponentsInElt; ++I) {
+ unsigned Opc = AMDGPUISD::CVT_F32_UBYTE0 + I;
+ SDValue Cvt = DAG.getNode(Opc, DL, MVT::f32, Elt);
+ DCI.AddToWorklist(Cvt.getNode());
+ Ops.push_back(Cvt);
+ }
+
+ ++EltIdx;
+ }
+
+ assert(Ops.size() == NElts);
+
+ return DAG.getNode(ISD::BUILD_VECTOR, DL, FloatVT, Ops);
+ }
+
+ return SDValue();
+}
SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
@@ -1074,6 +1147,31 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
}
break;
}
+
+ case AMDGPUISD::CVT_F32_UBYTE0:
+ case AMDGPUISD::CVT_F32_UBYTE1:
+ case AMDGPUISD::CVT_F32_UBYTE2:
+ case AMDGPUISD::CVT_F32_UBYTE3: {
+ unsigned Offset = N->getOpcode() - AMDGPUISD::CVT_F32_UBYTE0;
+
+ SDValue Src = N->getOperand(0);
+ APInt Demanded = APInt::getBitsSet(32, 8 * Offset, 8 * Offset + 8);
+
+ APInt KnownZero, KnownOne;
+ TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
+ !DCI.isBeforeLegalizeOps());
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (TLO.ShrinkDemandedConstant(Src, Demanded) ||
+ TLI.SimplifyDemandedBits(Src, Demanded, KnownZero, KnownOne, TLO)) {
+ DCI.CommitTargetLoweringOpt(TLO);
+ }
+
+ break;
+ }
+
+ case ISD::UINT_TO_FP: {
+ return performUCharToFloatCombine(N, DCI);
+ }
}
return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
@@ -1297,7 +1395,7 @@ SDNode *SITargetLowering::foldOperands(MachineSDNode *Node,
int32_t Immediate = Desc->getSize() == 4 ? 0 : -1;
bool HaveVSrc = false, HaveSSrc = false;
- // First figure out what we alread have in this instruction
+ // First figure out what we already have in this instruction.
for (unsigned i = 0, e = Node->getNumOperands(), Op = NumDefs;
i != e && Op < NumOps; ++i, ++Op) {
@@ -1316,7 +1414,7 @@ SDNode *SITargetLowering::foldOperands(MachineSDNode *Node,
}
}
- // If we neither have VSrc nor SSrc it makes no sense to continue
+ // If we neither have VSrc nor SSrc, it makes no sense to continue.
if (!HaveVSrc && !HaveSSrc)
return Node;
@@ -1332,17 +1430,17 @@ SDNode *SITargetLowering::foldOperands(MachineSDNode *Node,
const SDValue &Operand = Node->getOperand(i);
Ops.push_back(Operand);
- // Already folded immediate ?
+ // Already folded immediate?
if (isa<ConstantSDNode>(Operand.getNode()) ||
isa<ConstantFPSDNode>(Operand.getNode()))
continue;
- // Is this a VSrc or SSrc operand ?
+ // Is this a VSrc or SSrc operand?
unsigned RegClass = Desc->OpInfo[Op].RegClass;
if (isVSrc(RegClass) || isSSrc(RegClass)) {
// Try to fold the immediates
if (!foldImm(Ops[i], Immediate, ScalarSlotUsed)) {
- // Folding didn't worked, make sure we don't hit the SReg limit
+ // Folding didn't work, make sure we don't hit the SReg limit.
ensureSRegLimit(DAG, Ops[i], RegClass, ScalarSlotUsed);
}
continue;
@@ -1371,7 +1469,6 @@ SDNode *SITargetLowering::foldOperands(MachineSDNode *Node,
continue;
if (DescE64) {
-
// Test if it makes sense to switch to e64 encoding
unsigned OtherRegClass = DescE64->OpInfo[Op].RegClass;
if (!isVSrc(OtherRegClass) && !isSSrc(OtherRegClass))
@@ -1402,7 +1499,7 @@ SDNode *SITargetLowering::foldOperands(MachineSDNode *Node,
if (!DescE64)
continue;
Desc = DescE64;
- DescE64 = 0;
+ DescE64 = nullptr;
}
else if (Operand.getMachineOpcode() == AMDGPU::FABS_SI) {
Ops.pop_back();
@@ -1412,7 +1509,7 @@ SDNode *SITargetLowering::foldOperands(MachineSDNode *Node,
if (!DescE64)
continue;
Desc = DescE64;
- DescE64 = 0;
+ DescE64 = nullptr;
}
}
@@ -1535,7 +1632,7 @@ void SITargetLowering::adjustWritemask(MachineSDNode *&Node,
}
}
-/// \brief Fold the instructions after slecting them
+/// \brief Fold the instructions after selecting them.
SDNode *SITargetLowering::PostISelFolding(MachineSDNode *Node,
SelectionDAG &DAG) const {
const SIInstrInfo *TII =
diff --git a/lib/Target/R600/SIISelLowering.h b/lib/Target/R600/SIISelLowering.h
index c6eaa81..e25323a 100644
--- a/lib/Target/R600/SIISelLowering.h
+++ b/lib/Target/R600/SIISelLowering.h
@@ -27,10 +27,7 @@ class SITargetLowering : public AMDGPUTargetLowering {
SelectionDAG &DAG) const;
SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerSIGN_EXTEND(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerZERO_EXTEND(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
bool foldImm(SDValue &Operand, int32_t &Immediate,
@@ -46,11 +43,16 @@ class SITargetLowering : public AMDGPUTargetLowering {
void adjustWritemask(MachineSDNode *&N, SelectionDAG &DAG) const;
MachineSDNode *AdjustRegClass(MachineSDNode *N, SelectionDAG &DAG) const;
+ static SDValue performUCharToFloatCombine(SDNode *N,
+ DAGCombinerInfo &DCI);
+
public:
SITargetLowering(TargetMachine &tm);
bool allowsUnalignedMemoryAccesses(EVT VT, unsigned AS,
bool *IsFast) const override;
- bool shouldSplitVectorType(EVT VT) const override;
+
+ TargetLoweringBase::LegalizeTypeAction
+ getPreferredVectorAction(EVT VT) const override;
bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
Type *Ty) const override;
diff --git a/lib/Target/R600/SIInsertWaits.cpp b/lib/Target/R600/SIInsertWaits.cpp
index a17fed7..1733326 100644
--- a/lib/Target/R600/SIInsertWaits.cpp
+++ b/lib/Target/R600/SIInsertWaits.cpp
@@ -341,6 +341,8 @@ Counters SIInsertWaits::handleOperands(MachineInstr &MI) {
return Result;
}
+// FIXME: Insert waits listed in Table 4.2 "Required User-Inserted Wait States"
+// around other non-memory instructions.
bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) {
bool Changes = false;
diff --git a/lib/Target/R600/SIInstrFormats.td b/lib/Target/R600/SIInstrFormats.td
index 168eff2..7cae9fc 100644
--- a/lib/Target/R600/SIInstrFormats.td
+++ b/lib/Target/R600/SIInstrFormats.td
@@ -51,6 +51,16 @@ class Enc64 <dag outs, dag ins, string asm, list<dag> pattern> :
let Size = 8;
}
+class VOP3Common <dag outs, dag ins, string asm, list<dag> pattern> :
+ Enc64 <outs, ins, asm, pattern> {
+
+ let mayLoad = 0;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+ let UseNamedOperandTable = 1;
+ let VOP3 = 1;
+}
+
//===----------------------------------------------------------------------===//
// Scalar operations
//===----------------------------------------------------------------------===//
@@ -207,7 +217,7 @@ class VOP2 <bits<6> op, dag outs, dag ins, string asm, list<dag> pattern> :
}
class VOP3 <bits<9> op, dag outs, dag ins, string asm, list<dag> pattern> :
- Enc64 <outs, ins, asm, pattern> {
+ VOP3Common <outs, ins, asm, pattern> {
bits<8> dst;
bits<2> src0_modifiers;
@@ -233,16 +243,11 @@ class VOP3 <bits<9> op, dag outs, dag ins, string asm, list<dag> pattern> :
let Inst{61} = src0_modifiers{0};
let Inst{62} = src1_modifiers{0};
let Inst{63} = src2_modifiers{0};
-
- let mayLoad = 0;
- let mayStore = 0;
- let hasSideEffects = 0;
- let UseNamedOperandTable = 1;
- let VOP3 = 1;
+
}
class VOP3b <bits<9> op, dag outs, dag ins, string asm, list<dag> pattern> :
- Enc64 <outs, ins, asm, pattern> {
+ VOP3Common <outs, ins, asm, pattern> {
bits<8> dst;
bits<2> src0_modifiers;
@@ -266,11 +271,6 @@ class VOP3b <bits<9> op, dag outs, dag ins, string asm, list<dag> pattern> :
let Inst{62} = src1_modifiers{0};
let Inst{63} = src2_modifiers{0};
- let mayLoad = 0;
- let mayStore = 0;
- let hasSideEffects = 0;
- let UseNamedOperandTable = 1;
- let VOP3 = 1;
}
class VOPC <bits<8> op, dag ins, string asm, list<dag> pattern> :
diff --git a/lib/Target/R600/SIInstrInfo.cpp b/lib/Target/R600/SIInstrInfo.cpp
index 4a9e346..455c890 100644
--- a/lib/Target/R600/SIInstrInfo.cpp
+++ b/lib/Target/R600/SIInstrInfo.cpp
@@ -19,13 +19,14 @@
#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/Function.h"
#include "llvm/MC/MCInstrDesc.h"
using namespace llvm;
-SIInstrInfo::SIInstrInfo(AMDGPUTargetMachine &tm)
- : AMDGPUInstrInfo(tm),
- RI(tm) { }
+SIInstrInfo::SIInstrInfo(const AMDGPUSubtarget &st)
+ : AMDGPUInstrInfo(st),
+ RI(st) { }
//===----------------------------------------------------------------------===//
// TargetInstrInfo callbacks
@@ -187,18 +188,25 @@ void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
int FrameIndex,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
- SIMachineFunctionInfo *MFI = MBB.getParent()->getInfo<SIMachineFunctionInfo>();
+ MachineFunction *MF = MBB.getParent();
+ SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
+ MachineRegisterInfo &MRI = MF->getRegInfo();
DebugLoc DL = MBB.findDebugLoc(MI);
unsigned KillFlag = isKill ? RegState::Kill : 0;
- MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
- if (TRI->getCommonSubClass(RC, &AMDGPU::SGPR_32RegClass)) {
- unsigned Lane = MFI->SpillTracker.reserveLanes(MRI, MBB.getParent());
+ if (RI.hasVGPRs(RC)) {
+ LLVMContext &Ctx = MF->getFunction()->getContext();
+ Ctx.emitError("SIInstrInfo::storeRegToStackSlot - Can't spill VGPR!");
+ BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), AMDGPU::VGPR0)
+ .addReg(SrcReg);
+ } else if (TRI->getCommonSubClass(RC, &AMDGPU::SGPR_32RegClass)) {
+ unsigned Lane = MFI->SpillTracker.reserveLanes(MRI, MF);
+ unsigned TgtReg = MFI->SpillTracker.LaneVGPR;
- BuildMI(MBB, MI, DL, get(AMDGPU::V_WRITELANE_B32), MFI->SpillTracker.LaneVGPR)
+ BuildMI(MBB, MI, DL, get(AMDGPU::V_WRITELANE_B32), TgtReg)
.addReg(SrcReg, KillFlag)
.addImm(Lane);
- MFI->SpillTracker.addSpilledReg(FrameIndex, MFI->SpillTracker.LaneVGPR, Lane);
+ MFI->SpillTracker.addSpilledReg(FrameIndex, TgtReg, Lane);
} else if (RI.isSGPRClass(RC)) {
// We are only allowed to create one new instruction when spilling
// registers, so we need to use pseudo instruction for vector
@@ -207,8 +215,7 @@ void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
// Reserve a spot in the spill tracker for each sub-register of
// the vector register.
unsigned NumSubRegs = RC->getSize() / 4;
- unsigned FirstLane = MFI->SpillTracker.reserveLanes(MRI, MBB.getParent(),
- NumSubRegs);
+ unsigned FirstLane = MFI->SpillTracker.reserveLanes(MRI, MF, NumSubRegs);
MFI->SpillTracker.addSpilledReg(FrameIndex, MFI->SpillTracker.LaneVGPR,
FirstLane);
@@ -234,19 +241,19 @@ void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
unsigned DestReg, int FrameIndex,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
- SIMachineFunctionInfo *MFI = MBB.getParent()->getInfo<SIMachineFunctionInfo>();
+ MachineFunction *MF = MBB.getParent();
+ SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
DebugLoc DL = MBB.findDebugLoc(MI);
- if (TRI->getCommonSubClass(RC, &AMDGPU::SReg_32RegClass)) {
- SIMachineFunctionInfo::SpilledReg Spill =
- MFI->SpillTracker.getSpilledReg(FrameIndex);
- assert(Spill.VGPR);
- BuildMI(MBB, MI, DL, get(AMDGPU::V_READLANE_B32), DestReg)
- .addReg(Spill.VGPR)
- .addImm(Spill.Lane);
- insertNOPs(MI, 3);
+
+ if (RI.hasVGPRs(RC)) {
+ LLVMContext &Ctx = MF->getFunction()->getContext();
+ Ctx.emitError("SIInstrInfo::loadRegToStackSlot - Can't retrieve spilled VGPR!");
+ BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DestReg)
+ .addImm(0);
} else if (RI.isSGPRClass(RC)){
unsigned Opcode;
switch(RC->getSize() * 8) {
+ case 32: Opcode = AMDGPU::SI_SPILL_S32_RESTORE; break;
case 64: Opcode = AMDGPU::SI_SPILL_S64_RESTORE; break;
case 128: Opcode = AMDGPU::SI_SPILL_S128_RESTORE; break;
case 256: Opcode = AMDGPU::SI_SPILL_S256_RESTORE; break;
@@ -260,7 +267,6 @@ void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
BuildMI(MBB, MI, DL, get(Opcode), DestReg)
.addReg(Spill.VGPR)
.addImm(FrameIndex);
- insertNOPs(MI, 3);
} else {
llvm_unreachable("VGPR spilling not supported");
}
@@ -281,6 +287,8 @@ static unsigned getNumSubRegsForSpillOp(unsigned Op) {
case AMDGPU::SI_SPILL_S64_SAVE:
case AMDGPU::SI_SPILL_S64_RESTORE:
return 2;
+ case AMDGPU::SI_SPILL_S32_RESTORE:
+ return 1;
default: llvm_unreachable("Invalid spill opcode");
}
}
@@ -334,7 +342,8 @@ bool SIInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
case AMDGPU::SI_SPILL_S512_RESTORE:
case AMDGPU::SI_SPILL_S256_RESTORE:
case AMDGPU::SI_SPILL_S128_RESTORE:
- case AMDGPU::SI_SPILL_S64_RESTORE: {
+ case AMDGPU::SI_SPILL_S64_RESTORE:
+ case AMDGPU::SI_SPILL_S32_RESTORE: {
unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode());
for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
@@ -348,6 +357,7 @@ bool SIInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
.addReg(MI->getOperand(1).getReg())
.addImm(Spill.Lane + i);
}
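+    // The NOPs provide the wait states required after the V_READLANE_B32s
+    // before the restored SGPRs can be used.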
+ insertNOPs(MI, 3);
MI->eraseFromParent();
break;
}
@@ -514,6 +524,23 @@ bool SIInstrInfo::isLiteralConstant(const MachineOperand &MO) const {
return (MO.isImm() || MO.isFPImm()) && !isInlineConstant(MO);
}
+static bool compareMachineOp(const MachineOperand &Op0,
+ const MachineOperand &Op1) {
+ if (Op0.getType() != Op1.getType())
+ return false;
+
+ switch (Op0.getType()) {
+ case MachineOperand::MO_Register:
+ return Op0.getReg() == Op1.getReg();
+ case MachineOperand::MO_Immediate:
+ return Op0.getImm() == Op1.getImm();
+ case MachineOperand::MO_FPImmediate:
+ return Op0.getFPImm() == Op1.getFPImm();
+ default:
+ llvm_unreachable("Didn't expect to be comparing these operand types");
+ }
+}
+
bool SIInstrInfo::verifyInstruction(const MachineInstr *MI,
StringRef &ErrInfo) const {
uint16_t Opcode = MI->getOpcode();
@@ -532,7 +559,14 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr *MI,
// Make sure the register classes are correct
for (unsigned i = 0, e = Desc.getNumOperands(); i != e; ++i) {
switch (Desc.OpInfo[i].OperandType) {
- case MCOI::OPERAND_REGISTER:
+ case MCOI::OPERAND_REGISTER: {
+ int RegClass = Desc.OpInfo[i].RegClass;
+ if (!RI.regClassCanUseImmediate(RegClass) &&
+ (MI->getOperand(i).isImm() || MI->getOperand(i).isFPImm())) {
+ ErrInfo = "Expected register, but got immediate";
+ return false;
+ }
+ }
break;
case MCOI::OPERAND_IMMEDIATE:
if (!MI->getOperand(i).isImm() && !MI->getOperand(i).isFPImm()) {
@@ -620,6 +654,24 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr *MI,
return false;
}
}
+
+ // Verify misc. restrictions on specific instructions.
+ if (Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F32 ||
+ Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F64) {
+ const MachineOperand &Src0 = MI->getOperand(2);
+ const MachineOperand &Src1 = MI->getOperand(3);
+ const MachineOperand &Src2 = MI->getOperand(4);
+ if (Src0.isReg() && Src1.isReg() && Src2.isReg()) {
+ if (!compareMachineOp(Src0, Src1) &&
+ !compareMachineOp(Src0, Src2)) {
+ ErrInfo = "v_div_scale_{f32|f64} require src0 = src1 or src2";
+ return false;
+ }
+ }
+ }
+
return true;
}
@@ -654,7 +706,9 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) {
case AMDGPU::S_SEXT_I32_I16: return AMDGPU::V_BFE_I32;
case AMDGPU::S_BFE_U32: return AMDGPU::V_BFE_U32;
case AMDGPU::S_BFE_I32: return AMDGPU::V_BFE_I32;
+ case AMDGPU::S_BREV_B32: return AMDGPU::V_BFREV_B32_e32;
case AMDGPU::S_NOT_B32: return AMDGPU::V_NOT_B32_e32;
+ case AMDGPU::S_NOT_B64: return AMDGPU::V_NOT_B32_e32;
case AMDGPU::S_CMP_EQ_I32: return AMDGPU::V_CMP_EQ_I32_e32;
case AMDGPU::S_CMP_LG_I32: return AMDGPU::V_CMP_NE_I32_e32;
case AMDGPU::S_CMP_GT_I32: return AMDGPU::V_CMP_GT_I32_e32;
@@ -667,6 +721,9 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) {
case AMDGPU::S_LOAD_DWORDX2_SGPR: return AMDGPU::BUFFER_LOAD_DWORDX2_ADDR64;
case AMDGPU::S_LOAD_DWORDX4_IMM:
case AMDGPU::S_LOAD_DWORDX4_SGPR: return AMDGPU::BUFFER_LOAD_DWORDX4_ADDR64;
+ case AMDGPU::S_BCNT1_I32_B32: return AMDGPU::V_BCNT_U32_B32_e32;
+ case AMDGPU::S_FF1_I32_B32: return AMDGPU::V_FFBL_B32_e32;
+ case AMDGPU::S_FLBIT_I32_B32: return AMDGPU::V_FFBH_U32_e32;
}
}
@@ -731,8 +788,8 @@ unsigned SIInstrInfo::buildExtractSubReg(MachineBasicBlock::iterator MI,
unsigned SubReg = MRI.createVirtualRegister(SubRC);
// Just in case the super register is itself a sub-register, copy it to a new
- // value so we don't need to wory about merging its subreg index with the
- // SubIdx passed to this function. The register coalescer should be able to
+ // value so we don't need to worry about merging its subreg index with the
+ // SubIdx passed to this function. The register coalescer should be able to
// eliminate this extra copy.
BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), get(TargetOpcode::COPY),
NewSuperReg)
@@ -1157,22 +1214,27 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {
continue;
}
case AMDGPU::S_AND_B64:
- splitScalar64BitOp(Worklist, Inst, AMDGPU::S_AND_B32);
+ splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_AND_B32);
Inst->eraseFromParent();
continue;
case AMDGPU::S_OR_B64:
- splitScalar64BitOp(Worklist, Inst, AMDGPU::S_OR_B32);
+ splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_OR_B32);
Inst->eraseFromParent();
continue;
case AMDGPU::S_XOR_B64:
- splitScalar64BitOp(Worklist, Inst, AMDGPU::S_XOR_B32);
+ splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_XOR_B32);
Inst->eraseFromParent();
continue;
case AMDGPU::S_NOT_B64:
- splitScalar64BitOp(Worklist, Inst, AMDGPU::S_NOT_B32);
+ splitScalar64BitUnaryOp(Worklist, Inst, AMDGPU::S_NOT_B32);
+ Inst->eraseFromParent();
+ continue;
+
+ case AMDGPU::S_BCNT1_I32_B64:
+ splitScalar64BitBCNT(Worklist, Inst);
Inst->eraseFromParent();
continue;
@@ -1217,6 +1279,10 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {
// 3 to not hit an assertion later in MCInstLower.
Inst->addOperand(MachineOperand::CreateImm(0));
Inst->addOperand(MachineOperand::CreateImm(0));
+ } else if (Opcode == AMDGPU::S_BCNT1_I32_B32) {
+ // The VALU version adds the second operand to the result, so insert an
+ // extra 0 operand.
+ Inst->addOperand(MachineOperand::CreateImm(0));
}
addDescImplicitUseDef(NewDesc, Inst);
@@ -1297,9 +1363,62 @@ const TargetRegisterClass *SIInstrInfo::getIndirectAddrRegClass() const {
return &AMDGPU::VReg_32RegClass;
}
-void SIInstrInfo::splitScalar64BitOp(SmallVectorImpl<MachineInstr *> &Worklist,
- MachineInstr *Inst,
- unsigned Opcode) const {
+void SIInstrInfo::splitScalar64BitUnaryOp(
+ SmallVectorImpl<MachineInstr *> &Worklist,
+ MachineInstr *Inst,
+ unsigned Opcode) const {
+ MachineBasicBlock &MBB = *Inst->getParent();
+ MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+
+ MachineOperand &Dest = Inst->getOperand(0);
+ MachineOperand &Src0 = Inst->getOperand(1);
+ DebugLoc DL = Inst->getDebugLoc();
+
+ MachineBasicBlock::iterator MII = Inst;
+
+ const MCInstrDesc &InstDesc = get(Opcode);
+ const TargetRegisterClass *Src0RC = Src0.isReg() ?
+ MRI.getRegClass(Src0.getReg()) :
+ &AMDGPU::SGPR_32RegClass;
+
+ const TargetRegisterClass *Src0SubRC = RI.getSubRegClass(Src0RC, AMDGPU::sub0);
+
+ MachineOperand SrcReg0Sub0 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
+ AMDGPU::sub0, Src0SubRC);
+
+ const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg());
+ const TargetRegisterClass *DestSubRC = RI.getSubRegClass(DestRC, AMDGPU::sub0);
+
+ unsigned DestSub0 = MRI.createVirtualRegister(DestRC);
+ MachineInstr *LoHalf = BuildMI(MBB, MII, DL, InstDesc, DestSub0)
+ .addOperand(SrcReg0Sub0);
+
+ MachineOperand SrcReg0Sub1 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
+ AMDGPU::sub1, Src0SubRC);
+
+ unsigned DestSub1 = MRI.createVirtualRegister(DestSubRC);
+ MachineInstr *HiHalf = BuildMI(MBB, MII, DL, InstDesc, DestSub1)
+ .addOperand(SrcReg0Sub1);
+
+ unsigned FullDestReg = MRI.createVirtualRegister(DestRC);
+ BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), FullDestReg)
+ .addReg(DestSub0)
+ .addImm(AMDGPU::sub0)
+ .addReg(DestSub1)
+ .addImm(AMDGPU::sub1);
+
+ MRI.replaceRegWith(Dest.getReg(), FullDestReg);
+
+ // Try to legalize the operands in case we need to swap the order to keep it
+ // valid.
+ Worklist.push_back(LoHalf);
+ Worklist.push_back(HiHalf);
+}
+
+void SIInstrInfo::splitScalar64BitBinaryOp(
+ SmallVectorImpl<MachineInstr *> &Worklist,
+ MachineInstr *Inst,
+ unsigned Opcode) const {
MachineBasicBlock &MBB = *Inst->getParent();
MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
@@ -1360,6 +1479,46 @@ void SIInstrInfo::splitScalar64BitOp(SmallVectorImpl<MachineInstr *> &Worklist,
Worklist.push_back(HiHalf);
}
+void SIInstrInfo::splitScalar64BitBCNT(SmallVectorImpl<MachineInstr *> &Worklist,
+ MachineInstr *Inst) const {
+ MachineBasicBlock &MBB = *Inst->getParent();
+ MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+
+ MachineBasicBlock::iterator MII = Inst;
+ DebugLoc DL = Inst->getDebugLoc();
+
+ MachineOperand &Dest = Inst->getOperand(0);
+ MachineOperand &Src = Inst->getOperand(1);
+
+ const MCInstrDesc &InstDesc = get(AMDGPU::V_BCNT_U32_B32_e32);
+ const TargetRegisterClass *SrcRC = Src.isReg() ?
+ MRI.getRegClass(Src.getReg()) :
+ &AMDGPU::SGPR_32RegClass;
+
+ unsigned MidReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+
+ const TargetRegisterClass *SrcSubRC = RI.getSubRegClass(SrcRC, AMDGPU::sub0);
+
+ MachineOperand SrcRegSub0 = buildExtractSubRegOrImm(MII, MRI, Src, SrcRC,
+ AMDGPU::sub0, SrcSubRC);
+ MachineOperand SrcRegSub1 = buildExtractSubRegOrImm(MII, MRI, Src, SrcRC,
+ AMDGPU::sub1, SrcSubRC);
+
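+  // V_BCNT_U32_B32 computes bitcount(src0) + src1, so count the low half
+  // into MidReg first and then accumulate the high half's count on top.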
+ MachineInstr *First = BuildMI(MBB, MII, DL, InstDesc, MidReg)
+ .addOperand(SrcRegSub0)
+ .addImm(0);
+
+ MachineInstr *Second = BuildMI(MBB, MII, DL, InstDesc, ResultReg)
+ .addOperand(SrcRegSub1)
+ .addReg(MidReg);
+
+ MRI.replaceRegWith(Dest.getReg(), ResultReg);
+
+ Worklist.push_back(First);
+ Worklist.push_back(Second);
+}
+
void SIInstrInfo::addDescImplicitUseDef(const MCInstrDesc &NewDesc,
MachineInstr *Inst) const {
// Add the implict and explicit register definitions.
diff --git a/lib/Target/R600/SIInstrInfo.h b/lib/Target/R600/SIInstrInfo.h
index 7b31a81..4c204d8 100644
--- a/lib/Target/R600/SIInstrInfo.h
+++ b/lib/Target/R600/SIInstrInfo.h
@@ -44,13 +44,19 @@ private:
const TargetRegisterClass *RC,
const MachineOperand &Op) const;
- void splitScalar64BitOp(SmallVectorImpl<MachineInstr *> & Worklist,
- MachineInstr *Inst, unsigned Opcode) const;
+ void splitScalar64BitUnaryOp(SmallVectorImpl<MachineInstr *> &Worklist,
+ MachineInstr *Inst, unsigned Opcode) const;
+
+ void splitScalar64BitBinaryOp(SmallVectorImpl<MachineInstr *> &Worklist,
+ MachineInstr *Inst, unsigned Opcode) const;
+
+ void splitScalar64BitBCNT(SmallVectorImpl<MachineInstr *> &Worklist,
+ MachineInstr *Inst) const;
void addDescImplicitUseDef(const MCInstrDesc &Desc, MachineInstr *MI) const;
public:
- explicit SIInstrInfo(AMDGPUTargetMachine &tm);
+ explicit SIInstrInfo(const AMDGPUSubtarget &st);
const SIRegisterInfo &getRegisterInfo() const override {
return RI;
diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td
index 2242e6d..774c9d1 100644
--- a/lib/Target/R600/SIInstrInfo.td
+++ b/lib/Target/R600/SIInstrInfo.td
@@ -147,6 +147,12 @@ def FRAMEri32 : Operand<iPTR> {
}
//===----------------------------------------------------------------------===//
+// Complex patterns
+//===----------------------------------------------------------------------===//
+
+def MUBUFAddr64 : ComplexPattern<i64, 3, "SelectMUBUFAddr64">;
+
+//===----------------------------------------------------------------------===//
// SI assembler operands
//===----------------------------------------------------------------------===//
@@ -187,6 +193,12 @@ class SOP1_64 <bits<8> op, string opName, list<dag> pattern> : SOP1 <
opName#" $dst, $src0", pattern
>;
+// 64-bit input, 32-bit output.
+class SOP1_32_64 <bits<8> op, string opName, list<dag> pattern> : SOP1 <
+ op, (outs SReg_32:$dst), (ins SSrc_64:$src0),
+ opName#" $dst, $src0", pattern
+>;
+
class SOP2_32 <bits<7> op, string opName, list<dag> pattern> : SOP2 <
op, (outs SReg_32:$dst), (ins SSrc_32:$src0, SSrc_32:$src1),
opName#" $dst, $src0, $src1", pattern
@@ -260,7 +272,7 @@ class SIMCInstr <string pseudo, int subtarget> {
multiclass VOP3_m <bits<9> op, dag outs, dag ins, string asm, list<dag> pattern,
string opName> {
- def "" : InstSI <outs, ins, "", pattern>, VOP <opName>,
+ def "" : VOP3Common <outs, ins, "", pattern>, VOP <opName>,
SIMCInstr<OpName, SISubtarget.NONE> {
let isPseudo = 1;
}
@@ -357,12 +369,13 @@ multiclass VOP2b_32 <bits<6> op, string opName, list<dag> pattern,
}
multiclass VOPC_Helper <bits<8> op, RegisterClass vrc, RegisterClass arc,
- string opName, ValueType vt, PatLeaf cond> {
-
+ string opName, ValueType vt, PatLeaf cond, bit defExec = 0> {
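+  // When defExec is set, the compare also writes EXEC (the V_CMPX_* forms).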
def _e32 : VOPC <
op, (ins arc:$src0, vrc:$src1),
opName#"_e32 $dst, $src0, $src1", []
- >, VOP <opName>;
+ >, VOP <opName> {
+ let Defs = !if(defExec, [VCC, EXEC], [VCC]);
+ }
def _e64 : VOP3 <
{0, op{7}, op{6}, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}},
@@ -375,6 +388,7 @@ multiclass VOPC_Helper <bits<8> op, RegisterClass vrc, RegisterClass arc,
[(set SReg_64:$dst, (i1 (setcc (vt arc:$src0), arc:$src1, cond)))]
)
>, VOP <opName> {
+ let Defs = !if(defExec, [EXEC], []);
let src2 = SIOperand.ZERO;
let src2_modifiers = 0;
}
@@ -388,6 +402,14 @@ multiclass VOPC_64 <bits<8> op, string opName,
ValueType vt = untyped, PatLeaf cond = COND_NULL>
: VOPC_Helper <op, VReg_64, VSrc_64, opName, vt, cond>;
+multiclass VOPCX_32 <bits<8> op, string opName,
+ ValueType vt = untyped, PatLeaf cond = COND_NULL>
+ : VOPC_Helper <op, VReg_32, VSrc_32, opName, vt, cond, 1>;
+
+multiclass VOPCX_64 <bits<8> op, string opName,
+ ValueType vt = untyped, PatLeaf cond = COND_NULL>
+ : VOPC_Helper <op, VReg_64, VSrc_64, opName, vt, cond, 1>;
+
multiclass VOP3_32 <bits<9> op, string opName, list<dag> pattern> : VOP3_m <
op, (outs VReg_32:$dst),
(ins InputMods: $src0_modifiers, VSrc_32:$src0, InputMods:$src1_modifiers,
@@ -396,7 +418,7 @@ multiclass VOP3_32 <bits<9> op, string opName, list<dag> pattern> : VOP3_m <
opName#" $dst, $src0_modifiers, $src1, $src2, $clamp, $omod", pattern, opName
>;
-class VOP3_64_Shift <bits <9> op, string opName, list<dag> pattern> : VOP3 <
+class VOP3_64_32 <bits <9> op, string opName, list<dag> pattern> : VOP3 <
op, (outs VReg_64:$dst),
(ins VSrc_64:$src0, VSrc_32:$src1),
opName#" $dst, $src0, $src1", pattern
@@ -410,11 +432,29 @@ class VOP3_64_Shift <bits <9> op, string opName, list<dag> pattern> : VOP3 <
class VOP3_64 <bits<9> op, string opName, list<dag> pattern> : VOP3 <
op, (outs VReg_64:$dst),
- (ins VSrc_64:$src0, VSrc_64:$src1, VSrc_64:$src2,
+ (ins InputMods:$src0_modifiers, VSrc_64:$src0,
+ InputMods:$src1_modifiers, VSrc_64:$src1,
+ InputMods:$src2_modifiers, VSrc_64:$src2,
+ InstFlag:$clamp, InstFlag:$omod),
+ opName#" $dst, $src0_modifiers, $src1_modifiers, $src2_modifiers, $clamp, $omod", pattern
+>, VOP <opName>;
+
+
+class VOP3b_Helper <bits<9> op, RegisterClass vrc, RegisterClass arc,
+ string opName, list<dag> pattern> : VOP3 <
+ op, (outs vrc:$dst0, SReg_64:$dst1),
+ (ins arc:$src0, arc:$src1, arc:$src2,
InstFlag:$abs, InstFlag:$clamp, InstFlag:$omod, InstFlag:$neg),
- opName#" $dst, $src0, $src1, $src2, $abs, $clamp, $omod, $neg", pattern
+ opName#" $dst0, $dst1, $src0, $src1, $src2, $abs, $clamp, $omod, $neg", pattern
>, VOP <opName>;
+
+class VOP3b_64 <bits<9> op, string opName, list<dag> pattern> :
+ VOP3b_Helper <op, VReg_64, VSrc_64, opName, pattern>;
+
+class VOP3b_32 <bits<9> op, string opName, list<dag> pattern> :
+ VOP3b_Helper <op, VReg_32, VSrc_32, opName, pattern>;
+
//===----------------------------------------------------------------------===//
// Vector I/O classes
//===----------------------------------------------------------------------===//
@@ -475,10 +515,11 @@ class DS_Store2_Helper <bits<8> op, string asm, RegisterClass regClass> : DS_1A
let vdst = 0;
}
+// 1 address, 1 data.
class DS_1A1D_RET <bits<8> op, string asm, RegisterClass rc> : DS_1A <
op,
(outs rc:$vdst),
- (ins i1imm:$gds, VReg_32:$addr, VReg_32:$data0, u16imm:$offset),
+ (ins i1imm:$gds, VReg_32:$addr, rc:$data0, u16imm:$offset),
asm#" $vdst, $addr, $data0, $offset, [M0]",
[]> {
@@ -487,6 +528,41 @@ class DS_1A1D_RET <bits<8> op, string asm, RegisterClass rc> : DS_1A <
let mayLoad = 1;
}
+// 1 address, 2 data.
+class DS_1A2D_RET <bits<8> op, string asm, RegisterClass rc> : DS_1A <
+ op,
+ (outs rc:$vdst),
+ (ins i1imm:$gds, VReg_32:$addr, rc:$data0, rc:$data1, u16imm:$offset),
+ asm#" $vdst, $addr, $data0, $data1, $offset, [M0]",
+ []> {
+ let mayStore = 1;
+ let mayLoad = 1;
+}
+
+// 1 address, 2 data.
+class DS_1A2D_NORET <bits<8> op, string asm, RegisterClass rc> : DS_1A <
+ op,
+ (outs),
+ (ins i1imm:$gds, VReg_32:$addr, rc:$data0, rc:$data1, u16imm:$offset),
+ asm#" $addr, $data0, $data1, $offset, [M0]",
+ []> {
+ let mayStore = 1;
+ let mayLoad = 1;
+}
+
+// 1 address, 1 data.
+class DS_1A1D_NORET <bits<8> op, string asm, RegisterClass rc> : DS_1A <
+ op,
+ (outs),
+ (ins i1imm:$gds, VReg_32:$addr, rc:$data0, u16imm:$offset),
+ asm#" $addr, $data0, $offset, [M0]",
+ []> {
+
+ let data1 = 0;
+ let mayStore = 1;
+ let mayLoad = 1;
+}
+
class MTBUF_Store_Helper <bits<3> op, string asm, RegisterClass regClass> : MTBUF <
op,
(outs),
@@ -500,7 +576,9 @@ class MTBUF_Store_Helper <bits<3> op, string asm, RegisterClass regClass> : MTBU
let mayLoad = 0;
}
-multiclass MUBUF_Load_Helper <bits<7> op, string asm, RegisterClass regClass> {
+multiclass MUBUF_Load_Helper <bits<7> op, string asm, RegisterClass regClass,
+ ValueType load_vt = i32,
+ SDPatternOperator ld = null_frag> {
let lds = 0, mayLoad = 1 in {
@@ -542,16 +620,19 @@ multiclass MUBUF_Load_Helper <bits<7> op, string asm, RegisterClass regClass> {
let offen = 0, idxen = 0, addr64 = 1, glc = 0, slc = 0, tfe = 0, soffset = 128 /* ZERO */ in {
def _ADDR64 : MUBUF <op, (outs regClass:$vdata),
(ins SReg_128:$srsrc, VReg_64:$vaddr, u16imm:$offset),
- asm#" $vdata, $srsrc + $vaddr + $offset", []>;
+ asm#" $vdata, $srsrc + $vaddr + $offset",
+ [(set load_vt:$vdata, (ld (MUBUFAddr64 v4i32:$srsrc,
+ i64:$vaddr, u16imm:$offset)))]>;
}
}
}
-class MUBUF_Store_Helper <bits<7> op, string name, RegisterClass vdataClass> :
+class MUBUF_Store_Helper <bits<7> op, string name, RegisterClass vdataClass,
+ ValueType store_vt, SDPatternOperator st> :
MUBUF <op, (outs), (ins vdataClass:$vdata, SReg_128:$srsrc, VReg_64:$vaddr,
u16imm:$offset),
name#" $vdata, $srsrc + $vaddr + $offset",
- []> {
+ [(st store_vt:$vdata, (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, u16imm:$offset))]> {
let mayLoad = 0;
let mayStore = 1;
@@ -658,6 +739,53 @@ multiclass MIMG_Sampler <bits<7> op, string asm> {
defm _V4 : MIMG_Sampler_Src_Helper<op, asm, VReg_128, 4>;
}
+class MIMG_Gather_Helper <bits<7> op, string asm,
+ RegisterClass dst_rc,
+ RegisterClass src_rc> : MIMG <
+ op,
+ (outs dst_rc:$vdata),
+ (ins i32imm:$dmask, i1imm:$unorm, i1imm:$glc, i1imm:$da, i1imm:$r128,
+ i1imm:$tfe, i1imm:$lwe, i1imm:$slc, src_rc:$vaddr,
+ SReg_256:$srsrc, SReg_128:$ssamp),
+ asm#" $vdata, $dmask, $unorm, $glc, $da, $r128,"
+ #" $tfe, $lwe, $slc, $vaddr, $srsrc, $ssamp",
+ []> {
+ let mayLoad = 1;
+ let mayStore = 0;
+
+ // DMASK was repurposed for GATHER4. 4 components are always
+ // returned and DMASK works like a swizzle - it selects
+ // the component to fetch. The only useful DMASK values are
+ // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
+ // (red,red,red,red) etc.) The ISA document doesn't mention
+ // this.
+ // Therefore, disable all code which updates DMASK by setting these two:
+ let MIMG = 0;
+ let hasPostISelHook = 0;
+}
+
+multiclass MIMG_Gather_Src_Helper <bits<7> op, string asm,
+ RegisterClass dst_rc,
+ int channels> {
+ def _V1 : MIMG_Gather_Helper <op, asm, dst_rc, VReg_32>,
+ MIMG_Mask<asm#"_V1", channels>;
+ def _V2 : MIMG_Gather_Helper <op, asm, dst_rc, VReg_64>,
+ MIMG_Mask<asm#"_V2", channels>;
+ def _V4 : MIMG_Gather_Helper <op, asm, dst_rc, VReg_128>,
+ MIMG_Mask<asm#"_V4", channels>;
+ def _V8 : MIMG_Gather_Helper <op, asm, dst_rc, VReg_256>,
+ MIMG_Mask<asm#"_V8", channels>;
+ def _V16 : MIMG_Gather_Helper <op, asm, dst_rc, VReg_512>,
+ MIMG_Mask<asm#"_V16", channels>;
+}
+
+multiclass MIMG_Gather <bits<7> op, string asm> {
+ defm _V1 : MIMG_Gather_Src_Helper<op, asm, VReg_32, 1>;
+ defm _V2 : MIMG_Gather_Src_Helper<op, asm, VReg_64, 2>;
+ defm _V3 : MIMG_Gather_Src_Helper<op, asm, VReg_96, 3>;
+ defm _V4 : MIMG_Gather_Src_Helper<op, asm, VReg_128, 4>;
+}
+
//===----------------------------------------------------------------------===//
// Vector instruction mappings
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td
index 500fa78..b3b44e2 100644
--- a/lib/Target/R600/SIInstructions.td
+++ b/lib/Target/R600/SIInstructions.td
@@ -96,22 +96,35 @@ def S_NOT_B32 : SOP1_32 <0x00000007, "S_NOT_B32",
[(set i32:$dst, (not i32:$src0))]
>;
-def S_NOT_B64 : SOP1_64 <0x00000008, "S_NOT_B64", []>;
+def S_NOT_B64 : SOP1_64 <0x00000008, "S_NOT_B64",
+ [(set i64:$dst, (not i64:$src0))]
+>;
def S_WQM_B32 : SOP1_32 <0x00000009, "S_WQM_B32", []>;
def S_WQM_B64 : SOP1_64 <0x0000000a, "S_WQM_B64", []>;
-def S_BREV_B32 : SOP1_32 <0x0000000b, "S_BREV_B32", []>;
+def S_BREV_B32 : SOP1_32 <0x0000000b, "S_BREV_B32",
+ [(set i32:$dst, (AMDGPUbrev i32:$src0))]
+>;
def S_BREV_B64 : SOP1_64 <0x0000000c, "S_BREV_B64", []>;
} // End neverHasSideEffects = 1
////def S_BCNT0_I32_B32 : SOP1_BCNT0 <0x0000000d, "S_BCNT0_I32_B32", []>;
////def S_BCNT0_I32_B64 : SOP1_BCNT0 <0x0000000e, "S_BCNT0_I32_B64", []>;
-////def S_BCNT1_I32_B32 : SOP1_BCNT1 <0x0000000f, "S_BCNT1_I32_B32", []>;
-////def S_BCNT1_I32_B64 : SOP1_BCNT1 <0x00000010, "S_BCNT1_I32_B64", []>;
-////def S_FF0_I32_B32 : SOP1_FF0 <0x00000011, "S_FF0_I32_B32", []>;
+def S_BCNT1_I32_B32 : SOP1_32 <0x0000000f, "S_BCNT1_I32_B32",
+ [(set i32:$dst, (ctpop i32:$src0))]
+>;
+def S_BCNT1_I32_B64 : SOP1_32_64 <0x00000010, "S_BCNT1_I32_B64", []>;
+
+////def S_FF0_I32_B32 : SOP1_32 <0x00000011, "S_FF0_I32_B32", []>;
////def S_FF0_I32_B64 : SOP1_FF0 <0x00000012, "S_FF0_I32_B64", []>;
-////def S_FF1_I32_B32 : SOP1_FF1 <0x00000013, "S_FF1_I32_B32", []>;
+def S_FF1_I32_B32 : SOP1_32 <0x00000013, "S_FF1_I32_B32",
+ [(set i32:$dst, (cttz_zero_undef i32:$src0))]
+>;
////def S_FF1_I32_B64 : SOP1_FF1 <0x00000014, "S_FF1_I32_B64", []>;
-//def S_FLBIT_I32_B32 : SOP1_32 <0x00000015, "S_FLBIT_I32_B32", []>;
+
+def S_FLBIT_I32_B32 : SOP1_32 <0x00000015, "S_FLBIT_I32_B32",
+ [(set i32:$dst, (ctlz_zero_undef i32:$src0))]
+>;
+
//def S_FLBIT_I32_B64 : SOP1_32 <0x00000016, "S_FLBIT_I32_B64", []>;
def S_FLBIT_I32 : SOP1_32 <0x00000017, "S_FLBIT_I32", []>;
//def S_FLBIT_I32_I64 : SOP1_32 <0x00000018, "S_FLBIT_I32_I64", []>;
@@ -320,7 +333,7 @@ def S_CMPK_EQ_I32 : SOPK <
>;
*/
-let isCompare = 1 in {
+let isCompare = 1, Defs = [SCC] in {
def S_CMPK_LG_I32 : SOPK_32 <0x00000004, "S_CMPK_LG_I32", []>;
def S_CMPK_GT_I32 : SOPK_32 <0x00000005, "S_CMPK_GT_I32", []>;
def S_CMPK_GE_I32 : SOPK_32 <0x00000006, "S_CMPK_GE_I32", []>;
@@ -332,7 +345,7 @@ def S_CMPK_GT_U32 : SOPK_32 <0x0000000b, "S_CMPK_GT_U32", []>;
def S_CMPK_GE_U32 : SOPK_32 <0x0000000c, "S_CMPK_GE_U32", []>;
def S_CMPK_LT_U32 : SOPK_32 <0x0000000d, "S_CMPK_LT_U32", []>;
def S_CMPK_LE_U32 : SOPK_32 <0x0000000e, "S_CMPK_LE_U32", []>;
-} // End isCompare = 1
+} // End isCompare = 1, Defs = [SCC]
let Defs = [SCC], isCommutable = 1 in {
def S_ADDK_I32 : SOPK_32 <0x0000000f, "S_ADDK_I32", []>;
@@ -467,26 +480,26 @@ defm V_CMP_NEQ_F32 : VOPC_32 <0x0000000d, "V_CMP_NEQ_F32", f32, COND_UNE>;
defm V_CMP_NLT_F32 : VOPC_32 <0x0000000e, "V_CMP_NLT_F32">;
defm V_CMP_TRU_F32 : VOPC_32 <0x0000000f, "V_CMP_TRU_F32">;
-let hasSideEffects = 1, Defs = [EXEC] in {
+let hasSideEffects = 1 in {
-defm V_CMPX_F_F32 : VOPC_32 <0x00000010, "V_CMPX_F_F32">;
-defm V_CMPX_LT_F32 : VOPC_32 <0x00000011, "V_CMPX_LT_F32">;
-defm V_CMPX_EQ_F32 : VOPC_32 <0x00000012, "V_CMPX_EQ_F32">;
-defm V_CMPX_LE_F32 : VOPC_32 <0x00000013, "V_CMPX_LE_F32">;
-defm V_CMPX_GT_F32 : VOPC_32 <0x00000014, "V_CMPX_GT_F32">;
-defm V_CMPX_LG_F32 : VOPC_32 <0x00000015, "V_CMPX_LG_F32">;
-defm V_CMPX_GE_F32 : VOPC_32 <0x00000016, "V_CMPX_GE_F32">;
-defm V_CMPX_O_F32 : VOPC_32 <0x00000017, "V_CMPX_O_F32">;
-defm V_CMPX_U_F32 : VOPC_32 <0x00000018, "V_CMPX_U_F32">;
-defm V_CMPX_NGE_F32 : VOPC_32 <0x00000019, "V_CMPX_NGE_F32">;
-defm V_CMPX_NLG_F32 : VOPC_32 <0x0000001a, "V_CMPX_NLG_F32">;
-defm V_CMPX_NGT_F32 : VOPC_32 <0x0000001b, "V_CMPX_NGT_F32">;
-defm V_CMPX_NLE_F32 : VOPC_32 <0x0000001c, "V_CMPX_NLE_F32">;
-defm V_CMPX_NEQ_F32 : VOPC_32 <0x0000001d, "V_CMPX_NEQ_F32">;
-defm V_CMPX_NLT_F32 : VOPC_32 <0x0000001e, "V_CMPX_NLT_F32">;
-defm V_CMPX_TRU_F32 : VOPC_32 <0x0000001f, "V_CMPX_TRU_F32">;
+defm V_CMPX_F_F32 : VOPCX_32 <0x00000010, "V_CMPX_F_F32">;
+defm V_CMPX_LT_F32 : VOPCX_32 <0x00000011, "V_CMPX_LT_F32">;
+defm V_CMPX_EQ_F32 : VOPCX_32 <0x00000012, "V_CMPX_EQ_F32">;
+defm V_CMPX_LE_F32 : VOPCX_32 <0x00000013, "V_CMPX_LE_F32">;
+defm V_CMPX_GT_F32 : VOPCX_32 <0x00000014, "V_CMPX_GT_F32">;
+defm V_CMPX_LG_F32 : VOPCX_32 <0x00000015, "V_CMPX_LG_F32">;
+defm V_CMPX_GE_F32 : VOPCX_32 <0x00000016, "V_CMPX_GE_F32">;
+defm V_CMPX_O_F32 : VOPCX_32 <0x00000017, "V_CMPX_O_F32">;
+defm V_CMPX_U_F32 : VOPCX_32 <0x00000018, "V_CMPX_U_F32">;
+defm V_CMPX_NGE_F32 : VOPCX_32 <0x00000019, "V_CMPX_NGE_F32">;
+defm V_CMPX_NLG_F32 : VOPCX_32 <0x0000001a, "V_CMPX_NLG_F32">;
+defm V_CMPX_NGT_F32 : VOPCX_32 <0x0000001b, "V_CMPX_NGT_F32">;
+defm V_CMPX_NLE_F32 : VOPCX_32 <0x0000001c, "V_CMPX_NLE_F32">;
+defm V_CMPX_NEQ_F32 : VOPCX_32 <0x0000001d, "V_CMPX_NEQ_F32">;
+defm V_CMPX_NLT_F32 : VOPCX_32 <0x0000001e, "V_CMPX_NLT_F32">;
+defm V_CMPX_TRU_F32 : VOPCX_32 <0x0000001f, "V_CMPX_TRU_F32">;
-} // End hasSideEffects = 1, Defs = [EXEC]
+} // End hasSideEffects = 1
defm V_CMP_F_F64 : VOPC_64 <0x00000020, "V_CMP_F_F64">;
defm V_CMP_LT_F64 : VOPC_64 <0x00000021, "V_CMP_LT_F64", f64, COND_OLT>;
@@ -505,26 +518,26 @@ defm V_CMP_NEQ_F64 : VOPC_64 <0x0000002d, "V_CMP_NEQ_F64", f64, COND_UNE>;
defm V_CMP_NLT_F64 : VOPC_64 <0x0000002e, "V_CMP_NLT_F64">;
defm V_CMP_TRU_F64 : VOPC_64 <0x0000002f, "V_CMP_TRU_F64">;
-let hasSideEffects = 1, Defs = [EXEC] in {
+let hasSideEffects = 1 in {
-defm V_CMPX_F_F64 : VOPC_64 <0x00000030, "V_CMPX_F_F64">;
-defm V_CMPX_LT_F64 : VOPC_64 <0x00000031, "V_CMPX_LT_F64">;
-defm V_CMPX_EQ_F64 : VOPC_64 <0x00000032, "V_CMPX_EQ_F64">;
-defm V_CMPX_LE_F64 : VOPC_64 <0x00000033, "V_CMPX_LE_F64">;
-defm V_CMPX_GT_F64 : VOPC_64 <0x00000034, "V_CMPX_GT_F64">;
-defm V_CMPX_LG_F64 : VOPC_64 <0x00000035, "V_CMPX_LG_F64">;
-defm V_CMPX_GE_F64 : VOPC_64 <0x00000036, "V_CMPX_GE_F64">;
-defm V_CMPX_O_F64 : VOPC_64 <0x00000037, "V_CMPX_O_F64">;
-defm V_CMPX_U_F64 : VOPC_64 <0x00000038, "V_CMPX_U_F64">;
-defm V_CMPX_NGE_F64 : VOPC_64 <0x00000039, "V_CMPX_NGE_F64">;
-defm V_CMPX_NLG_F64 : VOPC_64 <0x0000003a, "V_CMPX_NLG_F64">;
-defm V_CMPX_NGT_F64 : VOPC_64 <0x0000003b, "V_CMPX_NGT_F64">;
-defm V_CMPX_NLE_F64 : VOPC_64 <0x0000003c, "V_CMPX_NLE_F64">;
-defm V_CMPX_NEQ_F64 : VOPC_64 <0x0000003d, "V_CMPX_NEQ_F64">;
-defm V_CMPX_NLT_F64 : VOPC_64 <0x0000003e, "V_CMPX_NLT_F64">;
-defm V_CMPX_TRU_F64 : VOPC_64 <0x0000003f, "V_CMPX_TRU_F64">;
+defm V_CMPX_F_F64 : VOPCX_64 <0x00000030, "V_CMPX_F_F64">;
+defm V_CMPX_LT_F64 : VOPCX_64 <0x00000031, "V_CMPX_LT_F64">;
+defm V_CMPX_EQ_F64 : VOPCX_64 <0x00000032, "V_CMPX_EQ_F64">;
+defm V_CMPX_LE_F64 : VOPCX_64 <0x00000033, "V_CMPX_LE_F64">;
+defm V_CMPX_GT_F64 : VOPCX_64 <0x00000034, "V_CMPX_GT_F64">;
+defm V_CMPX_LG_F64 : VOPCX_64 <0x00000035, "V_CMPX_LG_F64">;
+defm V_CMPX_GE_F64 : VOPCX_64 <0x00000036, "V_CMPX_GE_F64">;
+defm V_CMPX_O_F64 : VOPCX_64 <0x00000037, "V_CMPX_O_F64">;
+defm V_CMPX_U_F64 : VOPCX_64 <0x00000038, "V_CMPX_U_F64">;
+defm V_CMPX_NGE_F64 : VOPCX_64 <0x00000039, "V_CMPX_NGE_F64">;
+defm V_CMPX_NLG_F64 : VOPCX_64 <0x0000003a, "V_CMPX_NLG_F64">;
+defm V_CMPX_NGT_F64 : VOPCX_64 <0x0000003b, "V_CMPX_NGT_F64">;
+defm V_CMPX_NLE_F64 : VOPCX_64 <0x0000003c, "V_CMPX_NLE_F64">;
+defm V_CMPX_NEQ_F64 : VOPCX_64 <0x0000003d, "V_CMPX_NEQ_F64">;
+defm V_CMPX_NLT_F64 : VOPCX_64 <0x0000003e, "V_CMPX_NLT_F64">;
+defm V_CMPX_TRU_F64 : VOPCX_64 <0x0000003f, "V_CMPX_TRU_F64">;
-} // End hasSideEffects = 1, Defs = [EXEC]
+} // End hasSideEffects = 1
defm V_CMPS_F_F32 : VOPC_32 <0x00000040, "V_CMPS_F_F32">;
defm V_CMPS_LT_F32 : VOPC_32 <0x00000041, "V_CMPS_LT_F32">;
@@ -543,26 +556,26 @@ defm V_CMPS_NEQ_F32 : VOPC_32 <0x0000004d, "V_CMPS_NEQ_F32">;
defm V_CMPS_NLT_F32 : VOPC_32 <0x0000004e, "V_CMPS_NLT_F32">;
defm V_CMPS_TRU_F32 : VOPC_32 <0x0000004f, "V_CMPS_TRU_F32">;
-let hasSideEffects = 1, Defs = [EXEC] in {
+let hasSideEffects = 1 in {
-defm V_CMPSX_F_F32 : VOPC_32 <0x00000050, "V_CMPSX_F_F32">;
-defm V_CMPSX_LT_F32 : VOPC_32 <0x00000051, "V_CMPSX_LT_F32">;
-defm V_CMPSX_EQ_F32 : VOPC_32 <0x00000052, "V_CMPSX_EQ_F32">;
-defm V_CMPSX_LE_F32 : VOPC_32 <0x00000053, "V_CMPSX_LE_F32">;
-defm V_CMPSX_GT_F32 : VOPC_32 <0x00000054, "V_CMPSX_GT_F32">;
-defm V_CMPSX_LG_F32 : VOPC_32 <0x00000055, "V_CMPSX_LG_F32">;
-defm V_CMPSX_GE_F32 : VOPC_32 <0x00000056, "V_CMPSX_GE_F32">;
-defm V_CMPSX_O_F32 : VOPC_32 <0x00000057, "V_CMPSX_O_F32">;
-defm V_CMPSX_U_F32 : VOPC_32 <0x00000058, "V_CMPSX_U_F32">;
-defm V_CMPSX_NGE_F32 : VOPC_32 <0x00000059, "V_CMPSX_NGE_F32">;
-defm V_CMPSX_NLG_F32 : VOPC_32 <0x0000005a, "V_CMPSX_NLG_F32">;
-defm V_CMPSX_NGT_F32 : VOPC_32 <0x0000005b, "V_CMPSX_NGT_F32">;
-defm V_CMPSX_NLE_F32 : VOPC_32 <0x0000005c, "V_CMPSX_NLE_F32">;
-defm V_CMPSX_NEQ_F32 : VOPC_32 <0x0000005d, "V_CMPSX_NEQ_F32">;
-defm V_CMPSX_NLT_F32 : VOPC_32 <0x0000005e, "V_CMPSX_NLT_F32">;
-defm V_CMPSX_TRU_F32 : VOPC_32 <0x0000005f, "V_CMPSX_TRU_F32">;
+defm V_CMPSX_F_F32 : VOPCX_32 <0x00000050, "V_CMPSX_F_F32">;
+defm V_CMPSX_LT_F32 : VOPCX_32 <0x00000051, "V_CMPSX_LT_F32">;
+defm V_CMPSX_EQ_F32 : VOPCX_32 <0x00000052, "V_CMPSX_EQ_F32">;
+defm V_CMPSX_LE_F32 : VOPCX_32 <0x00000053, "V_CMPSX_LE_F32">;
+defm V_CMPSX_GT_F32 : VOPCX_32 <0x00000054, "V_CMPSX_GT_F32">;
+defm V_CMPSX_LG_F32 : VOPCX_32 <0x00000055, "V_CMPSX_LG_F32">;
+defm V_CMPSX_GE_F32 : VOPCX_32 <0x00000056, "V_CMPSX_GE_F32">;
+defm V_CMPSX_O_F32 : VOPCX_32 <0x00000057, "V_CMPSX_O_F32">;
+defm V_CMPSX_U_F32 : VOPCX_32 <0x00000058, "V_CMPSX_U_F32">;
+defm V_CMPSX_NGE_F32 : VOPCX_32 <0x00000059, "V_CMPSX_NGE_F32">;
+defm V_CMPSX_NLG_F32 : VOPCX_32 <0x0000005a, "V_CMPSX_NLG_F32">;
+defm V_CMPSX_NGT_F32 : VOPCX_32 <0x0000005b, "V_CMPSX_NGT_F32">;
+defm V_CMPSX_NLE_F32 : VOPCX_32 <0x0000005c, "V_CMPSX_NLE_F32">;
+defm V_CMPSX_NEQ_F32 : VOPCX_32 <0x0000005d, "V_CMPSX_NEQ_F32">;
+defm V_CMPSX_NLT_F32 : VOPCX_32 <0x0000005e, "V_CMPSX_NLT_F32">;
+defm V_CMPSX_TRU_F32 : VOPCX_32 <0x0000005f, "V_CMPSX_TRU_F32">;
-} // End hasSideEffects = 1, Defs = [EXEC]
+} // End hasSideEffects = 1
defm V_CMPS_F_F64 : VOPC_64 <0x00000060, "V_CMPS_F_F64">;
defm V_CMPS_LT_F64 : VOPC_64 <0x00000061, "V_CMPS_LT_F64">;
@@ -611,18 +624,18 @@ defm V_CMP_NE_I32 : VOPC_32 <0x00000085, "V_CMP_NE_I32", i32, COND_NE>;
defm V_CMP_GE_I32 : VOPC_32 <0x00000086, "V_CMP_GE_I32", i32, COND_SGE>;
defm V_CMP_T_I32 : VOPC_32 <0x00000087, "V_CMP_T_I32">;
-let hasSideEffects = 1, Defs = [EXEC] in {
+let hasSideEffects = 1 in {
-defm V_CMPX_F_I32 : VOPC_32 <0x00000090, "V_CMPX_F_I32">;
-defm V_CMPX_LT_I32 : VOPC_32 <0x00000091, "V_CMPX_LT_I32">;
-defm V_CMPX_EQ_I32 : VOPC_32 <0x00000092, "V_CMPX_EQ_I32">;
-defm V_CMPX_LE_I32 : VOPC_32 <0x00000093, "V_CMPX_LE_I32">;
-defm V_CMPX_GT_I32 : VOPC_32 <0x00000094, "V_CMPX_GT_I32">;
-defm V_CMPX_NE_I32 : VOPC_32 <0x00000095, "V_CMPX_NE_I32">;
-defm V_CMPX_GE_I32 : VOPC_32 <0x00000096, "V_CMPX_GE_I32">;
-defm V_CMPX_T_I32 : VOPC_32 <0x00000097, "V_CMPX_T_I32">;
+defm V_CMPX_F_I32 : VOPCX_32 <0x00000090, "V_CMPX_F_I32">;
+defm V_CMPX_LT_I32 : VOPCX_32 <0x00000091, "V_CMPX_LT_I32">;
+defm V_CMPX_EQ_I32 : VOPCX_32 <0x00000092, "V_CMPX_EQ_I32">;
+defm V_CMPX_LE_I32 : VOPCX_32 <0x00000093, "V_CMPX_LE_I32">;
+defm V_CMPX_GT_I32 : VOPCX_32 <0x00000094, "V_CMPX_GT_I32">;
+defm V_CMPX_NE_I32 : VOPCX_32 <0x00000095, "V_CMPX_NE_I32">;
+defm V_CMPX_GE_I32 : VOPCX_32 <0x00000096, "V_CMPX_GE_I32">;
+defm V_CMPX_T_I32 : VOPCX_32 <0x00000097, "V_CMPX_T_I32">;
-} // End hasSideEffects = 1, Defs = [EXEC]
+} // End hasSideEffects = 1
defm V_CMP_F_I64 : VOPC_64 <0x000000a0, "V_CMP_F_I64">;
defm V_CMP_LT_I64 : VOPC_64 <0x000000a1, "V_CMP_LT_I64", i64, COND_SLT>;
@@ -633,18 +646,18 @@ defm V_CMP_NE_I64 : VOPC_64 <0x000000a5, "V_CMP_NE_I64", i64, COND_NE>;
defm V_CMP_GE_I64 : VOPC_64 <0x000000a6, "V_CMP_GE_I64", i64, COND_SGE>;
defm V_CMP_T_I64 : VOPC_64 <0x000000a7, "V_CMP_T_I64">;
-let hasSideEffects = 1, Defs = [EXEC] in {
+let hasSideEffects = 1 in {
-defm V_CMPX_F_I64 : VOPC_64 <0x000000b0, "V_CMPX_F_I64">;
-defm V_CMPX_LT_I64 : VOPC_64 <0x000000b1, "V_CMPX_LT_I64">;
-defm V_CMPX_EQ_I64 : VOPC_64 <0x000000b2, "V_CMPX_EQ_I64">;
-defm V_CMPX_LE_I64 : VOPC_64 <0x000000b3, "V_CMPX_LE_I64">;
-defm V_CMPX_GT_I64 : VOPC_64 <0x000000b4, "V_CMPX_GT_I64">;
-defm V_CMPX_NE_I64 : VOPC_64 <0x000000b5, "V_CMPX_NE_I64">;
-defm V_CMPX_GE_I64 : VOPC_64 <0x000000b6, "V_CMPX_GE_I64">;
-defm V_CMPX_T_I64 : VOPC_64 <0x000000b7, "V_CMPX_T_I64">;
+defm V_CMPX_F_I64 : VOPCX_64 <0x000000b0, "V_CMPX_F_I64">;
+defm V_CMPX_LT_I64 : VOPCX_64 <0x000000b1, "V_CMPX_LT_I64">;
+defm V_CMPX_EQ_I64 : VOPCX_64 <0x000000b2, "V_CMPX_EQ_I64">;
+defm V_CMPX_LE_I64 : VOPCX_64 <0x000000b3, "V_CMPX_LE_I64">;
+defm V_CMPX_GT_I64 : VOPCX_64 <0x000000b4, "V_CMPX_GT_I64">;
+defm V_CMPX_NE_I64 : VOPCX_64 <0x000000b5, "V_CMPX_NE_I64">;
+defm V_CMPX_GE_I64 : VOPCX_64 <0x000000b6, "V_CMPX_GE_I64">;
+defm V_CMPX_T_I64 : VOPCX_64 <0x000000b7, "V_CMPX_T_I64">;
-} // End hasSideEffects = 1, Defs = [EXEC]
+} // End hasSideEffects = 1
defm V_CMP_F_U32 : VOPC_32 <0x000000c0, "V_CMP_F_U32">;
defm V_CMP_LT_U32 : VOPC_32 <0x000000c1, "V_CMP_LT_U32", i32, COND_ULT>;
@@ -655,18 +668,18 @@ defm V_CMP_NE_U32 : VOPC_32 <0x000000c5, "V_CMP_NE_U32", i32, COND_NE>;
defm V_CMP_GE_U32 : VOPC_32 <0x000000c6, "V_CMP_GE_U32", i32, COND_UGE>;
defm V_CMP_T_U32 : VOPC_32 <0x000000c7, "V_CMP_T_U32">;
-let hasSideEffects = 1, Defs = [EXEC] in {
+let hasSideEffects = 1 in {
-defm V_CMPX_F_U32 : VOPC_32 <0x000000d0, "V_CMPX_F_U32">;
-defm V_CMPX_LT_U32 : VOPC_32 <0x000000d1, "V_CMPX_LT_U32">;
-defm V_CMPX_EQ_U32 : VOPC_32 <0x000000d2, "V_CMPX_EQ_U32">;
-defm V_CMPX_LE_U32 : VOPC_32 <0x000000d3, "V_CMPX_LE_U32">;
-defm V_CMPX_GT_U32 : VOPC_32 <0x000000d4, "V_CMPX_GT_U32">;
-defm V_CMPX_NE_U32 : VOPC_32 <0x000000d5, "V_CMPX_NE_U32">;
-defm V_CMPX_GE_U32 : VOPC_32 <0x000000d6, "V_CMPX_GE_U32">;
-defm V_CMPX_T_U32 : VOPC_32 <0x000000d7, "V_CMPX_T_U32">;
+defm V_CMPX_F_U32 : VOPCX_32 <0x000000d0, "V_CMPX_F_U32">;
+defm V_CMPX_LT_U32 : VOPCX_32 <0x000000d1, "V_CMPX_LT_U32">;
+defm V_CMPX_EQ_U32 : VOPCX_32 <0x000000d2, "V_CMPX_EQ_U32">;
+defm V_CMPX_LE_U32 : VOPCX_32 <0x000000d3, "V_CMPX_LE_U32">;
+defm V_CMPX_GT_U32 : VOPCX_32 <0x000000d4, "V_CMPX_GT_U32">;
+defm V_CMPX_NE_U32 : VOPCX_32 <0x000000d5, "V_CMPX_NE_U32">;
+defm V_CMPX_GE_U32 : VOPCX_32 <0x000000d6, "V_CMPX_GE_U32">;
+defm V_CMPX_T_U32 : VOPCX_32 <0x000000d7, "V_CMPX_T_U32">;
-} // End hasSideEffects = 1, Defs = [EXEC]
+} // End hasSideEffects = 1
defm V_CMP_F_U64 : VOPC_64 <0x000000e0, "V_CMP_F_U64">;
defm V_CMP_LT_U64 : VOPC_64 <0x000000e1, "V_CMP_LT_U64", i64, COND_ULT>;
@@ -677,30 +690,30 @@ defm V_CMP_NE_U64 : VOPC_64 <0x000000e5, "V_CMP_NE_U64", i64, COND_NE>;
defm V_CMP_GE_U64 : VOPC_64 <0x000000e6, "V_CMP_GE_U64", i64, COND_UGE>;
defm V_CMP_T_U64 : VOPC_64 <0x000000e7, "V_CMP_T_U64">;
-let hasSideEffects = 1, Defs = [EXEC] in {
+let hasSideEffects = 1 in {
-defm V_CMPX_F_U64 : VOPC_64 <0x000000f0, "V_CMPX_F_U64">;
-defm V_CMPX_LT_U64 : VOPC_64 <0x000000f1, "V_CMPX_LT_U64">;
-defm V_CMPX_EQ_U64 : VOPC_64 <0x000000f2, "V_CMPX_EQ_U64">;
-defm V_CMPX_LE_U64 : VOPC_64 <0x000000f3, "V_CMPX_LE_U64">;
-defm V_CMPX_GT_U64 : VOPC_64 <0x000000f4, "V_CMPX_GT_U64">;
-defm V_CMPX_NE_U64 : VOPC_64 <0x000000f5, "V_CMPX_NE_U64">;
-defm V_CMPX_GE_U64 : VOPC_64 <0x000000f6, "V_CMPX_GE_U64">;
-defm V_CMPX_T_U64 : VOPC_64 <0x000000f7, "V_CMPX_T_U64">;
+defm V_CMPX_F_U64 : VOPCX_64 <0x000000f0, "V_CMPX_F_U64">;
+defm V_CMPX_LT_U64 : VOPCX_64 <0x000000f1, "V_CMPX_LT_U64">;
+defm V_CMPX_EQ_U64 : VOPCX_64 <0x000000f2, "V_CMPX_EQ_U64">;
+defm V_CMPX_LE_U64 : VOPCX_64 <0x000000f3, "V_CMPX_LE_U64">;
+defm V_CMPX_GT_U64 : VOPCX_64 <0x000000f4, "V_CMPX_GT_U64">;
+defm V_CMPX_NE_U64 : VOPCX_64 <0x000000f5, "V_CMPX_NE_U64">;
+defm V_CMPX_GE_U64 : VOPCX_64 <0x000000f6, "V_CMPX_GE_U64">;
+defm V_CMPX_T_U64 : VOPCX_64 <0x000000f7, "V_CMPX_T_U64">;
-} // End hasSideEffects = 1, Defs = [EXEC]
+} // End hasSideEffects = 1
defm V_CMP_CLASS_F32 : VOPC_32 <0x00000088, "V_CMP_CLASS_F32">;
-let hasSideEffects = 1, Defs = [EXEC] in {
-defm V_CMPX_CLASS_F32 : VOPC_32 <0x00000098, "V_CMPX_CLASS_F32">;
-} // End hasSideEffects = 1, Defs = [EXEC]
+let hasSideEffects = 1 in {
+defm V_CMPX_CLASS_F32 : VOPCX_32 <0x00000098, "V_CMPX_CLASS_F32">;
+} // End hasSideEffects = 1
defm V_CMP_CLASS_F64 : VOPC_64 <0x000000a8, "V_CMP_CLASS_F64">;
-let hasSideEffects = 1, Defs = [EXEC] in {
-defm V_CMPX_CLASS_F64 : VOPC_64 <0x000000b8, "V_CMPX_CLASS_F64">;
-} // End hasSideEffects = 1, Defs = [EXEC]
+let hasSideEffects = 1 in {
+defm V_CMPX_CLASS_F64 : VOPCX_64 <0x000000b8, "V_CMPX_CLASS_F64">;
+} // End hasSideEffects = 1
} // End isCompare = 1
@@ -708,8 +721,97 @@ defm V_CMPX_CLASS_F64 : VOPC_64 <0x000000b8, "V_CMPX_CLASS_F64">;
// DS Instructions
//===----------------------------------------------------------------------===//
-def DS_ADD_U32_RTN : DS_1A1D_RET <0x20, "DS_ADD_U32_RTN", VReg_32>;
-def DS_SUB_U32_RTN : DS_1A1D_RET <0x21, "DS_SUB_U32_RTN", VReg_32>;
+
+def DS_ADD_U32 : DS_1A1D_NORET <0x0, "DS_ADD_U32", VReg_32>;
+def DS_SUB_U32 : DS_1A1D_NORET <0x1, "DS_SUB_U32", VReg_32>;
+def DS_RSUB_U32 : DS_1A1D_NORET <0x2, "DS_RSUB_U32", VReg_32>;
+def DS_INC_U32 : DS_1A1D_NORET <0x3, "DS_INC_U32", VReg_32>;
+def DS_DEC_U32 : DS_1A1D_NORET <0x4, "DS_DEC_U32", VReg_32>;
+def DS_MIN_I32 : DS_1A1D_NORET <0x5, "DS_MIN_I32", VReg_32>;
+def DS_MAX_I32 : DS_1A1D_NORET <0x6, "DS_MAX_I32", VReg_32>;
+def DS_MIN_U32 : DS_1A1D_NORET <0x7, "DS_MIN_U32", VReg_32>;
+def DS_MAX_U32 : DS_1A1D_NORET <0x8, "DS_MAX_U32", VReg_32>;
+def DS_AND_B32 : DS_1A1D_NORET <0x9, "DS_AND_B32", VReg_32>;
+def DS_OR_B32 : DS_1A1D_NORET <0xa, "DS_OR_B32", VReg_32>;
+def DS_XOR_B32 : DS_1A1D_NORET <0xb, "DS_XOR_B32", VReg_32>;
+def DS_MSKOR_B32 : DS_1A1D_NORET <0xc, "DS_MSKOR_B32", VReg_32>;
+def DS_CMPST_B32 : DS_1A2D_NORET <0x10, "DS_CMPST_B32", VReg_32>;
+def DS_CMPST_F32 : DS_1A2D_NORET <0x11, "DS_CMPST_F32", VReg_32>;
+def DS_MIN_F32 : DS_1A1D_NORET <0x12, "DS_MIN_F32", VReg_32>;
+def DS_MAX_F32 : DS_1A1D_NORET <0x13, "DS_MAX_F32", VReg_32>;
+
+def DS_ADD_RTN_U32 : DS_1A1D_RET <0x20, "DS_ADD_RTN_U32", VReg_32>;
+def DS_SUB_RTN_U32 : DS_1A1D_RET <0x21, "DS_SUB_RTN_U32", VReg_32>;
+def DS_RSUB_RTN_U32 : DS_1A1D_RET <0x22, "DS_RSUB_RTN_U32", VReg_32>;
+def DS_INC_RTN_U32 : DS_1A1D_RET <0x23, "DS_INC_RTN_U32", VReg_32>;
+def DS_DEC_RTN_U32 : DS_1A1D_RET <0x24, "DS_DEC_RTN_U32", VReg_32>;
+def DS_MIN_RTN_I32 : DS_1A1D_RET <0x25, "DS_MIN_RTN_I32", VReg_32>;
+def DS_MAX_RTN_I32 : DS_1A1D_RET <0x26, "DS_MAX_RTN_I32", VReg_32>;
+def DS_MIN_RTN_U32 : DS_1A1D_RET <0x27, "DS_MIN_RTN_U32", VReg_32>;
+def DS_MAX_RTN_U32 : DS_1A1D_RET <0x28, "DS_MAX_RTN_U32", VReg_32>;
+def DS_AND_RTN_B32 : DS_1A1D_RET <0x29, "DS_AND_RTN_B32", VReg_32>;
+def DS_OR_RTN_B32 : DS_1A1D_RET <0x2a, "DS_OR_RTN_B32", VReg_32>;
+def DS_XOR_RTN_B32 : DS_1A1D_RET <0x2b, "DS_XOR_RTN_B32", VReg_32>;
+def DS_MSKOR_RTN_B32 : DS_1A1D_RET <0x2c, "DS_MSKOR_RTN_B32", VReg_32>;
+def DS_WRXCHG_RTN_B32 : DS_1A1D_RET <0x2d, "DS_WRXCHG_RTN_B32", VReg_32>;
+//def DS_WRXCHG2_RTN_B32 : DS_2A0D_RET <0x2e, "DS_WRXCHG2_RTN_B32", VReg_32>;
+//def DS_WRXCHG2ST64_RTN_B32 : DS_2A0D_RET <0x2f, "DS_WRXCHG2ST64_RTN_B32", VReg_32>;
+def DS_CMPST_RTN_B32 : DS_1A2D_RET <0x30, "DS_CMPST_RTN_B32", VReg_32>;
+def DS_CMPST_RTN_F32 : DS_1A2D_RET <0x31, "DS_CMPST_RTN_F32", VReg_32>;
+def DS_MIN_RTN_F32 : DS_1A1D_RET <0x32, "DS_MIN_RTN_F32", VReg_32>;
+def DS_MAX_RTN_F32 : DS_1A1D_RET <0x33, "DS_MAX_RTN_F32", VReg_32>;
+
+let SubtargetPredicate = isCI in {
+def DS_WRAP_RTN_F32 : DS_1A1D_RET <0x34, "DS_WRAP_RTN_F32", VReg_32>;
+} // End SubtargetPredicate = isCI
+
+def DS_ADD_U64 : DS_1A1D_NORET <0x40, "DS_ADD_U64", VReg_64>;
+def DS_SUB_U64 : DS_1A1D_NORET <0x41, "DS_SUB_U64", VReg_64>;
+def DS_RSUB_U64 : DS_1A1D_NORET <0x42, "DS_RSUB_U64", VReg_64>;
+def DS_INC_U64 : DS_1A1D_NORET <0x43, "DS_INC_U64", VReg_64>;
+def DS_DEC_U64 : DS_1A1D_NORET <0x44, "DS_DEC_U64", VReg_64>;
+def DS_MIN_I64 : DS_1A1D_NORET <0x45, "DS_MIN_I64", VReg_64>;
+def DS_MAX_I64 : DS_1A1D_NORET <0x46, "DS_MAX_I64", VReg_64>;
+def DS_MIN_U64 : DS_1A1D_NORET <0x47, "DS_MIN_U64", VReg_64>;
+def DS_MAX_U64 : DS_1A1D_NORET <0x48, "DS_MAX_U64", VReg_64>;
+def DS_AND_B64 : DS_1A1D_NORET <0x49, "DS_AND_B64", VReg_64>;
+def DS_OR_B64 : DS_1A1D_NORET <0x4a, "DS_OR_B64", VReg_64>;
+def DS_XOR_B64 : DS_1A1D_NORET <0x4b, "DS_XOR_B64", VReg_64>;
+def DS_MSKOR_B64 : DS_1A1D_NORET <0x4c, "DS_MSKOR_B64", VReg_64>;
+def DS_CMPST_B64 : DS_1A2D_NORET <0x50, "DS_CMPST_B64", VReg_64>;
+def DS_CMPST_F64 : DS_1A2D_NORET <0x51, "DS_CMPST_F64", VReg_64>;
+def DS_MIN_F64 : DS_1A1D_NORET <0x52, "DS_MIN_F64", VReg_64>;
+def DS_MAX_F64 : DS_1A1D_NORET <0x53, "DS_MAX_F64", VReg_64>;
+
+def DS_ADD_RTN_U64 : DS_1A1D_RET <0x60, "DS_ADD_RTN_U64", VReg_64>;
+def DS_SUB_RTN_U64 : DS_1A1D_RET <0x61, "DS_SUB_RTN_U64", VReg_64>;
+def DS_RSUB_RTN_U64 : DS_1A1D_RET <0x62, "DS_RSUB_RTN_U64", VReg_64>;
+def DS_INC_RTN_U64 : DS_1A1D_RET <0x63, "DS_INC_RTN_U64", VReg_64>;
+def DS_DEC_RTN_U64 : DS_1A1D_RET <0x64, "DS_DEC_RTN_U64", VReg_64>;
+def DS_MIN_RTN_I64 : DS_1A1D_RET <0x65, "DS_MIN_RTN_I64", VReg_64>;
+def DS_MAX_RTN_I64 : DS_1A1D_RET <0x66, "DS_MAX_RTN_I64", VReg_64>;
+def DS_MIN_RTN_U64 : DS_1A1D_RET <0x67, "DS_MIN_RTN_U64", VReg_64>;
+def DS_MAX_RTN_U64 : DS_1A1D_RET <0x68, "DS_MAX_RTN_U64", VReg_64>;
+def DS_AND_RTN_B64 : DS_1A1D_RET <0x69, "DS_AND_RTN_B64", VReg_64>;
+def DS_OR_RTN_B64 : DS_1A1D_RET <0x6a, "DS_OR_RTN_B64", VReg_64>;
+def DS_XOR_RTN_B64 : DS_1A1D_RET <0x6b, "DS_XOR_RTN_B64", VReg_64>;
+def DS_MSKOR_RTN_B64 : DS_1A1D_RET <0x6c, "DS_MSKOR_RTN_B64", VReg_64>;
+def DS_WRXCHG_RTN_B64 : DS_1A1D_RET <0x6d, "DS_WRXCHG_RTN_B64", VReg_64>;
+//def DS_WRXCHG2_RTN_B64 : DS_2A0D_RET <0x6e, "DS_WRXCHG2_RTN_B64", VReg_64>;
+//def DS_WRXCHG2ST64_RTN_B64 : DS_2A0D_RET <0x6f, "DS_WRXCHG2ST64_RTN_B64", VReg_64>;
+def DS_CMPST_RTN_B64 : DS_1A2D_RET <0x70, "DS_CMPST_RTN_B64", VReg_64>;
+def DS_CMPST_RTN_F64 : DS_1A2D_RET <0x71, "DS_CMPST_RTN_F64", VReg_64>;
+def DS_MIN_RTN_F64 : DS_1A1D_RET <0x72, "DS_MIN_RTN_F64", VReg_64>;
+def DS_MAX_RTN_F64 : DS_1A1D_RET <0x73, "DS_MAX_RTN_F64", VReg_64>;
+
+//let SubtargetPredicate = isCI in {
+// DS_CONDXCHG32_RTN_B64
+// DS_CONDXCHG32_RTN_B128
+//} // End isCI
+
+// TODO: _SRC2_* forms
+
def DS_WRITE_B32 : DS_Store_Helper <0x0000000d, "DS_WRITE_B32", VReg_32>;
def DS_WRITE_B8 : DS_Store_Helper <0x0000001e, "DS_WRITE_B8", VReg_32>;
def DS_WRITE_B16 : DS_Store_Helper <0x0000001f, "DS_WRITE_B16", VReg_32>;
@@ -744,32 +846,46 @@ defm BUFFER_LOAD_FORMAT_XYZW : MUBUF_Load_Helper <0x00000003, "BUFFER_LOAD_FORMA
//def BUFFER_STORE_FORMAT_XY : MUBUF_ <0x00000005, "BUFFER_STORE_FORMAT_XY", []>;
//def BUFFER_STORE_FORMAT_XYZ : MUBUF_ <0x00000006, "BUFFER_STORE_FORMAT_XYZ", []>;
//def BUFFER_STORE_FORMAT_XYZW : MUBUF_ <0x00000007, "BUFFER_STORE_FORMAT_XYZW", []>;
-defm BUFFER_LOAD_UBYTE : MUBUF_Load_Helper <0x00000008, "BUFFER_LOAD_UBYTE", VReg_32>;
-defm BUFFER_LOAD_SBYTE : MUBUF_Load_Helper <0x00000009, "BUFFER_LOAD_SBYTE", VReg_32>;
-defm BUFFER_LOAD_USHORT : MUBUF_Load_Helper <0x0000000a, "BUFFER_LOAD_USHORT", VReg_32>;
-defm BUFFER_LOAD_SSHORT : MUBUF_Load_Helper <0x0000000b, "BUFFER_LOAD_SSHORT", VReg_32>;
-defm BUFFER_LOAD_DWORD : MUBUF_Load_Helper <0x0000000c, "BUFFER_LOAD_DWORD", VReg_32>;
-defm BUFFER_LOAD_DWORDX2 : MUBUF_Load_Helper <0x0000000d, "BUFFER_LOAD_DWORDX2", VReg_64>;
-defm BUFFER_LOAD_DWORDX4 : MUBUF_Load_Helper <0x0000000e, "BUFFER_LOAD_DWORDX4", VReg_128>;
+defm BUFFER_LOAD_UBYTE : MUBUF_Load_Helper <
+ 0x00000008, "BUFFER_LOAD_UBYTE", VReg_32, i32, az_extloadi8_global
+>;
+defm BUFFER_LOAD_SBYTE : MUBUF_Load_Helper <
+ 0x00000009, "BUFFER_LOAD_SBYTE", VReg_32, i32, sextloadi8_global
+>;
+defm BUFFER_LOAD_USHORT : MUBUF_Load_Helper <
+ 0x0000000a, "BUFFER_LOAD_USHORT", VReg_32, i32, az_extloadi16_global
+>;
+defm BUFFER_LOAD_SSHORT : MUBUF_Load_Helper <
+ 0x0000000b, "BUFFER_LOAD_SSHORT", VReg_32, i32, sextloadi16_global
+>;
+defm BUFFER_LOAD_DWORD : MUBUF_Load_Helper <
+ 0x0000000c, "BUFFER_LOAD_DWORD", VReg_32, i32, global_load
+>;
+defm BUFFER_LOAD_DWORDX2 : MUBUF_Load_Helper <
+ 0x0000000d, "BUFFER_LOAD_DWORDX2", VReg_64, v2i32, global_load
+>;
+defm BUFFER_LOAD_DWORDX4 : MUBUF_Load_Helper <
+ 0x0000000e, "BUFFER_LOAD_DWORDX4", VReg_128, v4i32, global_load
+>;
def BUFFER_STORE_BYTE : MUBUF_Store_Helper <
- 0x00000018, "BUFFER_STORE_BYTE", VReg_32
+ 0x00000018, "BUFFER_STORE_BYTE", VReg_32, i32, truncstorei8_global
>;
def BUFFER_STORE_SHORT : MUBUF_Store_Helper <
- 0x0000001a, "BUFFER_STORE_SHORT", VReg_32
+ 0x0000001a, "BUFFER_STORE_SHORT", VReg_32, i32, truncstorei16_global
>;
def BUFFER_STORE_DWORD : MUBUF_Store_Helper <
- 0x0000001c, "BUFFER_STORE_DWORD", VReg_32
+ 0x0000001c, "BUFFER_STORE_DWORD", VReg_32, i32, global_store
>;
def BUFFER_STORE_DWORDX2 : MUBUF_Store_Helper <
- 0x0000001d, "BUFFER_STORE_DWORDX2", VReg_64
+ 0x0000001d, "BUFFER_STORE_DWORDX2", VReg_64, v2i32, global_store
>;
def BUFFER_STORE_DWORDX4 : MUBUF_Store_Helper <
- 0x0000001e, "BUFFER_STORE_DWORDX4", VReg_128
+ 0x0000001e, "BUFFER_STORE_DWORDX4", VReg_128, v4i32, global_store
>;
//def BUFFER_ATOMIC_SWAP : MUBUF_ <0x00000030, "BUFFER_ATOMIC_SWAP", []>;
//def BUFFER_ATOMIC_CMPSWAP : MUBUF_ <0x00000031, "BUFFER_ATOMIC_CMPSWAP", []>;
@@ -885,31 +1001,31 @@ defm IMAGE_SAMPLE_C_B : MIMG_Sampler <0x0000002d, "IMAGE_SAMPLE_C_B">;
//def IMAGE_SAMPLE_C_B_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_B_O", 0x0000003d>;
//def IMAGE_SAMPLE_C_B_CL_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_B_CL_O", 0x0000003e>;
//def IMAGE_SAMPLE_C_LZ_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_LZ_O", 0x0000003f>;
-//def IMAGE_GATHER4 : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4", 0x00000040>;
-//def IMAGE_GATHER4_CL : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_CL", 0x00000041>;
-//def IMAGE_GATHER4_L : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_L", 0x00000044>;
-//def IMAGE_GATHER4_B : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_B", 0x00000045>;
-//def IMAGE_GATHER4_B_CL : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_B_CL", 0x00000046>;
-//def IMAGE_GATHER4_LZ : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_LZ", 0x00000047>;
-//def IMAGE_GATHER4_C : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C", 0x00000048>;
-//def IMAGE_GATHER4_C_CL : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_CL", 0x00000049>;
-//def IMAGE_GATHER4_C_L : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_L", 0x0000004c>;
-//def IMAGE_GATHER4_C_B : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_B", 0x0000004d>;
-//def IMAGE_GATHER4_C_B_CL : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_B_CL", 0x0000004e>;
-//def IMAGE_GATHER4_C_LZ : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_LZ", 0x0000004f>;
-//def IMAGE_GATHER4_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_O", 0x00000050>;
-//def IMAGE_GATHER4_CL_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_CL_O", 0x00000051>;
-//def IMAGE_GATHER4_L_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_L_O", 0x00000054>;
-//def IMAGE_GATHER4_B_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_B_O", 0x00000055>;
-//def IMAGE_GATHER4_B_CL_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_B_CL_O", 0x00000056>;
-//def IMAGE_GATHER4_LZ_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_LZ_O", 0x00000057>;
-//def IMAGE_GATHER4_C_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_O", 0x00000058>;
-//def IMAGE_GATHER4_C_CL_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_CL_O", 0x00000059>;
-//def IMAGE_GATHER4_C_L_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_L_O", 0x0000005c>;
-//def IMAGE_GATHER4_C_B_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_B_O", 0x0000005d>;
-//def IMAGE_GATHER4_C_B_CL_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_B_CL_O", 0x0000005e>;
-//def IMAGE_GATHER4_C_LZ_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_LZ_O", 0x0000005f>;
-//def IMAGE_GET_LOD : MIMG_NoPattern_ <"IMAGE_GET_LOD", 0x00000060>;
+defm IMAGE_GATHER4 : MIMG_Gather <0x00000040, "IMAGE_GATHER4">;
+defm IMAGE_GATHER4_CL : MIMG_Gather <0x00000041, "IMAGE_GATHER4_CL">;
+defm IMAGE_GATHER4_L : MIMG_Gather <0x00000044, "IMAGE_GATHER4_L">;
+defm IMAGE_GATHER4_B : MIMG_Gather <0x00000045, "IMAGE_GATHER4_B">;
+defm IMAGE_GATHER4_B_CL : MIMG_Gather <0x00000046, "IMAGE_GATHER4_B_CL">;
+defm IMAGE_GATHER4_LZ : MIMG_Gather <0x00000047, "IMAGE_GATHER4_LZ">;
+defm IMAGE_GATHER4_C : MIMG_Gather <0x00000048, "IMAGE_GATHER4_C">;
+defm IMAGE_GATHER4_C_CL : MIMG_Gather <0x00000049, "IMAGE_GATHER4_C_CL">;
+defm IMAGE_GATHER4_C_L : MIMG_Gather <0x0000004c, "IMAGE_GATHER4_C_L">;
+defm IMAGE_GATHER4_C_B : MIMG_Gather <0x0000004d, "IMAGE_GATHER4_C_B">;
+defm IMAGE_GATHER4_C_B_CL : MIMG_Gather <0x0000004e, "IMAGE_GATHER4_C_B_CL">;
+defm IMAGE_GATHER4_C_LZ : MIMG_Gather <0x0000004f, "IMAGE_GATHER4_C_LZ">;
+defm IMAGE_GATHER4_O : MIMG_Gather <0x00000050, "IMAGE_GATHER4_O">;
+defm IMAGE_GATHER4_CL_O : MIMG_Gather <0x00000051, "IMAGE_GATHER4_CL_O">;
+defm IMAGE_GATHER4_L_O : MIMG_Gather <0x00000054, "IMAGE_GATHER4_L_O">;
+defm IMAGE_GATHER4_B_O : MIMG_Gather <0x00000055, "IMAGE_GATHER4_B_O">;
+defm IMAGE_GATHER4_B_CL_O : MIMG_Gather <0x00000056, "IMAGE_GATHER4_B_CL_O">;
+defm IMAGE_GATHER4_LZ_O : MIMG_Gather <0x00000057, "IMAGE_GATHER4_LZ_O">;
+defm IMAGE_GATHER4_C_O : MIMG_Gather <0x00000058, "IMAGE_GATHER4_C_O">;
+defm IMAGE_GATHER4_C_CL_O : MIMG_Gather <0x00000059, "IMAGE_GATHER4_C_CL_O">;
+defm IMAGE_GATHER4_C_L_O : MIMG_Gather <0x0000005c, "IMAGE_GATHER4_C_L_O">;
+defm IMAGE_GATHER4_C_B_O : MIMG_Gather <0x0000005d, "IMAGE_GATHER4_C_B_O">;
+defm IMAGE_GATHER4_C_B_CL_O : MIMG_Gather <0x0000005e, "IMAGE_GATHER4_C_B_CL_O">;
+defm IMAGE_GATHER4_C_LZ_O : MIMG_Gather <0x0000005f, "IMAGE_GATHER4_C_LZ_O">;
+defm IMAGE_GET_LOD : MIMG_Sampler <0x00000060, "IMAGE_GET_LOD">;
//def IMAGE_SAMPLE_CD : MIMG_NoPattern_ <"IMAGE_SAMPLE_CD", 0x00000068>;
//def IMAGE_SAMPLE_CD_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_CD_CL", 0x00000069>;
//def IMAGE_SAMPLE_C_CD : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_CD", 0x0000006a>;
@@ -962,8 +1078,12 @@ defm V_CVT_I32_F32 : VOP1_32 <0x00000008, "V_CVT_I32_F32",
[(set i32:$dst, (fp_to_sint f32:$src0))]
>;
defm V_MOV_FED_B32 : VOP1_32 <0x00000009, "V_MOV_FED_B32", []>;
-////def V_CVT_F16_F32 : VOP1_F16 <0x0000000a, "V_CVT_F16_F32", []>;
-//defm V_CVT_F32_F16 : VOP1_32 <0x0000000b, "V_CVT_F32_F16", []>;
+defm V_CVT_F16_F32 : VOP1_32 <0x0000000a, "V_CVT_F16_F32",
+ [(set i32:$dst, (f32_to_f16 f32:$src0))]
+>;
+defm V_CVT_F32_F16 : VOP1_32 <0x0000000b, "V_CVT_F32_F16",
+ [(set f32:$dst, (f16_to_f32 i32:$src0))]
+>;
//defm V_CVT_RPI_I32_F32 : VOP1_32 <0x0000000c, "V_CVT_RPI_I32_F32", []>;
//defm V_CVT_FLR_I32_F32 : VOP1_32 <0x0000000d, "V_CVT_FLR_I32_F32", []>;
//defm V_CVT_OFF_F32_I4 : VOP1_32 <0x0000000e, "V_CVT_OFF_F32_I4", []>;
@@ -973,10 +1093,18 @@ defm V_CVT_F32_F64 : VOP1_32_64 <0x0000000f, "V_CVT_F32_F64",
defm V_CVT_F64_F32 : VOP1_64_32 <0x00000010, "V_CVT_F64_F32",
[(set f64:$dst, (fextend f32:$src0))]
>;
-//defm V_CVT_F32_UBYTE0 : VOP1_32 <0x00000011, "V_CVT_F32_UBYTE0", []>;
-//defm V_CVT_F32_UBYTE1 : VOP1_32 <0x00000012, "V_CVT_F32_UBYTE1", []>;
-//defm V_CVT_F32_UBYTE2 : VOP1_32 <0x00000013, "V_CVT_F32_UBYTE2", []>;
-//defm V_CVT_F32_UBYTE3 : VOP1_32 <0x00000014, "V_CVT_F32_UBYTE3", []>;
+defm V_CVT_F32_UBYTE0 : VOP1_32 <0x00000011, "V_CVT_F32_UBYTE0",
+ [(set f32:$dst, (AMDGPUcvt_f32_ubyte0 i32:$src0))]
+>;
+defm V_CVT_F32_UBYTE1 : VOP1_32 <0x00000012, "V_CVT_F32_UBYTE1",
+ [(set f32:$dst, (AMDGPUcvt_f32_ubyte1 i32:$src0))]
+>;
+defm V_CVT_F32_UBYTE2 : VOP1_32 <0x00000013, "V_CVT_F32_UBYTE2",
+ [(set f32:$dst, (AMDGPUcvt_f32_ubyte2 i32:$src0))]
+>;
+defm V_CVT_F32_UBYTE3 : VOP1_32 <0x00000014, "V_CVT_F32_UBYTE3",
+ [(set f32:$dst, (AMDGPUcvt_f32_ubyte3 i32:$src0))]
+>;
defm V_CVT_U32_F64 : VOP1_32_64 <0x00000015, "V_CVT_U32_F64",
[(set i32:$dst, (fp_to_uint f64:$src0))]
>;
@@ -988,7 +1116,7 @@ defm V_FRACT_F32 : VOP1_32 <0x00000020, "V_FRACT_F32",
[(set f32:$dst, (AMDGPUfract f32:$src0))]
>;
defm V_TRUNC_F32 : VOP1_32 <0x00000021, "V_TRUNC_F32",
- [(set f32:$dst, (int_AMDGPU_trunc f32:$src0))]
+ [(set f32:$dst, (ftrunc f32:$src0))]
>;
defm V_CEIL_F32 : VOP1_32 <0x00000022, "V_CEIL_F32",
[(set f32:$dst, (fceil f32:$src0))]
@@ -1006,24 +1134,33 @@ defm V_LOG_CLAMP_F32 : VOP1_32 <0x00000026, "V_LOG_CLAMP_F32", []>;
defm V_LOG_F32 : VOP1_32 <0x00000027, "V_LOG_F32",
[(set f32:$dst, (flog2 f32:$src0))]
>;
+
defm V_RCP_CLAMP_F32 : VOP1_32 <0x00000028, "V_RCP_CLAMP_F32", []>;
defm V_RCP_LEGACY_F32 : VOP1_32 <0x00000029, "V_RCP_LEGACY_F32", []>;
defm V_RCP_F32 : VOP1_32 <0x0000002a, "V_RCP_F32",
- [(set f32:$dst, (fdiv FP_ONE, f32:$src0))]
+ [(set f32:$dst, (AMDGPUrcp f32:$src0))]
>;
defm V_RCP_IFLAG_F32 : VOP1_32 <0x0000002b, "V_RCP_IFLAG_F32", []>;
-defm V_RSQ_CLAMP_F32 : VOP1_32 <0x0000002c, "V_RSQ_CLAMP_F32", []>;
+defm V_RSQ_CLAMP_F32 : VOP1_32 <0x0000002c, "V_RSQ_CLAMP_F32",
+ [(set f32:$dst, (AMDGPUrsq_clamped f32:$src0))]
+>;
defm V_RSQ_LEGACY_F32 : VOP1_32 <
0x0000002d, "V_RSQ_LEGACY_F32",
- [(set f32:$dst, (int_AMDGPU_rsq f32:$src0))]
+ [(set f32:$dst, (AMDGPUrsq_legacy f32:$src0))]
+>;
+defm V_RSQ_F32 : VOP1_32 <0x0000002e, "V_RSQ_F32",
+ [(set f32:$dst, (AMDGPUrsq f32:$src0))]
>;
-defm V_RSQ_F32 : VOP1_32 <0x0000002e, "V_RSQ_F32", []>;
defm V_RCP_F64 : VOP1_64 <0x0000002f, "V_RCP_F64",
- [(set f64:$dst, (fdiv FP_ONE, f64:$src0))]
+ [(set f64:$dst, (AMDGPUrcp f64:$src0))]
>;
defm V_RCP_CLAMP_F64 : VOP1_64 <0x00000030, "V_RCP_CLAMP_F64", []>;
-defm V_RSQ_F64 : VOP1_64 <0x00000031, "V_RSQ_F64", []>;
-defm V_RSQ_CLAMP_F64 : VOP1_64 <0x00000032, "V_RSQ_CLAMP_F64", []>;
+defm V_RSQ_F64 : VOP1_64 <0x00000031, "V_RSQ_F64",
+ [(set f64:$dst, (AMDGPUrsq f64:$src0))]
+>;
+defm V_RSQ_CLAMP_F64 : VOP1_64 <0x00000032, "V_RSQ_CLAMP_F64",
+ [(set f64:$dst, (AMDGPUrsq_clamped f64:$src0))]
+>;
defm V_SQRT_F32 : VOP1_32 <0x00000033, "V_SQRT_F32",
[(set f32:$dst, (fsqrt f32:$src0))]
>;
@@ -1211,7 +1348,7 @@ defm V_BFM_B32 : VOP2_32 <0x0000001e, "V_BFM_B32",
defm V_MAC_F32 : VOP2_32 <0x0000001f, "V_MAC_F32", []>;
defm V_MADMK_F32 : VOP2_32 <0x00000020, "V_MADMK_F32", []>;
defm V_MADAK_F32 : VOP2_32 <0x00000021, "V_MADAK_F32", []>;
-//defm V_BCNT_U32_B32 : VOP2_32 <0x00000022, "V_BCNT_U32_B32", []>;
+defm V_BCNT_U32_B32 : VOP2_32 <0x00000022, "V_BCNT_U32_B32", []>;
defm V_MBCNT_LO_U32_B32 : VOP2_32 <0x00000023, "V_MBCNT_LO_U32_B32", []>;
defm V_MBCNT_HI_U32_B32 : VOP2_32 <0x00000024, "V_MBCNT_HI_U32_B32", []>;
@@ -1303,16 +1440,20 @@ defm V_MULLIT_F32 : VOP3_32 <0x00000150, "V_MULLIT_F32", []>;
//def V_SAD_U16 : VOP3_U16 <0x0000015c, "V_SAD_U16", []>;
defm V_SAD_U32 : VOP3_32 <0x0000015d, "V_SAD_U32", []>;
////def V_CVT_PK_U8_F32 : VOP3_U8 <0x0000015e, "V_CVT_PK_U8_F32", []>;
-defm V_DIV_FIXUP_F32 : VOP3_32 <0x0000015f, "V_DIV_FIXUP_F32", []>;
-def V_DIV_FIXUP_F64 : VOP3_64 <0x00000160, "V_DIV_FIXUP_F64", []>;
+defm V_DIV_FIXUP_F32 : VOP3_32 <0x0000015f, "V_DIV_FIXUP_F32",
+ [(set f32:$dst, (AMDGPUdiv_fixup f32:$src0, f32:$src1, f32:$src2))]
+>;
+def V_DIV_FIXUP_F64 : VOP3_64 <0x00000160, "V_DIV_FIXUP_F64",
+ [(set f64:$dst, (AMDGPUdiv_fixup f64:$src0, f64:$src1, f64:$src2))]
+>;
-def V_LSHL_B64 : VOP3_64_Shift <0x00000161, "V_LSHL_B64",
+def V_LSHL_B64 : VOP3_64_32 <0x00000161, "V_LSHL_B64",
[(set i64:$dst, (shl i64:$src0, i32:$src1))]
>;
-def V_LSHR_B64 : VOP3_64_Shift <0x00000162, "V_LSHR_B64",
+def V_LSHR_B64 : VOP3_64_32 <0x00000162, "V_LSHR_B64",
[(set i64:$dst, (srl i64:$src0, i32:$src1))]
>;
-def V_ASHR_I64 : VOP3_64_Shift <0x00000163, "V_ASHR_I64",
+def V_ASHR_I64 : VOP3_64_32 <0x00000163, "V_ASHR_I64",
[(set i64:$dst, (sra i64:$src0, i32:$src1))]
>;
@@ -1336,14 +1477,23 @@ defm V_MUL_HI_I32 : VOP3_32 <0x0000016c, "V_MUL_HI_I32", []>;
} // isCommutable = 1
-defm V_DIV_SCALE_F32 : VOP3_32 <0x0000016d, "V_DIV_SCALE_F32", []>;
-def V_DIV_SCALE_F64 : VOP3_64 <0x0000016e, "V_DIV_SCALE_F64", []>;
-defm V_DIV_FMAS_F32 : VOP3_32 <0x0000016f, "V_DIV_FMAS_F32", []>;
-def V_DIV_FMAS_F64 : VOP3_64 <0x00000170, "V_DIV_FMAS_F64", []>;
+def V_DIV_SCALE_F32 : VOP3b_32 <0x0000016d, "V_DIV_SCALE_F32", []>;
+
+// Double precision division pre-scale.
+def V_DIV_SCALE_F64 : VOP3b_64 <0x0000016e, "V_DIV_SCALE_F64", []>;
+
+defm V_DIV_FMAS_F32 : VOP3_32 <0x0000016f, "V_DIV_FMAS_F32",
+ [(set f32:$dst, (AMDGPUdiv_fmas f32:$src0, f32:$src1, f32:$src2))]
+>;
+def V_DIV_FMAS_F64 : VOP3_64 <0x00000170, "V_DIV_FMAS_F64",
+ [(set f64:$dst, (AMDGPUdiv_fmas f64:$src0, f64:$src1, f64:$src2))]
+>;
//def V_MSAD_U8 : VOP3_U8 <0x00000171, "V_MSAD_U8", []>;
//def V_QSAD_U8 : VOP3_U8 <0x00000172, "V_QSAD_U8", []>;
//def V_MQSAD_U8 : VOP3_U8 <0x00000173, "V_MQSAD_U8", []>;
-def V_TRIG_PREOP_F64 : VOP3_64 <0x00000174, "V_TRIG_PREOP_F64", []>;
+def V_TRIG_PREOP_F64 : VOP3_64_32 <0x00000174, "V_TRIG_PREOP_F64",
+ [(set f64:$dst, (AMDGPUtrig_preop f64:$src0, i32:$src1))]
+>;
//===----------------------------------------------------------------------===//
// Pseudo Instructions
@@ -1500,7 +1650,7 @@ let usesCustomInserter = 1 in {
// constant that can be used with the ADDR64 MUBUF instructions.
def SI_ADDR64_RSRC : InstSI <
(outs SReg_128:$srsrc),
- (ins SReg_64:$ptr),
+ (ins SSrc_64:$ptr),
"", []
>;
@@ -1508,7 +1658,7 @@ def V_SUB_F64 : InstSI <
(outs VReg_64:$dst),
(ins VReg_64:$src0, VReg_64:$src1),
"V_SUB_F64 $dst, $src0, $src1",
- []
+ [(set f64:$dst, (fsub f64:$src0, f64:$src1))]
>;
} // end usesCustomInserter
@@ -1529,6 +1679,7 @@ multiclass SI_SPILL_SGPR <RegisterClass sgpr_class> {
}
+defm SI_SPILL_S32 : SI_SPILL_SGPR <SReg_32>;
defm SI_SPILL_S64 : SI_SPILL_SGPR <SReg_64>;
defm SI_SPILL_S128 : SI_SPILL_SGPR <SReg_128>;
defm SI_SPILL_S256 : SI_SPILL_SGPR <SReg_256>;
@@ -1552,7 +1703,7 @@ def : Pat <
/* int_SI_vs_load_input */
def : Pat<
- (SIload_input v4i32:$tlst, IMM12bit:$attr_offset, i32:$buf_idx_vgpr),
+ (SIload_input v4i32:$tlst, imm:$attr_offset, i32:$buf_idx_vgpr),
(BUFFER_LOAD_FORMAT_XYZW_IDXEN $tlst, $buf_idx_vgpr, imm:$attr_offset, 0, 0, 0, 0)
>;
@@ -1564,11 +1715,6 @@ def : Pat <
$src0, $src1, $src2, $src3)
>;
-def : Pat <
- (f64 (fsub f64:$src0, f64:$src1)),
- (V_SUB_F64 $src0, $src1)
->;
-
//===----------------------------------------------------------------------===//
// SMRD Patterns
//===----------------------------------------------------------------------===//
@@ -1596,7 +1742,6 @@ multiclass SMRD_Pattern <SMRD Instr_IMM, SMRD Instr_SGPR, ValueType vt> {
defm : SMRD_Pattern <S_LOAD_DWORD_IMM, S_LOAD_DWORD_SGPR, f32>;
defm : SMRD_Pattern <S_LOAD_DWORD_IMM, S_LOAD_DWORD_SGPR, i32>;
-defm : SMRD_Pattern <S_LOAD_DWORDX2_IMM, S_LOAD_DWORDX2_SGPR, i64>;
defm : SMRD_Pattern <S_LOAD_DWORDX2_IMM, S_LOAD_DWORDX2_SGPR, v2i32>;
defm : SMRD_Pattern <S_LOAD_DWORDX4_IMM, S_LOAD_DWORDX4_SGPR, v4i32>;
defm : SMRD_Pattern <S_LOAD_DWORDX8_IMM, S_LOAD_DWORDX8_SGPR, v32i8>;
@@ -1615,6 +1760,24 @@ def : Pat <
(S_BUFFER_LOAD_DWORD_SGPR $sbase, (S_MOV_B32 imm:$offset))
>;
+} // End Predicates = [isSI]
+
+//===----------------------------------------------------------------------===//
+// SOP1 Patterns
+//===----------------------------------------------------------------------===//
+
+let Predicates = [isSI, isCFDepth0] in {
+
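+// S_BCNT1_I32_B64 produces a 32-bit count, so widen it to i64 by
+// placing the count in sub0 and zeroing sub1.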
+def : Pat <
+ (i64 (ctpop i64:$src)),
+ (INSERT_SUBREG (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
+ (S_BCNT1_I32_B64 $src), sub0),
+ (S_MOV_B32 0), sub1)
+>;
+
+} // End Predicates = [isSI, isCFDepth0]
+
+let Predicates = [isSI] in {
//===----------------------------------------------------------------------===//
// SOP2 Patterns
//===----------------------------------------------------------------------===//
@@ -1625,18 +1788,39 @@ def : Pat <
>;
//===----------------------------------------------------------------------===//
-// VOP2 Patterns
+// SOPP Patterns
//===----------------------------------------------------------------------===//
def : Pat <
- (or i64:$src0, i64:$src1),
+ (int_AMDGPU_barrier_global),
+ (S_BARRIER)
+>;
+
+//===----------------------------------------------------------------------===//
+// VOP1 Patterns
+//===----------------------------------------------------------------------===//
+
+def : RcpPat<V_RCP_F32_e32, f32>;
+def : RcpPat<V_RCP_F64_e32, f64>;
+defm : RsqPat<V_RSQ_F32_e32, f32>;
+defm : RsqPat<V_RSQ_F64_e32, f64>;
+
+//===----------------------------------------------------------------------===//
+// VOP2 Patterns
+//===----------------------------------------------------------------------===//
+
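+// There is no 64-bit VALU encoding for the bitwise ops, so a 64-bit
+// or/xor is split into two 32-bit ops on the sub0/sub1 halves and the
+// result reassembled with INSERT_SUBREG.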
+class BinOp64Pat <SDNode node, Instruction inst> : Pat <
+ (node i64:$src0, i64:$src1),
(INSERT_SUBREG (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
- (V_OR_B32_e32 (EXTRACT_SUBREG i64:$src0, sub0),
+ (inst (EXTRACT_SUBREG i64:$src0, sub0),
(EXTRACT_SUBREG i64:$src1, sub0)), sub0),
- (V_OR_B32_e32 (EXTRACT_SUBREG i64:$src0, sub1),
+ (inst (EXTRACT_SUBREG i64:$src0, sub1),
(EXTRACT_SUBREG i64:$src1, sub1)), sub1)
>;
+def : BinOp64Pat <or, V_OR_B32_e32>;
+def : BinOp64Pat <xor, V_XOR_B32_e32>;
+
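+// sext_inreg from a sub-word type: shift left so the sign bit lands in
+// bit 31, then arithmetic-shift back down, e.g. for i8 this emits
+// (V_ASHRREV_I32_e32 24, (V_LSHLREV_B32_e32 24, $src0)).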
class SextInReg <ValueType vt, int ShiftAmt> : Pat <
(sext_inreg i32:$src0, vt),
(V_ASHRREV_I32_e32 ShiftAmt, (V_LSHLREV_B32_e32 ShiftAmt, $src0))
@@ -1645,10 +1829,82 @@ class SextInReg <ValueType vt, int ShiftAmt> : Pat <
def : SextInReg <i8, 24>;
def : SextInReg <i16, 16>;
+def : Pat <
+ (i32 (add (i32 (ctpop i32:$popcnt)), i32:$val)),
+ (V_BCNT_U32_B32_e32 $popcnt, $val)
+>;
+
+def : Pat <
+ (i32 (ctpop i32:$popcnt)),
+ (V_BCNT_U32_B32_e64 $popcnt, 0, 0, 0)
+>;
+
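+// V_BCNT_U32_B32 computes ctpop(src0) + src1, so for a 64-bit ctpop the
+// count of the high half is accumulated into the count of the low half,
+// and the high word of the 64-bit result is zeroed.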
+def : Pat <
+ (i64 (ctpop i64:$src)),
+ (INSERT_SUBREG
+ (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
+ (V_BCNT_U32_B32_e32 (EXTRACT_SUBREG $src, sub1),
+ (V_BCNT_U32_B32_e64 (EXTRACT_SUBREG $src, sub0), 0, 0, 0)),
+ sub0),
+ (V_MOV_B32_e32 0), sub1)
+>;
+
/********** ======================= **********/
/********** Image sampling patterns **********/
/********** ======================= **********/
+class SampleRawPattern<SDPatternOperator name, MIMG opcode, ValueType vt> : Pat <
+ (name vt:$addr, v32i8:$rsrc, v16i8:$sampler, i32:$dmask, i32:$unorm,
+ i32:$r128, i32:$da, i32:$glc, i32:$slc, i32:$tfe, i32:$lwe),
+ (opcode (as_i32imm $dmask), (as_i1imm $unorm), (as_i1imm $glc), (as_i1imm $da),
+ (as_i1imm $r128), (as_i1imm $tfe), (as_i1imm $lwe), (as_i1imm $slc),
+ $addr, $rsrc, $sampler)
+>;
+
+// Only the variants which make sense are defined.
+def : SampleRawPattern<int_SI_gather4, IMAGE_GATHER4_V4_V2, v2i32>;
+def : SampleRawPattern<int_SI_gather4, IMAGE_GATHER4_V4_V4, v4i32>;
+def : SampleRawPattern<int_SI_gather4_cl, IMAGE_GATHER4_CL_V4_V4, v4i32>;
+def : SampleRawPattern<int_SI_gather4_l, IMAGE_GATHER4_L_V4_V4, v4i32>;
+def : SampleRawPattern<int_SI_gather4_b, IMAGE_GATHER4_B_V4_V4, v4i32>;
+def : SampleRawPattern<int_SI_gather4_b_cl, IMAGE_GATHER4_B_CL_V4_V4, v4i32>;
+def : SampleRawPattern<int_SI_gather4_b_cl, IMAGE_GATHER4_B_CL_V4_V8, v8i32>;
+def : SampleRawPattern<int_SI_gather4_lz, IMAGE_GATHER4_LZ_V4_V2, v2i32>;
+def : SampleRawPattern<int_SI_gather4_lz, IMAGE_GATHER4_LZ_V4_V4, v4i32>;
+
+def : SampleRawPattern<int_SI_gather4_c, IMAGE_GATHER4_C_V4_V4, v4i32>;
+def : SampleRawPattern<int_SI_gather4_c_cl, IMAGE_GATHER4_C_CL_V4_V4, v4i32>;
+def : SampleRawPattern<int_SI_gather4_c_cl, IMAGE_GATHER4_C_CL_V4_V8, v8i32>;
+def : SampleRawPattern<int_SI_gather4_c_l, IMAGE_GATHER4_C_L_V4_V4, v4i32>;
+def : SampleRawPattern<int_SI_gather4_c_l, IMAGE_GATHER4_C_L_V4_V8, v8i32>;
+def : SampleRawPattern<int_SI_gather4_c_b, IMAGE_GATHER4_C_B_V4_V4, v4i32>;
+def : SampleRawPattern<int_SI_gather4_c_b, IMAGE_GATHER4_C_B_V4_V8, v8i32>;
+def : SampleRawPattern<int_SI_gather4_c_b_cl, IMAGE_GATHER4_C_B_CL_V4_V8, v8i32>;
+def : SampleRawPattern<int_SI_gather4_c_lz, IMAGE_GATHER4_C_LZ_V4_V4, v4i32>;
+
+def : SampleRawPattern<int_SI_gather4_o, IMAGE_GATHER4_O_V4_V4, v4i32>;
+def : SampleRawPattern<int_SI_gather4_cl_o, IMAGE_GATHER4_CL_O_V4_V4, v4i32>;
+def : SampleRawPattern<int_SI_gather4_cl_o, IMAGE_GATHER4_CL_O_V4_V8, v8i32>;
+def : SampleRawPattern<int_SI_gather4_l_o, IMAGE_GATHER4_L_O_V4_V4, v4i32>;
+def : SampleRawPattern<int_SI_gather4_l_o, IMAGE_GATHER4_L_O_V4_V8, v8i32>;
+def : SampleRawPattern<int_SI_gather4_b_o, IMAGE_GATHER4_B_O_V4_V4, v4i32>;
+def : SampleRawPattern<int_SI_gather4_b_o, IMAGE_GATHER4_B_O_V4_V8, v8i32>;
+def : SampleRawPattern<int_SI_gather4_b_cl_o, IMAGE_GATHER4_B_CL_O_V4_V8, v8i32>;
+def : SampleRawPattern<int_SI_gather4_lz_o, IMAGE_GATHER4_LZ_O_V4_V4, v4i32>;
+
+def : SampleRawPattern<int_SI_gather4_c_o, IMAGE_GATHER4_C_O_V4_V4, v4i32>;
+def : SampleRawPattern<int_SI_gather4_c_o, IMAGE_GATHER4_C_O_V4_V8, v8i32>;
+def : SampleRawPattern<int_SI_gather4_c_cl_o, IMAGE_GATHER4_C_CL_O_V4_V8, v8i32>;
+def : SampleRawPattern<int_SI_gather4_c_l_o, IMAGE_GATHER4_C_L_O_V4_V8, v8i32>;
+def : SampleRawPattern<int_SI_gather4_c_b_o, IMAGE_GATHER4_C_B_O_V4_V8, v8i32>;
+def : SampleRawPattern<int_SI_gather4_c_b_cl_o, IMAGE_GATHER4_C_B_CL_O_V4_V8, v8i32>;
+def : SampleRawPattern<int_SI_gather4_c_lz_o, IMAGE_GATHER4_C_LZ_O_V4_V4, v4i32>;
+def : SampleRawPattern<int_SI_gather4_c_lz_o, IMAGE_GATHER4_C_LZ_O_V4_V8, v8i32>;
+
+def : SampleRawPattern<int_SI_getlod, IMAGE_GET_LOD_V4_V1, i32>;
+def : SampleRawPattern<int_SI_getlod, IMAGE_GET_LOD_V4_V2, v2i32>;
+def : SampleRawPattern<int_SI_getlod, IMAGE_GET_LOD_V4_V4, v4i32>;
+
/* SIsample for simple 1D texture lookup */
def : Pat <
(SIsample i32:$addr, v32i8:$rsrc, v4i32:$sampler, imm),
@@ -1864,7 +2120,10 @@ def : BitConvert <v2f32, v2i32, VReg_64>;
def : BitConvert <v2i32, v2f32, VReg_64>;
def : BitConvert <v2i32, i64, VReg_64>;
def : BitConvert <i64, v2i32, VReg_64>;
-
+def : BitConvert <v2f32, i64, VReg_64>;
+def : BitConvert <i64, v2f32, VReg_64>;
+def : BitConvert <v2i32, f64, VReg_64>;
+def : BitConvert <f64, v2i32, VReg_64>;
def : BitConvert <v4f32, v4i32, VReg_128>;
def : BitConvert <v4i32, v4f32, VReg_128>;
@@ -1894,7 +2153,7 @@ def FCLAMP_SI : AMDGPUShaderInst <
}
def : Pat <
- (int_AMDIL_clamp f32:$src, (f32 FP_ZERO), (f32 FP_ONE)),
+ (AMDGPUclamp f32:$src, (f32 FP_ZERO), (f32 FP_ONE)),
(FCLAMP_SI f32:$src)
>;
@@ -2106,7 +2365,7 @@ def : Pat <
(V_MUL_HI_I32 $src0, $src1, (i32 0))
>;
-defm : BFIPatterns <V_BFI_B32>;
+defm : BFIPatterns <V_BFI_B32, S_MOV_B32>;
def : ROTRPattern <V_ALIGNBIT_B32>;
/********** ======================= **********/
@@ -2130,7 +2389,7 @@ defm : DSReadPat <DS_READ_U8, i32, az_extloadi8_local>;
defm : DSReadPat <DS_READ_I16, i32, sextloadi16_local>;
defm : DSReadPat <DS_READ_U16, i32, az_extloadi16_local>;
defm : DSReadPat <DS_READ_B32, i32, local_load>;
-defm : DSReadPat <DS_READ_B64, i64, local_load>;
+defm : DSReadPat <DS_READ_B64, v2i32, local_load>;
multiclass DSWritePat <DS inst, ValueType vt, PatFrag frag> {
def : Pat <
@@ -2139,48 +2398,109 @@ multiclass DSWritePat <DS inst, ValueType vt, PatFrag frag> {
>;
def : Pat <
- (frag vt:$src1, i32:$src0),
- (inst 0, $src0, $src1, 0)
+ (frag vt:$val, i32:$ptr),
+ (inst 0, $ptr, $val, 0)
>;
}
defm : DSWritePat <DS_WRITE_B8, i32, truncstorei8_local>;
defm : DSWritePat <DS_WRITE_B16, i32, truncstorei16_local>;
defm : DSWritePat <DS_WRITE_B32, i32, local_store>;
-defm : DSWritePat <DS_WRITE_B64, i64, local_store>;
+defm : DSWritePat <DS_WRITE_B64, v2i32, local_store>;
-def : Pat <(atomic_load_add_local i32:$ptr, i32:$val),
- (DS_ADD_U32_RTN 0, $ptr, $val, 0)>;
-
-def : Pat <(atomic_load_sub_local i32:$ptr, i32:$val),
- (DS_SUB_U32_RTN 0, $ptr, $val, 0)>;
+multiclass DSAtomicRetPat<DS inst, ValueType vt, PatFrag frag> {
+ def : Pat <
+ (frag (add i32:$ptr, (i32 IMM16bit:$offset)), vt:$value),
+ (inst (i1 0), $ptr, $value, (as_i16imm $offset))
+ >;
-//===----------------------------------------------------------------------===//
-// MUBUF Patterns
-//===----------------------------------------------------------------------===//
+ def : Pat <
+ (frag i32:$ptr, vt:$val),
+ (inst 0, $ptr, $val, 0)
+ >;
+}
-multiclass MUBUFLoad_Pattern <MUBUF Instr_ADDR64, ValueType vt,
- PatFrag global_ld, PatFrag constant_ld> {
+// Special case of DSAtomicRetPat for add / sub 1 -> inc / dec
+//
+// We need to use something for data0, so we set a register to -1.
+// For the non-rtn variants, the manual says
+// DS[A] = (DS[A] >= D0) ? 0 : DS[A] + 1, so setting D0 to uint_max
+// makes the increment unconditional; the rtn variants presumably
+// behave the same.
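+// (Worked check for the inc form: with D0 = 0xffffffff the compare
+// only fires at DS[A] = 0xffffffff, where the result 0 is exactly what
+// an unconditional DS[A] + 1 would wrap to, so the two agree for every
+// value; the dec form wraps symmetrically at 0.)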
+//
+// We also load this -1 with s_mov_b32 / s_mov_b64 even though this
+// needs to be a VGPR. The SGPR copy pass will fix this, and it's
+// easier since there is no v_mov_b64.
+multiclass DSAtomicIncRetPat<DS inst, ValueType vt,
+ Instruction LoadImm, PatFrag frag> {
def : Pat <
- (vt (global_ld (mubuf_vaddr_offset i64:$ptr, i64:$offset, IMM12bit:$imm_offset))),
- (Instr_ADDR64 (SI_ADDR64_RSRC $ptr), $offset, (as_i16imm $imm_offset))
+ (frag (add i32:$ptr, (i32 IMM16bit:$offset)), (vt 1)),
+ (inst (i1 0), $ptr, (LoadImm (vt -1)), (as_i16imm $offset))
>;
def : Pat <
- (vt (global_ld (add i64:$ptr, (i64 IMM12bit:$offset)))),
- (Instr_ADDR64 (SI_ADDR64_RSRC (i64 0)), $ptr, (as_i16imm $offset))
+ (frag i32:$ptr, (vt 1)),
+ (inst 0, $ptr, (LoadImm (vt -1)), 0)
>;
+}
+multiclass DSAtomicCmpXChg <DS inst, ValueType vt, PatFrag frag> {
def : Pat <
- (vt (global_ld i64:$ptr)),
- (Instr_ADDR64 (SI_ADDR64_RSRC (i64 0)), $ptr, 0)
+ (frag (add i32:$ptr, (i32 IMM16bit:$offset)), vt:$cmp, vt:$swap),
+ (inst (i1 0), $ptr, $cmp, $swap, (as_i16imm $offset))
>;
def : Pat <
- (vt (global_ld (add i64:$ptr, i64:$offset))),
- (Instr_ADDR64 (SI_ADDR64_RSRC $ptr), $offset, 0)
+ (frag i32:$ptr, vt:$cmp, vt:$swap),
+ (inst 0, $ptr, $cmp, $swap, 0)
>;
+}
+
+// 32-bit atomics.
+defm : DSAtomicIncRetPat<DS_INC_RTN_U32, i32,
+ S_MOV_B32, atomic_load_add_local>;
+defm : DSAtomicIncRetPat<DS_DEC_RTN_U32, i32,
+ S_MOV_B32, atomic_load_sub_local>;
+
+defm : DSAtomicRetPat<DS_WRXCHG_RTN_B32, i32, atomic_swap_local>;
+defm : DSAtomicRetPat<DS_ADD_RTN_U32, i32, atomic_load_add_local>;
+defm : DSAtomicRetPat<DS_SUB_RTN_U32, i32, atomic_load_sub_local>;
+defm : DSAtomicRetPat<DS_AND_RTN_B32, i32, atomic_load_and_local>;
+defm : DSAtomicRetPat<DS_OR_RTN_B32, i32, atomic_load_or_local>;
+defm : DSAtomicRetPat<DS_XOR_RTN_B32, i32, atomic_load_xor_local>;
+defm : DSAtomicRetPat<DS_MIN_RTN_I32, i32, atomic_load_min_local>;
+defm : DSAtomicRetPat<DS_MAX_RTN_I32, i32, atomic_load_max_local>;
+defm : DSAtomicRetPat<DS_MIN_RTN_U32, i32, atomic_load_umin_local>;
+defm : DSAtomicRetPat<DS_MAX_RTN_U32, i32, atomic_load_umax_local>;
+
+defm : DSAtomicCmpXChg<DS_CMPST_RTN_B32, i32, atomic_cmp_swap_32_local>;
+
+// 64-bit atomics.
+defm : DSAtomicIncRetPat<DS_INC_RTN_U64, i64,
+ S_MOV_B64, atomic_load_add_local>;
+defm : DSAtomicIncRetPat<DS_DEC_RTN_U64, i64,
+ S_MOV_B64, atomic_load_sub_local>;
+
+defm : DSAtomicRetPat<DS_WRXCHG_RTN_B64, i64, atomic_swap_local>;
+defm : DSAtomicRetPat<DS_ADD_RTN_U64, i64, atomic_load_add_local>;
+defm : DSAtomicRetPat<DS_SUB_RTN_U64, i64, atomic_load_sub_local>;
+defm : DSAtomicRetPat<DS_AND_RTN_B64, i64, atomic_load_and_local>;
+defm : DSAtomicRetPat<DS_OR_RTN_B64, i64, atomic_load_or_local>;
+defm : DSAtomicRetPat<DS_XOR_RTN_B64, i64, atomic_load_xor_local>;
+defm : DSAtomicRetPat<DS_MIN_RTN_I64, i64, atomic_load_min_local>;
+defm : DSAtomicRetPat<DS_MAX_RTN_I64, i64, atomic_load_max_local>;
+defm : DSAtomicRetPat<DS_MIN_RTN_U64, i64, atomic_load_umin_local>;
+defm : DSAtomicRetPat<DS_MAX_RTN_U64, i64, atomic_load_umax_local>;
+
+defm : DSAtomicCmpXChg<DS_CMPST_RTN_B64, i64, atomic_cmp_swap_64_local>;
+
+//===----------------------------------------------------------------------===//
+// MUBUF Patterns
+//===----------------------------------------------------------------------===//
+
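+// Global loads and stores are now matched by the patterns carried on
+// the MUBUF_Load_Helper / MUBUF_Store_Helper instruction definitions
+// themselves; only constant-address loads still need these patterns.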
+multiclass MUBUFLoad_Pattern <MUBUF Instr_ADDR64, ValueType vt,
+ PatFrag constant_ld> {
def : Pat <
(vt (constant_ld (add i64:$ptr, i64:$offset))),
(Instr_ADDR64 (SI_ADDR64_RSRC $ptr), $offset, 0)
@@ -2188,53 +2508,19 @@ multiclass MUBUFLoad_Pattern <MUBUF Instr_ADDR64, ValueType vt,
}
defm : MUBUFLoad_Pattern <BUFFER_LOAD_SBYTE_ADDR64, i32,
- sextloadi8_global, sextloadi8_constant>;
+ sextloadi8_constant>;
defm : MUBUFLoad_Pattern <BUFFER_LOAD_UBYTE_ADDR64, i32,
- az_extloadi8_global, az_extloadi8_constant>;
+ az_extloadi8_constant>;
defm : MUBUFLoad_Pattern <BUFFER_LOAD_SSHORT_ADDR64, i32,
- sextloadi16_global, sextloadi16_constant>;
+ sextloadi16_constant>;
defm : MUBUFLoad_Pattern <BUFFER_LOAD_USHORT_ADDR64, i32,
- az_extloadi16_global, az_extloadi16_constant>;
+ az_extloadi16_constant>;
defm : MUBUFLoad_Pattern <BUFFER_LOAD_DWORD_ADDR64, i32,
- global_load, constant_load>;
-defm : MUBUFLoad_Pattern <BUFFER_LOAD_DWORDX2_ADDR64, i64,
- global_load, constant_load>;
-defm : MUBUFLoad_Pattern <BUFFER_LOAD_DWORDX2_ADDR64, i64,
- az_extloadi32_global, az_extloadi32_constant>;
+ constant_load>;
defm : MUBUFLoad_Pattern <BUFFER_LOAD_DWORDX2_ADDR64, v2i32,
- global_load, constant_load>;
+ constant_load>;
defm : MUBUFLoad_Pattern <BUFFER_LOAD_DWORDX4_ADDR64, v4i32,
- global_load, constant_load>;
-
-multiclass MUBUFStore_Pattern <MUBUF Instr, ValueType vt, PatFrag st> {
-
- def : Pat <
- (st vt:$value, (mubuf_vaddr_offset i64:$ptr, i64:$offset, IMM12bit:$imm_offset)),
- (Instr $value, (SI_ADDR64_RSRC $ptr), $offset, (as_i16imm $imm_offset))
- >;
-
- def : Pat <
- (st vt:$value, (add i64:$ptr, IMM12bit:$offset)),
- (Instr $value, (SI_ADDR64_RSRC (i64 0)), $ptr, (as_i16imm $offset))
- >;
-
- def : Pat <
- (st vt:$value, i64:$ptr),
- (Instr $value, (SI_ADDR64_RSRC (i64 0)), $ptr, 0)
- >;
-
- def : Pat <
- (st vt:$value, (add i64:$ptr, i64:$offset)),
- (Instr $value, (SI_ADDR64_RSRC $ptr), $offset, 0)
- >;
-}
-
-defm : MUBUFStore_Pattern <BUFFER_STORE_BYTE, i32, truncstorei8_global>;
-defm : MUBUFStore_Pattern <BUFFER_STORE_SHORT, i32, truncstorei16_global>;
-defm : MUBUFStore_Pattern <BUFFER_STORE_DWORD, i32, global_store>;
-defm : MUBUFStore_Pattern <BUFFER_STORE_DWORDX2, i64, global_store>;
-defm : MUBUFStore_Pattern <BUFFER_STORE_DWORDX2, v2i32, global_store>;
-defm : MUBUFStore_Pattern <BUFFER_STORE_DWORDX4, v4i32, global_store>;
+ constant_load>;
// BUFFER_LOAD_DWORD*, addr64=0
multiclass MUBUF_Load_Dword <ValueType vt, MUBUF offset, MUBUF offen, MUBUF idxen,
@@ -2301,7 +2587,7 @@ def : MTBUF_StoreResource <v2i32, 2, TBUFFER_STORE_FORMAT_XY>;
def : MTBUF_StoreResource <v4i32, 3, TBUFFER_STORE_FORMAT_XYZ>;
def : MTBUF_StoreResource <v4i32, 4, TBUFFER_STORE_FORMAT_XYZW>;
-let Predicates = [isCI] in {
+let SubtargetPredicate = isCI in {
// Sea Islands new arithmetic instructions
let neverHasSideEffects = 1 in {
@@ -2348,7 +2634,7 @@ def V_MAD_I64_I32 : VOP3_64 <0x00000177, "V_MAD_I64_I32", []>;
// BUFFER_LOAD_DWORDX3
// BUFFER_STORE_DWORDX3
-} // End Predicates = [isCI]
+} // End SubtargetPredicate = isCI
/********** ====================== **********/
@@ -2360,13 +2646,13 @@ multiclass SI_INDIRECT_Pattern <ValueType vt, ValueType eltvt, SI_INDIRECT_DST I
// 1. Extract with offset
def : Pat<
(vector_extract vt:$vec, (add i32:$idx, imm:$off)),
- (f32 (SI_INDIRECT_SRC (IMPLICIT_DEF), $vec, $idx, imm:$off))
+ (eltvt (SI_INDIRECT_SRC (IMPLICIT_DEF), $vec, $idx, imm:$off))
>;
// 2. Extract without offset
def : Pat<
(vector_extract vt:$vec, i32:$idx),
- (f32 (SI_INDIRECT_SRC (IMPLICIT_DEF), $vec, $idx, 0))
+ (eltvt (SI_INDIRECT_SRC (IMPLICIT_DEF), $vec, $idx, 0))
>;
// 3. Insert with offset
@@ -2392,20 +2678,6 @@ defm : SI_INDIRECT_Pattern <v4i32, i32, SI_INDIRECT_DST_V4>;
defm : SI_INDIRECT_Pattern <v8i32, i32, SI_INDIRECT_DST_V8>;
defm : SI_INDIRECT_Pattern <v16i32, i32, SI_INDIRECT_DST_V16>;
-/********** =============== **********/
-/********** Conditions **********/
-/********** =============== **********/
-
-def : Pat<
- (i1 (setcc f32:$src0, f32:$src1, SETO)),
- (V_CMP_O_F32_e64 $src0, $src1)
->;
-
-def : Pat<
- (i1 (setcc f32:$src0, f32:$src1, SETUO)),
- (V_CMP_U_F32_e64 $src0, $src1)
->;
-
//===----------------------------------------------------------------------===//
// Conversion Patterns
//===----------------------------------------------------------------------===//
@@ -2439,6 +2711,62 @@ def : Pat <
(S_MOV_B32 -1), sub1)
>;
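+// i32 -> i64 extension has no single instruction: place the 32-bit
+// source in sub0 and fill sub1 explicitly, with 0 for zero/any extend
+// and a copy of the sign bit (S_ASHR_I32 $src, 31) for sign extend.
+// An i1 source is first materialized as 0/1 (or 0/-1 when sign
+// extending) with V_CNDMASK_B32.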
+class ZExt_i64_i32_Pat <SDNode ext> : Pat <
+ (i64 (ext i32:$src)),
+ (INSERT_SUBREG (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $src, sub0),
+ (S_MOV_B32 0), sub1)
+>;
+
+class ZExt_i64_i1_Pat <SDNode ext> : Pat <
+ (i64 (ext i1:$src)),
+ (INSERT_SUBREG
+ (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
+ (V_CNDMASK_B32_e64 (i32 0), (i32 1), $src), sub0),
+ (S_MOV_B32 0), sub1)
+>;
+
+
+def : ZExt_i64_i32_Pat<zext>;
+def : ZExt_i64_i32_Pat<anyext>;
+def : ZExt_i64_i1_Pat<zext>;
+def : ZExt_i64_i1_Pat<anyext>;
+
+def : Pat <
+ (i64 (sext i32:$src)),
+ (INSERT_SUBREG
+ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $src, sub0),
+ (S_ASHR_I32 $src, 31), sub1)
+>;
+
+def : Pat <
+ (i64 (sext i1:$src)),
+ (INSERT_SUBREG
+ (INSERT_SUBREG
+ (i64 (IMPLICIT_DEF)),
+ (V_CNDMASK_B32_e64 0, -1, $src), sub0),
+ (V_CNDMASK_B32_e64 0, -1, $src), sub1)
+>;
+
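+// i1 -> float: select the 0.0 or +/-1.0 constant directly with
+// V_CNDMASK_B32. The f64 forms select an integer 0 or +/-1 first and
+// then convert, since there is no 64-bit V_CNDMASK.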
+def : Pat <
+ (f32 (sint_to_fp i1:$src)),
+ (V_CNDMASK_B32_e64 (i32 0), CONST.FP32_NEG_ONE, $src)
+>;
+
+def : Pat <
+ (f32 (uint_to_fp i1:$src)),
+ (V_CNDMASK_B32_e64 (i32 0), CONST.FP32_ONE, $src)
+>;
+
+def : Pat <
+ (f64 (sint_to_fp i1:$src)),
+ (V_CVT_F64_I32_e32 (V_CNDMASK_B32_e64 (i32 0), (i32 -1), $src))
+>;
+
+def : Pat <
+ (f64 (uint_to_fp i1:$src)),
+ (V_CVT_F64_U32_e32 (V_CNDMASK_B32_e64 (i32 0), (i32 1), $src))
+>;
+
//===----------------------------------------------------------------------===//
// Miscellaneous Patterns
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/R600/SIIntrinsics.td b/lib/Target/R600/SIIntrinsics.td
index 00e32c0..df690a4 100644
--- a/lib/Target/R600/SIIntrinsics.td
+++ b/lib/Target/R600/SIIntrinsics.td
@@ -56,11 +56,61 @@ let TargetPrefix = "SI", isTarget = 1 in {
class Sample : Intrinsic <[llvm_v4f32_ty], [llvm_anyvector_ty, llvm_v32i8_ty, llvm_anyint_ty, llvm_i32_ty], [IntrNoMem]>;
+ // Fully-flexible SAMPLE instruction.
+ class SampleRaw : Intrinsic <
+ [llvm_v4f32_ty], // vdata(VGPR)
+ [llvm_anyint_ty, // vaddr(VGPR)
+ llvm_v32i8_ty, // rsrc(SGPR)
+ llvm_v16i8_ty, // sampler(SGPR)
+ llvm_i32_ty, // dmask(imm)
+ llvm_i32_ty, // unorm(imm)
+ llvm_i32_ty, // r128(imm)
+ llvm_i32_ty, // da(imm)
+ llvm_i32_ty, // glc(imm)
+ llvm_i32_ty, // slc(imm)
+ llvm_i32_ty, // tfe(imm)
+ llvm_i32_ty], // lwe(imm)
+ [IntrNoMem]>;
+
def int_SI_sample : Sample;
def int_SI_sampleb : Sample;
def int_SI_sampled : Sample;
def int_SI_samplel : Sample;
+ // Basic gather4
+ def int_SI_gather4 : SampleRaw;
+ def int_SI_gather4_cl : SampleRaw;
+ def int_SI_gather4_l : SampleRaw;
+ def int_SI_gather4_b : SampleRaw;
+ def int_SI_gather4_b_cl : SampleRaw;
+ def int_SI_gather4_lz : SampleRaw;
+
+ // Gather4 with comparison
+ def int_SI_gather4_c : SampleRaw;
+ def int_SI_gather4_c_cl : SampleRaw;
+ def int_SI_gather4_c_l : SampleRaw;
+ def int_SI_gather4_c_b : SampleRaw;
+ def int_SI_gather4_c_b_cl : SampleRaw;
+ def int_SI_gather4_c_lz : SampleRaw;
+
+ // Gather4 with offsets
+ def int_SI_gather4_o : SampleRaw;
+ def int_SI_gather4_cl_o : SampleRaw;
+ def int_SI_gather4_l_o : SampleRaw;
+ def int_SI_gather4_b_o : SampleRaw;
+ def int_SI_gather4_b_cl_o : SampleRaw;
+ def int_SI_gather4_lz_o : SampleRaw;
+
+ // Gather4 with comparison and offsets
+ def int_SI_gather4_c_o : SampleRaw;
+ def int_SI_gather4_c_cl_o : SampleRaw;
+ def int_SI_gather4_c_l_o : SampleRaw;
+ def int_SI_gather4_c_b_o : SampleRaw;
+ def int_SI_gather4_c_b_cl_o : SampleRaw;
+ def int_SI_gather4_c_lz_o : SampleRaw;
+
+ def int_SI_getlod : SampleRaw;
+
def int_SI_imageload : Intrinsic <[llvm_v4i32_ty], [llvm_anyvector_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>;
def int_SI_resinfo : Intrinsic <[llvm_v4i32_ty], [llvm_i32_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>;
diff --git a/lib/Target/R600/SILowerControlFlow.cpp b/lib/Target/R600/SILowerControlFlow.cpp
index 6601f2a..9f5ff29 100644
--- a/lib/Target/R600/SILowerControlFlow.cpp
+++ b/lib/Target/R600/SILowerControlFlow.cpp
@@ -86,6 +86,7 @@ private:
void Kill(MachineInstr &MI);
void Branch(MachineInstr &MI);
+ void InitM0ForLDS(MachineBasicBlock::iterator MI);
void LoadM0(MachineInstr &MI, MachineInstr *MovRel);
void IndirectSrc(MachineInstr &MI);
void IndirectDst(MachineInstr &MI);
@@ -320,6 +321,14 @@ void SILowerControlFlowPass::Kill(MachineInstr &MI) {
MI.eraseFromParent();
}
+/// The m0 register stores the maximum allowable address for LDS reads and
+/// writes. Its value must be at least the size in bytes of LDS allocated by
+/// the shader. For simplicity, we set it to the maximum possible value.
+void SILowerControlFlowPass::InitM0ForLDS(MachineBasicBlock::iterator MI) {
+ BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), TII->get(AMDGPU::S_MOV_B32),
+ AMDGPU::M0).addImm(0xffffffff);
+}
+
void SILowerControlFlowPass::LoadM0(MachineInstr &MI, MachineInstr *MovRel) {
MachineBasicBlock &MBB = *MI.getParent();
@@ -333,52 +342,57 @@ void SILowerControlFlowPass::LoadM0(MachineInstr &MI, MachineInstr *MovRel) {
BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0)
.addReg(Idx);
MBB.insert(I, MovRel);
- MI.eraseFromParent();
- return;
- }
+ } else {
- assert(AMDGPU::SReg_64RegClass.contains(Save));
- assert(AMDGPU::VReg_32RegClass.contains(Idx));
+ assert(AMDGPU::SReg_64RegClass.contains(Save));
+ assert(AMDGPU::VReg_32RegClass.contains(Idx));
- // Save the EXEC mask
- BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B64), Save)
- .addReg(AMDGPU::EXEC);
+ // Save the EXEC mask
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B64), Save)
+ .addReg(AMDGPU::EXEC);
- // Read the next variant into VCC (lower 32 bits) <- also loop target
- BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32),
- AMDGPU::VCC_LO)
- .addReg(Idx);
+ // Read the next variant into VCC (lower 32 bits) <- also loop target
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32),
+ AMDGPU::VCC_LO)
+ .addReg(Idx);
- // Move index from VCC into M0
- BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0)
- .addReg(AMDGPU::VCC_LO);
+ // Move index from VCC into M0
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0)
+ .addReg(AMDGPU::VCC_LO);
- // Compare the just read M0 value to all possible Idx values
- BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_CMP_EQ_U32_e32), AMDGPU::VCC)
- .addReg(AMDGPU::M0)
- .addReg(Idx);
+ // Compare the just read M0 value to all possible Idx values
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_CMP_EQ_U32_e32), AMDGPU::VCC)
+ .addReg(AMDGPU::M0)
+ .addReg(Idx);
- // Update EXEC, save the original EXEC value to VCC
- BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_AND_SAVEEXEC_B64), AMDGPU::VCC)
- .addReg(AMDGPU::VCC);
+ // Update EXEC, save the original EXEC value to VCC
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_AND_SAVEEXEC_B64), AMDGPU::VCC)
+ .addReg(AMDGPU::VCC);
- // Do the actual move
- MBB.insert(I, MovRel);
+ // Do the actual move
+ MBB.insert(I, MovRel);
- // Update EXEC, switch all done bits to 0 and all todo bits to 1
- BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_XOR_B64), AMDGPU::EXEC)
- .addReg(AMDGPU::EXEC)
- .addReg(AMDGPU::VCC);
+ // Update EXEC, switch all done bits to 0 and all todo bits to 1
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_XOR_B64), AMDGPU::EXEC)
+ .addReg(AMDGPU::EXEC)
+ .addReg(AMDGPU::VCC);
- // Loop back to V_READFIRSTLANE_B32 if there are still variants to cover
- BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_CBRANCH_EXECNZ))
- .addImm(-7)
- .addReg(AMDGPU::EXEC);
+ // Loop back to V_READFIRSTLANE_B32 if there are still variants to cover
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_CBRANCH_EXECNZ))
+ .addImm(-7)
+ .addReg(AMDGPU::EXEC);
- // Restore EXEC
- BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B64), AMDGPU::EXEC)
- .addReg(Save);
+ // Restore EXEC
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B64), AMDGPU::EXEC)
+ .addReg(Save);
+ }
+ // FIXME: Are there any values other than the LDS address clamp that need to
+ // be stored in the m0 register and may be live for more than a few
+ // instructions? If so, we should save the m0 register at the beginning
+ // of this function and restore it here.
+ // FIXME: Add support for LDS direct loads.
+ InitM0ForLDS(&MI);
MI.eraseFromParent();
}
@@ -523,8 +537,7 @@ bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {
MachineBasicBlock &MBB = MF.front();
// Initialize M0 to a value that won't cause LDS access to be discarded
// due to offset clamping
- BuildMI(MBB, MBB.getFirstNonPHI(), DebugLoc(), TII->get(AMDGPU::S_MOV_B32),
- AMDGPU::M0).addImm(0xffffffff);
+ InitM0ForLDS(MBB.getFirstNonPHI());
}
if (NeedWQM && MFI->ShaderType == ShaderType::PIXEL) {
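(Summarizing the m0 changes in this file: LoadM0 deliberately clobbers m0 to feed the V_MOVREL indirect addressing, so once the move is done the pass re-arms the LDS clamp through the same InitM0ForLDS helper used at function entry. With m0 = 0xffffffff, the maximum possible value, no in-range LDS offset is ever discarded, at the cost of the FIXMEs above about other live m0 values and LDS direct loads.)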
diff --git a/lib/Target/R600/SIMachineFunctionInfo.cpp b/lib/Target/R600/SIMachineFunctionInfo.cpp
index af60995..e2df950 100644
--- a/lib/Target/R600/SIMachineFunctionInfo.cpp
+++ b/lib/Target/R600/SIMachineFunctionInfo.cpp
@@ -62,8 +62,10 @@ static unsigned createLaneVGPR(MachineRegisterInfo &MRI, MachineFunction *MF) {
return VGPR;
}
}
- MF->getFunction()->getContext().emitError(
- "Could not found S_ENGPGM instrtuction.");
+
+ LLVMContext &Ctx = MF->getFunction()->getContext();
+ Ctx.emitError("Could not find S_ENDPGM instruction.");
+
return VGPR;
}
diff --git a/lib/Target/R600/SIRegisterInfo.cpp b/lib/Target/R600/SIRegisterInfo.cpp
index c72d549..d0b677a 100644
--- a/lib/Target/R600/SIRegisterInfo.cpp
+++ b/lib/Target/R600/SIRegisterInfo.cpp
@@ -14,21 +14,20 @@
#include "SIRegisterInfo.h"
-#include "AMDGPUTargetMachine.h"
+#include "AMDGPUSubtarget.h"
#include "SIInstrInfo.h"
using namespace llvm;
-SIRegisterInfo::SIRegisterInfo(AMDGPUTargetMachine &tm)
-: AMDGPURegisterInfo(tm),
- TM(tm)
+SIRegisterInfo::SIRegisterInfo(const AMDGPUSubtarget &st)
+: AMDGPURegisterInfo(st)
{ }
BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
BitVector Reserved(getNumRegs());
Reserved.set(AMDGPU::EXEC);
Reserved.set(AMDGPU::INDIRECT_BASE_ADDR);
- const SIInstrInfo *TII = static_cast<const SIInstrInfo*>(TM.getInstrInfo());
+ const SIInstrInfo *TII = static_cast<const SIInstrInfo*>(ST.getInstrInfo());
TII->reserveIndirectRegisters(Reserved, MF);
return Reserved;
}
@@ -38,15 +37,6 @@ unsigned SIRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
return RC->getNumRegs();
}
-const TargetRegisterClass *
-SIRegisterInfo::getISARegClass(const TargetRegisterClass * rc) const {
- switch (rc->getID()) {
- case AMDGPU::GPRF32RegClassID:
- return &AMDGPU::VReg_32RegClass;
- default: return rc;
- }
-}
-
const TargetRegisterClass * SIRegisterInfo::getCFGStructurizerRegClass(
MVT VT) const {
switch(VT.SimpleTy) {
@@ -135,3 +125,19 @@ unsigned SIRegisterInfo::getPhysRegSubReg(unsigned Reg,
unsigned Index = getHWRegIndex(Reg);
return SubRC->getRegister(Index + Channel);
}
+
+bool SIRegisterInfo::regClassCanUseImmediate(int RCID) const {
+ switch (RCID) {
+ default: return false;
+ case AMDGPU::SSrc_32RegClassID:
+ case AMDGPU::SSrc_64RegClassID:
+ case AMDGPU::VSrc_32RegClassID:
+ case AMDGPU::VSrc_64RegClassID:
+ return true;
+ }
+}
+
+bool SIRegisterInfo::regClassCanUseImmediate(
+ const TargetRegisterClass *RC) const {
+ return regClassCanUseImmediate(RC->getID());
+}
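(The four classes listed are exactly the SI source-operand classes, scalar and vector in 32- and 64-bit widths, whose encodings can hold an inline constant in place of a register; that appears to be the point of the query, since destination and plain register classes always answer false.)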
diff --git a/lib/Target/R600/SIRegisterInfo.h b/lib/Target/R600/SIRegisterInfo.h
index 36b4fcd..c9305fb 100644
--- a/lib/Target/R600/SIRegisterInfo.h
+++ b/lib/Target/R600/SIRegisterInfo.h
@@ -20,24 +20,15 @@
namespace llvm {
-class AMDGPUTargetMachine;
-
struct SIRegisterInfo : public AMDGPURegisterInfo {
- AMDGPUTargetMachine &TM;
- SIRegisterInfo(AMDGPUTargetMachine &tm);
+ SIRegisterInfo(const AMDGPUSubtarget &st);
BitVector getReservedRegs(const MachineFunction &MF) const override;
unsigned getRegPressureLimit(const TargetRegisterClass *RC,
MachineFunction &MF) const override;
- /// \param RC is an AMDIL reg class.
- ///
- /// \returns the SI register class that is equivalent to \p RC.
- const TargetRegisterClass *
- getISARegClass(const TargetRegisterClass *RC) const override;
-
/// \brief get the register class of the specified type to use in the
/// CFGStructurizer
const TargetRegisterClass * getCFGStructurizerRegClass(MVT VT) const override;
@@ -69,6 +60,14 @@ struct SIRegisterInfo : public AMDGPURegisterInfo {
/// \returns The sub-register of Reg that is in Channel.
unsigned getPhysRegSubReg(unsigned Reg, const TargetRegisterClass *SubRC,
unsigned Channel) const;
+
+ /// \returns True if operands defined with this register class can accept
+ /// inline immediates.
+ bool regClassCanUseImmediate(int RCID) const;
+
+ /// \returns True if operands defined with this register class can accept
+ /// inline immediates.
+ bool regClassCanUseImmediate(const TargetRegisterClass *RC) const;
};
} // End namespace llvm
diff --git a/lib/Target/R600/SIRegisterInfo.td b/lib/Target/R600/SIRegisterInfo.td
index f1f01de..8974b63 100644
--- a/lib/Target/R600/SIRegisterInfo.td
+++ b/lib/Target/R600/SIRegisterInfo.td
@@ -168,7 +168,7 @@ def SReg_64 : RegisterClass<"AMDGPU", [v2i32, i64, i1], 64,
(add SGPR_64Regs, VCCReg, EXECReg)
>;
-def SReg_128 : RegisterClass<"AMDGPU", [v4i32], 128, (add SGPR_128)>;
+def SReg_128 : RegisterClass<"AMDGPU", [v4i32, v16i8], 128, (add SGPR_128)>;
def SReg_256 : RegisterClass<"AMDGPU", [v32i8, v8i32, v8f32], 256, (add SGPR_256)>;
diff --git a/lib/Target/R600/SITypeRewriter.cpp b/lib/Target/R600/SITypeRewriter.cpp
index a0b6907..367963a 100644
--- a/lib/Target/R600/SITypeRewriter.cpp
+++ b/lib/Target/R600/SITypeRewriter.cpp
@@ -119,8 +119,7 @@ void SITypeRewriter::visitCallInst(CallInst &I) {
Type::getInt32Ty(I.getContext())){
Type *ElementTy = Arg->getType()->getVectorElementType();
std::string TypeName = "i32";
- InsertElementInst *Def = dyn_cast<InsertElementInst>(Arg);
- assert(Def);
+ InsertElementInst *Def = cast<InsertElementInst>(Arg);
Args.push_back(Def->getOperand(1));
Types.push_back(ElementTy);
std::string VecTypeName = "v1" + TypeName;
diff --git a/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp b/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp
index da88820..9df0054 100644
--- a/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp
+++ b/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp
@@ -47,31 +47,27 @@ class SparcAsmParser : public MCTargetAsmParser {
// public interface of the MCTargetAsmParser.
bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
- SmallVectorImpl<MCParsedAsmOperand*> &Operands,
- MCStreamer &Out, unsigned &ErrorInfo,
+ OperandVector &Operands, MCStreamer &Out,
+ unsigned &ErrorInfo,
bool MatchingInlineAsm) override;
bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
- SMLoc NameLoc,
- SmallVectorImpl<MCParsedAsmOperand*> &Operands) override;
+ SMLoc NameLoc, OperandVector &Operands) override;
bool ParseDirective(AsmToken DirectiveID) override;
- unsigned validateTargetOperandClass(MCParsedAsmOperand *Op,
+ unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
unsigned Kind) override;
// Custom parse functions for Sparc specific operands.
- OperandMatchResultTy
- parseMEMOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+ OperandMatchResultTy parseMEMOperand(OperandVector &Operands);
- OperandMatchResultTy
- parseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
- StringRef Name);
+ OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Name);
OperandMatchResultTy
- parseSparcAsmOperand(SparcOperand *&Operand, bool isCall = false);
+ parseSparcAsmOperand(std::unique_ptr<SparcOperand> &Operand,
+ bool isCall = false);
- OperandMatchResultTy
- parseBranchModifiers(SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+ OperandMatchResultTy parseBranchModifiers(OperandVector &Operands);
// returns true if Tok is matched to a register and returns register in RegNo.
bool matchRegisterName(const AsmToken &Tok, unsigned &RegNo,
@@ -153,8 +149,6 @@ private:
SMLoc StartLoc, EndLoc;
- SparcOperand(KindTy K) : MCParsedAsmOperand(), Kind(K) {}
-
struct Token {
const char *Data;
unsigned Length;
@@ -182,6 +176,8 @@ private:
struct MemOp Mem;
};
public:
+ SparcOperand(KindTy K) : MCParsedAsmOperand(), Kind(K) {}
+
bool isToken() const override { return Kind == k_Token; }
bool isReg() const override { return Kind == k_Register; }
bool isImm() const override { return Kind == k_Immediate; }
@@ -291,8 +287,8 @@ public:
addExpr(Inst, Expr);
}
- static SparcOperand *CreateToken(StringRef Str, SMLoc S) {
- SparcOperand *Op = new SparcOperand(k_Token);
+ static std::unique_ptr<SparcOperand> CreateToken(StringRef Str, SMLoc S) {
+ auto Op = make_unique<SparcOperand>(k_Token);
Op->Tok.Data = Str.data();
Op->Tok.Length = Str.size();
Op->StartLoc = S;
@@ -300,10 +296,9 @@ public:
return Op;
}
- static SparcOperand *CreateReg(unsigned RegNum,
- unsigned Kind,
- SMLoc S, SMLoc E) {
- SparcOperand *Op = new SparcOperand(k_Register);
+ static std::unique_ptr<SparcOperand> CreateReg(unsigned RegNum, unsigned Kind,
+ SMLoc S, SMLoc E) {
+ auto Op = make_unique<SparcOperand>(k_Register);
Op->Reg.RegNum = RegNum;
Op->Reg.Kind = (SparcOperand::RegisterKind)Kind;
Op->StartLoc = S;
@@ -311,49 +306,51 @@ public:
return Op;
}
- static SparcOperand *CreateImm(const MCExpr *Val, SMLoc S, SMLoc E) {
- SparcOperand *Op = new SparcOperand(k_Immediate);
+ static std::unique_ptr<SparcOperand> CreateImm(const MCExpr *Val, SMLoc S,
+ SMLoc E) {
+ auto Op = make_unique<SparcOperand>(k_Immediate);
Op->Imm.Val = Val;
Op->StartLoc = S;
Op->EndLoc = E;
return Op;
}
- static SparcOperand *MorphToDoubleReg(SparcOperand *Op) {
- unsigned Reg = Op->getReg();
- assert(Op->Reg.Kind == rk_FloatReg);
+ static bool MorphToDoubleReg(SparcOperand &Op) {
+ unsigned Reg = Op.getReg();
+ assert(Op.Reg.Kind == rk_FloatReg);
unsigned regIdx = Reg - Sparc::F0;
if (regIdx % 2 || regIdx > 31)
- return nullptr;
- Op->Reg.RegNum = DoubleRegs[regIdx / 2];
- Op->Reg.Kind = rk_DoubleReg;
- return Op;
+ return false;
+ Op.Reg.RegNum = DoubleRegs[regIdx / 2];
+ Op.Reg.Kind = rk_DoubleReg;
+ return true;
}
- static SparcOperand *MorphToQuadReg(SparcOperand *Op) {
- unsigned Reg = Op->getReg();
+ static bool MorphToQuadReg(SparcOperand &Op) {
+ unsigned Reg = Op.getReg();
unsigned regIdx = 0;
- switch (Op->Reg.Kind) {
- default: assert(0 && "Unexpected register kind!");
+ switch (Op.Reg.Kind) {
+ default: llvm_unreachable("Unexpected register kind!");
case rk_FloatReg:
regIdx = Reg - Sparc::F0;
if (regIdx % 4 || regIdx > 31)
- return nullptr;
+ return false;
Reg = QuadFPRegs[regIdx / 4];
break;
case rk_DoubleReg:
regIdx = Reg - Sparc::D0;
if (regIdx % 2 || regIdx > 31)
- return nullptr;
+ return false;
Reg = QuadFPRegs[regIdx / 2];
break;
}
- Op->Reg.RegNum = Reg;
- Op->Reg.Kind = rk_QuadReg;
- return Op;
+ Op.Reg.RegNum = Reg;
+ Op.Reg.Kind = rk_QuadReg;
+ return true;
}
- static SparcOperand *MorphToMEMrr(unsigned Base, SparcOperand *Op) {
+ static std::unique_ptr<SparcOperand>
+ MorphToMEMrr(unsigned Base, std::unique_ptr<SparcOperand> Op) {
unsigned offsetReg = Op->getReg();
Op->Kind = k_MemoryReg;
Op->Mem.Base = Base;
@@ -362,10 +359,9 @@ public:
return Op;
}
- static SparcOperand *CreateMEMri(unsigned Base,
- const MCExpr *Off,
- SMLoc S, SMLoc E) {
- SparcOperand *Op = new SparcOperand(k_MemoryImm);
+ static std::unique_ptr<SparcOperand>
+ CreateMEMri(unsigned Base, const MCExpr *Off, SMLoc S, SMLoc E) {
+ auto Op = make_unique<SparcOperand>(k_MemoryImm);
Op->Mem.Base = Base;
Op->Mem.OffsetReg = 0;
Op->Mem.Off = Off;
@@ -374,7 +370,8 @@ public:
return Op;
}
- static SparcOperand *MorphToMEMri(unsigned Base, SparcOperand *Op) {
+ static std::unique_ptr<SparcOperand>
+ MorphToMEMri(unsigned Base, std::unique_ptr<SparcOperand> Op) {
const MCExpr *Imm = Op->getImm();
Op->Kind = k_MemoryImm;
Op->Mem.Base = Base;
@@ -386,11 +383,11 @@ public:
} // end namespace
-bool SparcAsmParser::
-MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
- SmallVectorImpl<MCParsedAsmOperand*> &Operands,
- MCStreamer &Out, unsigned &ErrorInfo,
- bool MatchingInlineAsm) {
+bool SparcAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
+ OperandVector &Operands,
+ MCStreamer &Out,
+ unsigned &ErrorInfo,
+ bool MatchingInlineAsm) {
MCInst Inst;
SmallVector<MCInst, 8> Instructions;
unsigned MatchResult = MatchInstructionImpl(Operands, Inst, ErrorInfo,
@@ -415,7 +412,7 @@ MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
if (ErrorInfo >= Operands.size())
return Error(IDLoc, "too few operands for instruction");
- ErrorLoc = ((SparcOperand*) Operands[ErrorInfo])->getStartLoc();
+ ErrorLoc = ((SparcOperand &)*Operands[ErrorInfo]).getStartLoc();
if (ErrorLoc == SMLoc())
ErrorLoc = IDLoc;
}
@@ -450,11 +447,9 @@ ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc)
static void applyMnemonicAliases(StringRef &Mnemonic, unsigned Features,
unsigned VariantID);
-bool SparcAsmParser::
-ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
- SMLoc NameLoc,
- SmallVectorImpl<MCParsedAsmOperand*> &Operands)
-{
+bool SparcAsmParser::ParseInstruction(ParseInstructionInfo &Info,
+ StringRef Name, SMLoc NameLoc,
+ OperandVector &Operands) {
// First operand in MCInst is instruction mnemonic.
Operands.push_back(SparcOperand::CreateToken(Name, NameLoc));
@@ -548,9 +543,8 @@ bool SparcAsmParser:: parseDirectiveWord(unsigned Size, SMLoc L) {
return false;
}
-SparcAsmParser::OperandMatchResultTy SparcAsmParser::
-parseMEMOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands)
-{
+SparcAsmParser::OperandMatchResultTy
+SparcAsmParser::parseMEMOperand(OperandVector &Operands) {
SMLoc S, E;
unsigned BaseReg = 0;
@@ -575,23 +569,20 @@ parseMEMOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands)
break;
}
- SparcOperand *Offset = nullptr;
+ std::unique_ptr<SparcOperand> Offset;
OperandMatchResultTy ResTy = parseSparcAsmOperand(Offset);
if (ResTy != MatchOperand_Success || !Offset)
return MatchOperand_NoMatch;
- Offset = (Offset->isImm()
- ? SparcOperand::MorphToMEMri(BaseReg, Offset)
- : SparcOperand::MorphToMEMrr(BaseReg, Offset));
+ Operands.push_back(
+ Offset->isImm() ? SparcOperand::MorphToMEMri(BaseReg, std::move(Offset))
+ : SparcOperand::MorphToMEMrr(BaseReg, std::move(Offset)));
- Operands.push_back(Offset);
return MatchOperand_Success;
}
-SparcAsmParser::OperandMatchResultTy SparcAsmParser::
-parseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
- StringRef Mnemonic)
-{
+SparcAsmParser::OperandMatchResultTy
+SparcAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
@@ -637,21 +628,21 @@ parseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
return MatchOperand_Success;
}
- SparcOperand *Op = nullptr;
+ std::unique_ptr<SparcOperand> Op;
ResTy = parseSparcAsmOperand(Op, (Mnemonic == "call"));
if (ResTy != MatchOperand_Success || !Op)
return MatchOperand_ParseFail;
// Push the parsed operand into the list of operands
- Operands.push_back(Op);
+ Operands.push_back(std::move(Op));
return MatchOperand_Success;
}
SparcAsmParser::OperandMatchResultTy
-SparcAsmParser::parseSparcAsmOperand(SparcOperand *&Op, bool isCall)
-{
+SparcAsmParser::parseSparcAsmOperand(std::unique_ptr<SparcOperand> &Op,
+ bool isCall) {
SMLoc S = Parser.getTok().getLoc();
SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
@@ -718,8 +709,8 @@ SparcAsmParser::parseSparcAsmOperand(SparcOperand *&Op, bool isCall)
return (Op) ? MatchOperand_Success : MatchOperand_ParseFail;
}
-SparcAsmParser::OperandMatchResultTy SparcAsmParser::
-parseBranchModifiers(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+SparcAsmParser::OperandMatchResultTy
+SparcAsmParser::parseBranchModifiers(OperandVector &Operands) {
// parse (,a|,pn|,pt)+
@@ -928,18 +919,14 @@ extern "C" void LLVMInitializeSparcAsmParser() {
#define GET_MATCHER_IMPLEMENTATION
#include "SparcGenAsmMatcher.inc"
-
-
-unsigned SparcAsmParser::
-validateTargetOperandClass(MCParsedAsmOperand *GOp,
- unsigned Kind)
-{
- SparcOperand *Op = (SparcOperand*)GOp;
- if (Op->isFloatOrDoubleReg()) {
+unsigned SparcAsmParser::validateTargetOperandClass(MCParsedAsmOperand &GOp,
+ unsigned Kind) {
+ SparcOperand &Op = (SparcOperand &)GOp;
+ if (Op.isFloatOrDoubleReg()) {
switch (Kind) {
default: break;
case MCK_DFPRegs:
- if (!Op->isFloatReg() || SparcOperand::MorphToDoubleReg(Op))
+ if (!Op.isFloatReg() || SparcOperand::MorphToDoubleReg(Op))
return MCTargetAsmParser::Match_Success;
break;
case MCK_QFPRegs:
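(The mechanical theme running through this parser rewrite is ownership: operand factories now return std::unique_ptr instead of raw new, OperandVector owns its elements, and the Morph helpers split into two shapes: in-place mutation reporting success as bool for MorphToDoubleReg/MorphToQuadReg, versus explicit ownership pass-through for MorphToMEMri/MorphToMEMrr. A standalone sketch of the pattern, with a hypothetical Operand type, C++14, not the LLVM classes:

    #include <memory>
    #include <vector>

    struct Operand {
      unsigned Reg = 0;

      // Factory hands ownership to the caller.
      static std::unique_ptr<Operand> createReg(unsigned R) {
        auto Op = std::make_unique<Operand>();
        Op->Reg = R;
        return Op;
      }

      // In-place morph: on failure the operand is left untouched,
      // replacing the old pointer-or-nullptr convention.
      static bool morphToDoubleReg(Operand &Op) {
        if (Op.Reg % 2)
          return false;
        Op.Reg /= 2;
        return true;
      }
    };

    int main() {
      std::vector<std::unique_ptr<Operand>> Operands;
      auto Op = Operand::createReg(4);
      if (Operand::morphToDoubleReg(*Op))
        Operands.push_back(std::move(Op)); // ownership moves into the vector
      return Operands.size() == 1 ? 0 : 1;
    }

The SystemZ parser later in this patch gets the identical treatment.)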
diff --git a/lib/Target/Sparc/InstPrinter/SparcInstPrinter.cpp b/lib/Target/Sparc/InstPrinter/SparcInstPrinter.cpp
index 261fb38..5975a51 100644
--- a/lib/Target/Sparc/InstPrinter/SparcInstPrinter.cpp
+++ b/lib/Target/Sparc/InstPrinter/SparcInstPrinter.cpp
@@ -173,6 +173,6 @@ void SparcInstPrinter::printCCOperand(const MCInst *MI, int opNum,
bool SparcInstPrinter::printGetPCX(const MCInst *MI, unsigned opNum,
raw_ostream &O)
{
- assert(0 && "FIXME: Implement SparcInstPrinter::printGetPCX.");
+ llvm_unreachable("FIXME: Implement SparcInstPrinter::printGetPCX.");
return true;
}
diff --git a/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp b/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp
index 7d517b6..dcd81e3 100644
--- a/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp
+++ b/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp
@@ -196,12 +196,12 @@ namespace {
const MCRelaxableFragment *DF,
const MCAsmLayout &Layout) const override {
// FIXME.
- assert(0 && "fixupNeedsRelaxation() unimplemented");
+ llvm_unreachable("fixupNeedsRelaxation() unimplemented");
return false;
}
void relaxInstruction(const MCInst &Inst, MCInst &Res) const override {
// FIXME.
- assert(0 && "relaxInstruction() unimplemented");
+ llvm_unreachable("relaxInstruction() unimplemented");
}
bool writeNopData(uint64_t Count, MCObjectWriter *OW) const override {
diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCCodeEmitter.cpp b/lib/Target/Sparc/MCTargetDesc/SparcMCCodeEmitter.cpp
index b19ad7b..eea9626 100644
--- a/lib/Target/Sparc/MCTargetDesc/SparcMCCodeEmitter.cpp
+++ b/lib/Target/Sparc/MCTargetDesc/SparcMCCodeEmitter.cpp
@@ -133,7 +133,7 @@ getMachineOpValue(const MCInst &MI, const MCOperand &MO,
if (Expr->EvaluateAsAbsolute(Res))
return Res;
- assert(0 && "Unhandled expression!");
+ llvm_unreachable("Unhandled expression!");
return 0;
}
diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.cpp b/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.cpp
index ae57fdc..7f01ab0 100644
--- a/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.cpp
+++ b/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.cpp
@@ -16,6 +16,7 @@
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCELF.h"
+#include "llvm/MC/MCObjectStreamer.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Object/ELF.h"
@@ -124,7 +125,7 @@ SparcMCExpr::VariantKind SparcMCExpr::parseVariantKind(StringRef name)
Sparc::Fixups SparcMCExpr::getFixupKind(SparcMCExpr::VariantKind Kind) {
switch (Kind) {
- default: assert(0 && "Unhandled SparcMCExpr::VariantKind");
+ default: llvm_unreachable("Unhandled SparcMCExpr::VariantKind");
case VK_Sparc_LO: return Sparc::fixup_sparc_lo10;
case VK_Sparc_HI: return Sparc::fixup_sparc_hi22;
case VK_Sparc_H44: return Sparc::fixup_sparc_h44;
@@ -219,35 +220,6 @@ void SparcMCExpr::fixELFSymbolsInTLSFixups(MCAssembler &Asm) const {
fixELFSymbolsInTLSFixupsImpl(getSubExpr(), Asm);
}
-// FIXME: This basically copies MCObjectStreamer::AddValueSymbols. Perhaps
-// that method should be made public?
-// FIXME: really do above: now that at least three other backends are using it.
-static void AddValueSymbolsImpl(const MCExpr *Value, MCAssembler *Asm) {
- switch (Value->getKind()) {
- case MCExpr::Target:
- llvm_unreachable("Can't handle nested target expr!");
- break;
-
- case MCExpr::Constant:
- break;
-
- case MCExpr::Binary: {
- const MCBinaryExpr *BE = cast<MCBinaryExpr>(Value);
- AddValueSymbolsImpl(BE->getLHS(), Asm);
- AddValueSymbolsImpl(BE->getRHS(), Asm);
- break;
- }
-
- case MCExpr::SymbolRef:
- Asm->getOrCreateSymbolData(cast<MCSymbolRefExpr>(Value)->getSymbol());
- break;
-
- case MCExpr::Unary:
- AddValueSymbolsImpl(cast<MCUnaryExpr>(Value)->getSubExpr(), Asm);
- break;
- }
-}
-
-void SparcMCExpr::AddValueSymbols(MCAssembler *Asm) const {
- AddValueSymbolsImpl(getSubExpr(), Asm);
+void SparcMCExpr::visitUsedExpr(MCStreamer &Streamer) const {
+ Streamer.visitUsedExpr(*getSubExpr());
}
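(The deleted AddValueSymbolsImpl was, per its own FIXME, at least the third copy of the same recursive walk across backends. visitUsedExpr inverts the control flow: the streamer owns both the recursion and the symbol bookkeeping, and a target expression merely forwards its sub-expression. A minimal standalone model of that inversion, with illustrative types, not the real MC classes:

    #include <vector>

    struct Streamer;

    struct Expr {
      std::vector<const Expr *> SubExprs;
      virtual ~Expr() = default;
      // Target hook: report direct children only; no bookkeeping here.
      virtual void visitUsedExpr(Streamer &S) const;
    };

    struct Streamer {
      int NodesSeen = 0;
      void visitUsedExpr(const Expr &E) {
        ++NodesSeen;            // central bookkeeping lives in one place
        E.visitUsedExpr(*this); // the node forwards, the streamer recurses
      }
    };

    void Expr::visitUsedExpr(Streamer &S) const {
      for (const Expr *Sub : SubExprs)
        S.visitUsedExpr(*Sub);
    }

    int main() {
      Expr Leaf, Root;
      Root.SubExprs.push_back(&Leaf);
      Streamer S;
      S.visitUsedExpr(Root);
      return S.NodesSeen == 2 ? 0 : 1;
    }
)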
diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.h b/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.h
index 78dd945..f0d0ef3 100644
--- a/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.h
+++ b/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.h
@@ -88,7 +88,7 @@ public:
void PrintImpl(raw_ostream &OS) const override;
bool EvaluateAsRelocatableImpl(MCValue &Res,
const MCAsmLayout *Layout) const override;
- void AddValueSymbols(MCAssembler *) const override;
+ void visitUsedExpr(MCStreamer &Streamer) const override;
const MCSection *FindAssociatedSection() const override {
return getSubExpr()->FindAssociatedSection();
}
diff --git a/lib/Target/Sparc/SparcFrameLowering.cpp b/lib/Target/Sparc/SparcFrameLowering.cpp
index a37da94..3cdfda3 100644
--- a/lib/Target/Sparc/SparcFrameLowering.cpp
+++ b/lib/Target/Sparc/SparcFrameLowering.cpp
@@ -14,6 +14,7 @@
#include "SparcFrameLowering.h"
#include "SparcInstrInfo.h"
#include "SparcMachineFunctionInfo.h"
+#include "SparcSubtarget.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -32,6 +33,9 @@ DisableLeafProc("disable-sparc-leaf-proc",
cl::desc("Disable Sparc leaf procedure optimization."),
cl::Hidden);
+SparcFrameLowering::SparcFrameLowering(const SparcSubtarget &ST)
+ : TargetFrameLowering(TargetFrameLowering::StackGrowsDown,
+ ST.is64Bit() ? 16 : 8, 0, ST.is64Bit() ? 16 : 8) {}
void SparcFrameLowering::emitSPAdjustment(MachineFunction &MF,
MachineBasicBlock &MBB,
@@ -99,7 +103,9 @@ void SparcFrameLowering::emitPrologue(MachineFunction &MF) const {
SAVEri = SP::ADDri;
SAVErr = SP::ADDrr;
}
- NumBytes = - SubTarget.getAdjustedFrameSize(NumBytes);
+ NumBytes =
+ -MF.getTarget().getSubtarget<SparcSubtarget>().getAdjustedFrameSize(
+ NumBytes);
emitSPAdjustment(MF, MBB, MBBI, NumBytes, SAVErr, SAVEri);
MachineModuleInfo &MMI = MF.getMMI();
@@ -162,7 +168,8 @@ void SparcFrameLowering::emitEpilogue(MachineFunction &MF,
if (NumBytes == 0)
return;
- NumBytes = SubTarget.getAdjustedFrameSize(NumBytes);
+ NumBytes = MF.getTarget().getSubtarget<SparcSubtarget>().getAdjustedFrameSize(
+ NumBytes);
emitSPAdjustment(MF, MBB, MBBI, NumBytes, SP::ADDrr, SP::ADDri);
}
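(SparcFrameLowering drops its cached SubTarget member and moves its constructor out of line because the subtarget itself now owns the frame-lowering object, as SparcSubtarget.h later in this patch shows. Keeping the back-reference, and the SparcSubtarget.h include it required, would have made the two headers circular, so emitPrologue/emitEpilogue re-query the subtarget through the MachineFunction instead.)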
diff --git a/lib/Target/Sparc/SparcFrameLowering.h b/lib/Target/Sparc/SparcFrameLowering.h
index bda7b7c..a7d1b89 100644
--- a/lib/Target/Sparc/SparcFrameLowering.h
+++ b/lib/Target/Sparc/SparcFrameLowering.h
@@ -15,19 +15,14 @@
#define SPARC_FRAMEINFO_H
#include "Sparc.h"
-#include "SparcSubtarget.h"
#include "llvm/Target/TargetFrameLowering.h"
namespace llvm {
- class SparcSubtarget;
+class SparcSubtarget;
class SparcFrameLowering : public TargetFrameLowering {
- const SparcSubtarget &SubTarget;
public:
- explicit SparcFrameLowering(const SparcSubtarget &ST)
- : TargetFrameLowering(TargetFrameLowering::StackGrowsDown,
- ST.is64Bit() ? 16 : 8, 0, ST.is64Bit() ? 16 : 8),
- SubTarget(ST) {}
+ explicit SparcFrameLowering(const SparcSubtarget &ST);
/// emitProlog/emitEpilog - These methods insert prolog and epilog code into
/// the function.
diff --git a/lib/Target/Sparc/SparcISelLowering.cpp b/lib/Target/Sparc/SparcISelLowering.cpp
index ef61466..990f52a 100644
--- a/lib/Target/Sparc/SparcISelLowering.cpp
+++ b/lib/Target/Sparc/SparcISelLowering.cpp
@@ -2030,7 +2030,7 @@ SparcTargetLowering::LowerF128Op(SDValue Op, SelectionDAG &DAG,
}
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(SDLoc(Op)).setChain(Chain)
- .setCallee(CallingConv::C, RetTyABI, Callee, &Args, 0);
+ .setCallee(CallingConv::C, RetTyABI, Callee, std::move(Args), 0);
std::pair<SDValue, SDValue> CallInfo = LowerCallTo(CLI);
@@ -2086,7 +2086,7 @@ SparcTargetLowering::LowerF128Compare(SDValue LHS, SDValue RHS,
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(DL).setChain(Chain)
- .setCallee(CallingConv::C, RetTy, Callee, &Args, 0);
+ .setCallee(CallingConv::C, RetTy, Callee, std::move(Args), 0);
std::pair<SDValue, SDValue> CallInfo = LowerCallTo(CLI);
diff --git a/lib/Target/Sparc/SparcJITInfo.cpp b/lib/Target/Sparc/SparcJITInfo.cpp
index c775e9e..d0eec98 100644
--- a/lib/Target/Sparc/SparcJITInfo.cpp
+++ b/lib/Target/Sparc/SparcJITInfo.cpp
@@ -213,7 +213,8 @@ extern "C" void *SparcCompilationCallbackC(intptr_t StubAddr) {
void SparcJITInfo::replaceMachineCodeForFunction(void *Old, void *New) {
- assert(0 && "FIXME: Implement SparcJITInfo::replaceMachineCodeForFunction");
+ llvm_unreachable("FIXME: Implement SparcJITInfo::"
+ "replaceMachineCodeForFunction");
}
diff --git a/lib/Target/Sparc/SparcSelectionDAGInfo.cpp b/lib/Target/Sparc/SparcSelectionDAGInfo.cpp
index eb36d29..a308fc5 100644
--- a/lib/Target/Sparc/SparcSelectionDAGInfo.cpp
+++ b/lib/Target/Sparc/SparcSelectionDAGInfo.cpp
@@ -11,13 +11,13 @@
//
//===----------------------------------------------------------------------===//
-#include "SparcTargetMachine.h"
+#include "SparcSelectionDAGInfo.h"
using namespace llvm;
#define DEBUG_TYPE "sparc-selectiondag-info"
-SparcSelectionDAGInfo::SparcSelectionDAGInfo(const SparcTargetMachine &TM)
- : TargetSelectionDAGInfo(TM) {
+SparcSelectionDAGInfo::SparcSelectionDAGInfo(const DataLayout &DL)
+ : TargetSelectionDAGInfo(&DL) {
}
SparcSelectionDAGInfo::~SparcSelectionDAGInfo() {
diff --git a/lib/Target/Sparc/SparcSelectionDAGInfo.h b/lib/Target/Sparc/SparcSelectionDAGInfo.h
index dcd4203..2346f41 100644
--- a/lib/Target/Sparc/SparcSelectionDAGInfo.h
+++ b/lib/Target/Sparc/SparcSelectionDAGInfo.h
@@ -22,7 +22,7 @@ class SparcTargetMachine;
class SparcSelectionDAGInfo : public TargetSelectionDAGInfo {
public:
- explicit SparcSelectionDAGInfo(const SparcTargetMachine &TM);
+ explicit SparcSelectionDAGInfo(const DataLayout &DL);
~SparcSelectionDAGInfo();
};
diff --git a/lib/Target/Sparc/SparcSubtarget.cpp b/lib/Target/Sparc/SparcSubtarget.cpp
index e38fb02..eea0c8c 100644
--- a/lib/Target/Sparc/SparcSubtarget.cpp
+++ b/lib/Target/Sparc/SparcSubtarget.cpp
@@ -26,20 +26,44 @@ using namespace llvm;
void SparcSubtarget::anchor() { }
-SparcSubtarget::SparcSubtarget(const std::string &TT, const std::string &CPU,
- const std::string &FS, bool is64Bit) :
- SparcGenSubtargetInfo(TT, CPU, FS),
- IsV9(false),
- V8DeprecatedInsts(false),
- IsVIS(false),
- Is64Bit(is64Bit),
- HasHardQuad(false),
- UsePopc(false) {
+static std::string computeDataLayout(const SparcSubtarget &ST) {
+ // Sparc is big endian.
+ std::string Ret = "E-m:e";
+
+ // Some ABIs have 32-bit pointers.
+ if (!ST.is64Bit())
+ Ret += "-p:32:32";
+
+ // Alignments for 64-bit integers.
+ Ret += "-i64:64";
+
+ // On SparcV9, 128-bit floats are aligned to 128 bits; on others, only to 64.
+ // On SparcV9, registers can hold 64 or 32 bits; on others, only 32.
+ if (ST.is64Bit())
+ Ret += "-n32:64";
+ else
+ Ret += "-f128:64-n32";
+
+ if (ST.is64Bit())
+ Ret += "-S128";
+ else
+ Ret += "-S64";
+
+ return Ret;
+}
+
+SparcSubtarget &SparcSubtarget::initializeSubtargetDependencies(StringRef CPU,
+ StringRef FS) {
+ IsV9 = false;
+ V8DeprecatedInsts = false;
+ IsVIS = false;
+ HasHardQuad = false;
+ UsePopc = false;
// Determine default and user specified characteristics
std::string CPUName = CPU;
if (CPUName.empty())
- CPUName = (is64Bit) ? "v9" : "v8";
+ CPUName = (Is64Bit) ? "v9" : "v8";
// Parse features string.
ParseSubtargetFeatures(CPUName, FS);
@@ -47,8 +71,16 @@ SparcSubtarget::SparcSubtarget(const std::string &TT, const std::string &CPU,
// Popc is a v9-only instruction.
if (!IsV9)
UsePopc = false;
+
+ return *this;
}
+SparcSubtarget::SparcSubtarget(const std::string &TT, const std::string &CPU,
+ const std::string &FS, TargetMachine &TM,
+ bool is64Bit)
+ : SparcGenSubtargetInfo(TT, CPU, FS), Is64Bit(is64Bit),
+ DL(computeDataLayout(initializeSubtargetDependencies(CPU, FS))),
+ InstrInfo(*this), TLInfo(TM), TSInfo(DL), FrameLowering(*this) {}
int SparcSubtarget::getAdjustedFrameSize(int frameSize) const {
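(Two details are easy to miss in this move. First, the const DL member can only be computed after the feature string is parsed, which is why the constructor initializes it via computeDataLayout(initializeSubtargetDependencies(CPU, FS)): that helper runs ParseSubtargetFeatures and returns *this before DL is built. Second, the strings the function produces are worth spelling out; a standalone re-derivation in plain C++, not the LLVM class:

    #include <cassert>
    #include <string>

    // Mirrors computeDataLayout above; expected results:
    //   32-bit: "E-m:e-p:32:32-i64:64-f128:64-n32-S64"
    //   64-bit: "E-m:e-i64:64-n32:64-S128"
    static std::string sparcDataLayout(bool Is64Bit) {
      std::string Ret = "E-m:e";                   // big endian, ELF mangling
      if (!Is64Bit)
        Ret += "-p:32:32";                         // 32-bit pointers
      Ret += "-i64:64";                            // i64 aligned to 64 bits
      Ret += Is64Bit ? "-n32:64" : "-f128:64-n32"; // native widths, f128 align
      Ret += Is64Bit ? "-S128" : "-S64";           // natural stack alignment
      return Ret;
    }

    int main() {
      assert(sparcDataLayout(false) == "E-m:e-p:32:32-i64:64-f128:64-n32-S64");
      assert(sparcDataLayout(true) == "E-m:e-i64:64-n32:64-S128");
      return 0;
    }
)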
diff --git a/lib/Target/Sparc/SparcSubtarget.h b/lib/Target/Sparc/SparcSubtarget.h
index 4025622..a335778 100644
--- a/lib/Target/Sparc/SparcSubtarget.h
+++ b/lib/Target/Sparc/SparcSubtarget.h
@@ -14,6 +14,13 @@
#ifndef SPARC_SUBTARGET_H
#define SPARC_SUBTARGET_H
+#include "SparcFrameLowering.h"
+#include "SparcInstrInfo.h"
+#include "SparcISelLowering.h"
+#include "SparcJITInfo.h"
+#include "SparcSelectionDAGInfo.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include <string>
@@ -31,10 +38,26 @@ class SparcSubtarget : public SparcGenSubtargetInfo {
bool Is64Bit;
bool HasHardQuad;
bool UsePopc;
+ const DataLayout DL; // Calculates type size & alignment
+ SparcInstrInfo InstrInfo;
+ SparcTargetLowering TLInfo;
+ SparcSelectionDAGInfo TSInfo;
+ SparcFrameLowering FrameLowering;
+ SparcJITInfo JITInfo;
public:
SparcSubtarget(const std::string &TT, const std::string &CPU,
- const std::string &FS, bool is64bit);
+ const std::string &FS, TargetMachine &TM, bool is64bit);
+
+ const SparcInstrInfo *getInstrInfo() const { return &InstrInfo; }
+ const TargetFrameLowering *getFrameLowering() const { return &FrameLowering; }
+ const SparcRegisterInfo *getRegisterInfo() const {
+ return &InstrInfo.getRegisterInfo();
+ }
+ const SparcTargetLowering *getTargetLowering() const { return &TLInfo; }
+ const SparcSelectionDAGInfo *getSelectionDAGInfo() const { return &TSInfo; }
+ SparcJITInfo *getJITInfo() { return &JITInfo; }
+ const DataLayout *getDataLayout() const { return &DL; }
bool isV9() const { return IsV9; }
bool isVIS() const { return IsVIS; }
@@ -47,6 +70,7 @@ public:
/// ParseSubtargetFeatures - Parses features string setting specified
/// subtarget options. Definition of function is auto generated by tblgen.
void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
+ SparcSubtarget &initializeSubtargetDependencies(StringRef CPU, StringRef FS);
bool is64Bit() const { return Is64Bit; }
diff --git a/lib/Target/Sparc/SparcTargetMachine.cpp b/lib/Target/Sparc/SparcTargetMachine.cpp
index 2469d93..0130fac 100644
--- a/lib/Target/Sparc/SparcTargetMachine.cpp
+++ b/lib/Target/Sparc/SparcTargetMachine.cpp
@@ -23,32 +23,6 @@ extern "C" void LLVMInitializeSparcTarget() {
RegisterTargetMachine<SparcV9TargetMachine> Y(TheSparcV9Target);
}
-static std::string computeDataLayout(const SparcSubtarget &ST) {
- // Sparc is big endian.
- std::string Ret = "E-m:e";
-
- // Some ABIs have 32bit pointers.
- if (!ST.is64Bit())
- Ret += "-p:32:32";
-
- // Alignments for 64 bit integers.
- Ret += "-i64:64";
-
- // On SparcV9 128 floats are aligned to 128 bits, on others only to 64.
- // On SparcV9 registers can hold 64 or 32 bits, on others only 32.
- if (ST.is64Bit())
- Ret += "-n32:64";
- else
- Ret += "-f128:64-n32";
-
- if (ST.is64Bit())
- Ret += "-S128";
- else
- Ret += "-S64";
-
- return Ret;
-}
-
/// SparcTargetMachine ctor - Create an ILP32 architecture model
///
SparcTargetMachine::SparcTargetMachine(const Target &T, StringRef TT,
@@ -58,11 +32,7 @@ SparcTargetMachine::SparcTargetMachine(const Target &T, StringRef TT,
CodeGenOpt::Level OL,
bool is64bit)
: LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
- Subtarget(TT, CPU, FS, is64bit),
- DL(computeDataLayout(Subtarget)),
- InstrInfo(Subtarget),
- TLInfo(*this), TSInfo(*this),
- FrameLowering(Subtarget) {
+ Subtarget(TT, CPU, FS, *this, is64bit) {
initAsmInfo();
}
diff --git a/lib/Target/Sparc/SparcTargetMachine.h b/lib/Target/Sparc/SparcTargetMachine.h
index 7d04338..03b5137 100644
--- a/lib/Target/Sparc/SparcTargetMachine.h
+++ b/lib/Target/Sparc/SparcTargetMachine.h
@@ -14,50 +14,40 @@
#ifndef SPARCTARGETMACHINE_H
#define SPARCTARGETMACHINE_H
-#include "SparcFrameLowering.h"
-#include "SparcISelLowering.h"
#include "SparcInstrInfo.h"
-#include "SparcJITInfo.h"
-#include "SparcSelectionDAGInfo.h"
#include "SparcSubtarget.h"
-#include "llvm/IR/DataLayout.h"
-#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetMachine.h"
namespace llvm {
class SparcTargetMachine : public LLVMTargetMachine {
SparcSubtarget Subtarget;
- const DataLayout DL; // Calculates type size & alignment
- SparcInstrInfo InstrInfo;
- SparcTargetLowering TLInfo;
- SparcSelectionDAGInfo TSInfo;
- SparcFrameLowering FrameLowering;
- SparcJITInfo JITInfo;
public:
SparcTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL, bool is64bit);
- const SparcInstrInfo *getInstrInfo() const override { return &InstrInfo; }
- const TargetFrameLowering *getFrameLowering() const override {
- return &FrameLowering;
+ const SparcInstrInfo *getInstrInfo() const override {
+ return getSubtargetImpl()->getInstrInfo();
+ }
+ const TargetFrameLowering *getFrameLowering() const override {
+ return getSubtargetImpl()->getFrameLowering();
}
const SparcSubtarget *getSubtargetImpl() const override { return &Subtarget; }
const SparcRegisterInfo *getRegisterInfo() const override {
- return &InstrInfo.getRegisterInfo();
+ return getSubtargetImpl()->getRegisterInfo();
}
- const SparcTargetLowering* getTargetLowering() const override {
- return &TLInfo;
+ const SparcTargetLowering *getTargetLowering() const override {
+ return getSubtargetImpl()->getTargetLowering();
}
- const SparcSelectionDAGInfo* getSelectionDAGInfo() const override {
- return &TSInfo;
+ const SparcSelectionDAGInfo *getSelectionDAGInfo() const override {
+ return getSubtargetImpl()->getSelectionDAGInfo();
}
- SparcJITInfo *getJITInfo() override {
- return &JITInfo;
+ SparcJITInfo *getJITInfo() override { return Subtarget.getJITInfo(); }
+ const DataLayout *getDataLayout() const override {
+ return getSubtargetImpl()->getDataLayout();
}
- const DataLayout *getDataLayout() const override { return &DL; }
// Pass Pipeline Configuration
TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
diff --git a/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp b/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp
index 71de64f..758be41 100644
--- a/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp
+++ b/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp
@@ -104,10 +104,6 @@ private:
MemOp Mem;
};
- SystemZOperand(OperandKind kind, SMLoc startLoc, SMLoc endLoc)
- : Kind(kind), StartLoc(startLoc), EndLoc(endLoc)
- {}
-
void addExpr(MCInst &Inst, const MCExpr *Expr) const {
// Add as immediates when possible. Null MCExpr = 0.
if (!Expr)
@@ -119,40 +115,44 @@ private:
}
public:
+ SystemZOperand(OperandKind kind, SMLoc startLoc, SMLoc endLoc)
+ : Kind(kind), StartLoc(startLoc), EndLoc(endLoc) {}
+
// Create particular kinds of operand.
- static SystemZOperand *createInvalid(SMLoc StartLoc, SMLoc EndLoc) {
- return new SystemZOperand(KindInvalid, StartLoc, EndLoc);
+ static std::unique_ptr<SystemZOperand> createInvalid(SMLoc StartLoc,
+ SMLoc EndLoc) {
+ return make_unique<SystemZOperand>(KindInvalid, StartLoc, EndLoc);
}
- static SystemZOperand *createToken(StringRef Str, SMLoc Loc) {
- SystemZOperand *Op = new SystemZOperand(KindToken, Loc, Loc);
+ static std::unique_ptr<SystemZOperand> createToken(StringRef Str, SMLoc Loc) {
+ auto Op = make_unique<SystemZOperand>(KindToken, Loc, Loc);
Op->Token.Data = Str.data();
Op->Token.Length = Str.size();
return Op;
}
- static SystemZOperand *createReg(RegisterKind Kind, unsigned Num,
- SMLoc StartLoc, SMLoc EndLoc) {
- SystemZOperand *Op = new SystemZOperand(KindReg, StartLoc, EndLoc);
+ static std::unique_ptr<SystemZOperand>
+ createReg(RegisterKind Kind, unsigned Num, SMLoc StartLoc, SMLoc EndLoc) {
+ auto Op = make_unique<SystemZOperand>(KindReg, StartLoc, EndLoc);
Op->Reg.Kind = Kind;
Op->Reg.Num = Num;
return Op;
}
- static SystemZOperand *createAccessReg(unsigned Num, SMLoc StartLoc,
- SMLoc EndLoc) {
- SystemZOperand *Op = new SystemZOperand(KindAccessReg, StartLoc, EndLoc);
+ static std::unique_ptr<SystemZOperand>
+ createAccessReg(unsigned Num, SMLoc StartLoc, SMLoc EndLoc) {
+ auto Op = make_unique<SystemZOperand>(KindAccessReg, StartLoc, EndLoc);
Op->AccessReg = Num;
return Op;
}
- static SystemZOperand *createImm(const MCExpr *Expr, SMLoc StartLoc,
- SMLoc EndLoc) {
- SystemZOperand *Op = new SystemZOperand(KindImm, StartLoc, EndLoc);
+ static std::unique_ptr<SystemZOperand>
+ createImm(const MCExpr *Expr, SMLoc StartLoc, SMLoc EndLoc) {
+ auto Op = make_unique<SystemZOperand>(KindImm, StartLoc, EndLoc);
Op->Imm = Expr;
return Op;
}
- static SystemZOperand *createMem(RegisterKind RegKind, unsigned Base,
- const MCExpr *Disp, unsigned Index,
- const MCExpr *Length, SMLoc StartLoc,
- SMLoc EndLoc) {
- SystemZOperand *Op = new SystemZOperand(KindMem, StartLoc, EndLoc);
+ static std::unique_ptr<SystemZOperand>
+ createMem(RegisterKind RegKind, unsigned Base, const MCExpr *Disp,
+ unsigned Index, const MCExpr *Length, SMLoc StartLoc,
+ SMLoc EndLoc) {
+ auto Op = make_unique<SystemZOperand>(KindMem, StartLoc, EndLoc);
Op->Mem.RegKind = RegKind;
Op->Mem.Base = Base;
Op->Mem.Index = Index;
@@ -313,21 +313,19 @@ private:
bool parseRegister(Register &Reg, RegisterGroup Group, const unsigned *Regs,
bool IsAddress = false);
- OperandMatchResultTy
- parseRegister(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
- RegisterGroup Group, const unsigned *Regs, RegisterKind Kind);
+ OperandMatchResultTy parseRegister(OperandVector &Operands,
+ RegisterGroup Group, const unsigned *Regs,
+ RegisterKind Kind);
bool parseAddress(unsigned &Base, const MCExpr *&Disp,
unsigned &Index, const MCExpr *&Length,
const unsigned *Regs, RegisterKind RegKind);
- OperandMatchResultTy
- parseAddress(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
- const unsigned *Regs, RegisterKind RegKind,
- MemoryKind MemKind);
+ OperandMatchResultTy parseAddress(OperandVector &Operands,
+ const unsigned *Regs, RegisterKind RegKind,
+ MemoryKind MemKind);
- bool parseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
- StringRef Mnemonic);
+ bool parseOperand(OperandVector &Operands, StringRef Mnemonic);
public:
SystemZAsmParser(MCSubtargetInfo &sti, MCAsmParser &parser,
@@ -343,87 +341,66 @@ public:
// Override MCTargetAsmParser.
bool ParseDirective(AsmToken DirectiveID) override;
bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
- bool ParseInstruction(ParseInstructionInfo &Info,
- StringRef Name, SMLoc NameLoc,
- SmallVectorImpl<MCParsedAsmOperand*> &Operands)
- override;
+ bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
+ SMLoc NameLoc, OperandVector &Operands) override;
bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
- SmallVectorImpl<MCParsedAsmOperand*> &Operands,
- MCStreamer &Out, unsigned &ErrorInfo,
+ OperandVector &Operands, MCStreamer &Out,
+ unsigned &ErrorInfo,
bool MatchingInlineAsm) override;
// Used by the TableGen code to parse particular operand types.
- OperandMatchResultTy
- parseGR32(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ OperandMatchResultTy parseGR32(OperandVector &Operands) {
return parseRegister(Operands, RegGR, SystemZMC::GR32Regs, GR32Reg);
}
- OperandMatchResultTy
- parseGRH32(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ OperandMatchResultTy parseGRH32(OperandVector &Operands) {
return parseRegister(Operands, RegGR, SystemZMC::GRH32Regs, GRH32Reg);
}
- OperandMatchResultTy
- parseGRX32(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ OperandMatchResultTy parseGRX32(OperandVector &Operands) {
llvm_unreachable("GRX32 should only be used for pseudo instructions");
}
- OperandMatchResultTy
- parseGR64(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ OperandMatchResultTy parseGR64(OperandVector &Operands) {
return parseRegister(Operands, RegGR, SystemZMC::GR64Regs, GR64Reg);
}
- OperandMatchResultTy
- parseGR128(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ OperandMatchResultTy parseGR128(OperandVector &Operands) {
return parseRegister(Operands, RegGR, SystemZMC::GR128Regs, GR128Reg);
}
- OperandMatchResultTy
- parseADDR32(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ OperandMatchResultTy parseADDR32(OperandVector &Operands) {
return parseRegister(Operands, RegGR, SystemZMC::GR32Regs, ADDR32Reg);
}
- OperandMatchResultTy
- parseADDR64(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ OperandMatchResultTy parseADDR64(OperandVector &Operands) {
return parseRegister(Operands, RegGR, SystemZMC::GR64Regs, ADDR64Reg);
}
- OperandMatchResultTy
- parseADDR128(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ OperandMatchResultTy parseADDR128(OperandVector &Operands) {
llvm_unreachable("Shouldn't be used as an operand");
}
- OperandMatchResultTy
- parseFP32(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ OperandMatchResultTy parseFP32(OperandVector &Operands) {
return parseRegister(Operands, RegFP, SystemZMC::FP32Regs, FP32Reg);
}
- OperandMatchResultTy
- parseFP64(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ OperandMatchResultTy parseFP64(OperandVector &Operands) {
return parseRegister(Operands, RegFP, SystemZMC::FP64Regs, FP64Reg);
}
- OperandMatchResultTy
- parseFP128(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ OperandMatchResultTy parseFP128(OperandVector &Operands) {
return parseRegister(Operands, RegFP, SystemZMC::FP128Regs, FP128Reg);
}
- OperandMatchResultTy
- parseBDAddr32(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ OperandMatchResultTy parseBDAddr32(OperandVector &Operands) {
return parseAddress(Operands, SystemZMC::GR32Regs, ADDR32Reg, BDMem);
}
- OperandMatchResultTy
- parseBDAddr64(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ OperandMatchResultTy parseBDAddr64(OperandVector &Operands) {
return parseAddress(Operands, SystemZMC::GR64Regs, ADDR64Reg, BDMem);
}
- OperandMatchResultTy
- parseBDXAddr64(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ OperandMatchResultTy parseBDXAddr64(OperandVector &Operands) {
return parseAddress(Operands, SystemZMC::GR64Regs, ADDR64Reg, BDXMem);
}
- OperandMatchResultTy
- parseBDLAddr64(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ OperandMatchResultTy parseBDLAddr64(OperandVector &Operands) {
return parseAddress(Operands, SystemZMC::GR64Regs, ADDR64Reg, BDLMem);
}
- OperandMatchResultTy
- parseAccessReg(SmallVectorImpl<MCParsedAsmOperand*> &Operands);
- OperandMatchResultTy
- parsePCRel(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
- int64_t MinVal, int64_t MaxVal);
- OperandMatchResultTy
- parsePCRel16(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ OperandMatchResultTy parseAccessReg(OperandVector &Operands);
+ OperandMatchResultTy parsePCRel(OperandVector &Operands, int64_t MinVal,
+ int64_t MaxVal);
+ OperandMatchResultTy parsePCRel16(OperandVector &Operands) {
return parsePCRel(Operands, -(1LL << 16), (1LL << 16) - 1);
}
- OperandMatchResultTy
- parsePCRel32(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ OperandMatchResultTy parsePCRel32(OperandVector &Operands) {
return parsePCRel(Operands, -(1LL << 32), (1LL << 32) - 1);
}
};
@@ -497,9 +474,8 @@ bool SystemZAsmParser::parseRegister(Register &Reg, RegisterGroup Group,
// Parse a register and add it to Operands. The other arguments are as above.
SystemZAsmParser::OperandMatchResultTy
-SystemZAsmParser::parseRegister(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
- RegisterGroup Group, const unsigned *Regs,
- RegisterKind Kind) {
+SystemZAsmParser::parseRegister(OperandVector &Operands, RegisterGroup Group,
+ const unsigned *Regs, RegisterKind Kind) {
if (Parser.getTok().isNot(AsmToken::Percent))
return MatchOperand_NoMatch;
@@ -566,9 +542,8 @@ bool SystemZAsmParser::parseAddress(unsigned &Base, const MCExpr *&Disp,
// Parse a memory operand and add it to Operands. The other arguments
// are as above.
SystemZAsmParser::OperandMatchResultTy
-SystemZAsmParser::parseAddress(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
- const unsigned *Regs, RegisterKind RegKind,
- MemoryKind MemKind) {
+SystemZAsmParser::parseAddress(OperandVector &Operands, const unsigned *Regs,
+ RegisterKind RegKind, MemoryKind MemKind) {
SMLoc StartLoc = Parser.getTok().getLoc();
unsigned Base, Index;
const MCExpr *Disp;
@@ -622,9 +597,9 @@ bool SystemZAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
return false;
}
-bool SystemZAsmParser::
-ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc,
- SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+bool SystemZAsmParser::ParseInstruction(ParseInstructionInfo &Info,
+ StringRef Name, SMLoc NameLoc,
+ OperandVector &Operands) {
Operands.push_back(SystemZOperand::createToken(Name, NameLoc));
// Read the remaining operands.
@@ -655,9 +630,8 @@ ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc,
return false;
}
-bool SystemZAsmParser::
-parseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
- StringRef Mnemonic) {
+bool SystemZAsmParser::parseOperand(OperandVector &Operands,
+ StringRef Mnemonic) {
// Check if the current operand has a custom associated parser, if so, try to
// custom parse the operand, or fallback to the general approach.
OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
@@ -700,11 +674,11 @@ parseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
return false;
}
-bool SystemZAsmParser::
-MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
- SmallVectorImpl<MCParsedAsmOperand*> &Operands,
- MCStreamer &Out, unsigned &ErrorInfo,
- bool MatchingInlineAsm) {
+bool SystemZAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
+ OperandVector &Operands,
+ MCStreamer &Out,
+ unsigned &ErrorInfo,
+ bool MatchingInlineAsm) {
MCInst Inst;
unsigned MatchResult;
@@ -739,7 +713,7 @@ MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
if (ErrorInfo >= Operands.size())
return Error(IDLoc, "too few operands for instruction");
- ErrorLoc = ((SystemZOperand*)Operands[ErrorInfo])->getStartLoc();
+ ErrorLoc = ((SystemZOperand &)*Operands[ErrorInfo]).getStartLoc();
if (ErrorLoc == SMLoc())
ErrorLoc = IDLoc;
}
@@ -753,8 +727,8 @@ MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
llvm_unreachable("Unexpected match type");
}
-SystemZAsmParser::OperandMatchResultTy SystemZAsmParser::
-parseAccessReg(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+SystemZAsmParser::OperandMatchResultTy
+SystemZAsmParser::parseAccessReg(OperandVector &Operands) {
if (Parser.getTok().isNot(AsmToken::Percent))
return MatchOperand_NoMatch;
@@ -768,9 +742,9 @@ parseAccessReg(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
return MatchOperand_Success;
}
-SystemZAsmParser::OperandMatchResultTy SystemZAsmParser::
-parsePCRel(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
- int64_t MinVal, int64_t MaxVal) {
+SystemZAsmParser::OperandMatchResultTy
+SystemZAsmParser::parsePCRel(OperandVector &Operands, int64_t MinVal,
+ int64_t MaxVal) {
MCContext &Ctx = getContext();
MCStreamer &Out = getStreamer();
const MCExpr *Expr;
diff --git a/lib/Target/SystemZ/SystemZCallingConv.td b/lib/Target/SystemZ/SystemZCallingConv.td
index c4f641e..fb0d1d8 100644
--- a/lib/Target/SystemZ/SystemZCallingConv.td
+++ b/lib/Target/SystemZ/SystemZCallingConv.td
@@ -13,7 +13,7 @@ class CCIfExtend<CCAction A>
: CCIf<"ArgFlags.isSExt() || ArgFlags.isZExt()", A>;
//===----------------------------------------------------------------------===//
-// SVR4 return value calling convention
+// z/Linux return value calling convention
//===----------------------------------------------------------------------===//
def RetCC_SystemZ : CallingConv<[
// Promote i32 to i64 if it has an explicit extension type.
@@ -39,7 +39,7 @@ def RetCC_SystemZ : CallingConv<[
]>;
//===----------------------------------------------------------------------===//
-// SVR4 argument calling conventions
+// z/Linux argument calling conventions
//===----------------------------------------------------------------------===//
def CC_SystemZ : CallingConv<[
// Promote i32 to i64 if it has an explicit extension type.
@@ -63,3 +63,9 @@ def CC_SystemZ : CallingConv<[
// Other arguments are passed in 8-byte-aligned 8-byte stack slots.
CCIfType<[i32, i64, f32, f64], CCAssignToStack<8, 8>>
]>;
+
+//===----------------------------------------------------------------------===//
+// z/Linux callee-saved registers
+//===----------------------------------------------------------------------===//
+def CSR_SystemZ : CalleeSavedRegs<(add (sequence "R%dD", 6, 15),
+ (sequence "F%dD", 8, 15))>;
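(For reference, sequence expands to consecutively numbered registers, so CSR_SystemZ names R6D through R15D plus F8D through F15D, the GPR/FPR set the z/Linux ABI treats as callee-saved; it backs the call-preserved register mask that LowerCall starts attaching to call nodes later in this patch.)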
diff --git a/lib/Target/SystemZ/SystemZFrameLowering.cpp b/lib/Target/SystemZ/SystemZFrameLowering.cpp
index 65f3caf..055dbe9 100644
--- a/lib/Target/SystemZ/SystemZFrameLowering.cpp
+++ b/lib/Target/SystemZ/SystemZFrameLowering.cpp
@@ -10,8 +10,9 @@
#include "SystemZFrameLowering.h"
#include "SystemZCallingConv.h"
#include "SystemZInstrBuilder.h"
+#include "SystemZInstrInfo.h"
#include "SystemZMachineFunctionInfo.h"
-#include "SystemZTargetMachine.h"
+#include "SystemZRegisterInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
@@ -44,11 +45,9 @@ static const TargetFrameLowering::SpillSlot SpillOffsetTable[] = {
};
} // end anonymous namespace
-SystemZFrameLowering::SystemZFrameLowering(const SystemZTargetMachine &tm,
- const SystemZSubtarget &sti)
- : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 8,
- -SystemZMC::CallFrameSize, 8),
- TM(tm), STI(sti) {
+SystemZFrameLowering::SystemZFrameLowering()
+ : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 8,
+ -SystemZMC::CallFrameSize, 8) {
// Create a mapping from register number to save slot offset.
RegSpillOffsets.grow(SystemZ::NUM_TARGET_REGS);
for (unsigned I = 0, E = array_lengthof(SpillOffsetTable); I != E; ++I)
@@ -108,9 +107,8 @@ processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
// instruction, or an implicit one that comes between the explicit start
// and end registers.
static void addSavedGPR(MachineBasicBlock &MBB, MachineInstrBuilder &MIB,
- const SystemZTargetMachine &TM,
unsigned GPR64, bool IsImplicit) {
- const SystemZRegisterInfo *RI = TM.getRegisterInfo();
+ const TargetRegisterInfo *RI = MBB.getParent()->getTarget().getRegisterInfo();
unsigned GPR32 = RI->getSubReg(GPR64, SystemZ::subreg_l32);
bool IsLive = MBB.isLiveIn(GPR64) || MBB.isLiveIn(GPR32);
if (!IsLive || !IsImplicit) {
@@ -176,8 +174,8 @@ spillCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(SystemZ::STMG));
// Add the explicit register operands.
- addSavedGPR(MBB, MIB, TM, LowGPR, false);
- addSavedGPR(MBB, MIB, TM, HighGPR, false);
+ addSavedGPR(MBB, MIB, LowGPR, false);
+ addSavedGPR(MBB, MIB, HighGPR, false);
// Add the address.
MIB.addReg(SystemZ::R15D).addImm(StartOffset);
@@ -187,13 +185,13 @@ spillCalleeSavedRegisters(MachineBasicBlock &MBB,
for (unsigned I = 0, E = CSI.size(); I != E; ++I) {
unsigned Reg = CSI[I].getReg();
if (SystemZ::GR64BitRegClass.contains(Reg))
- addSavedGPR(MBB, MIB, TM, Reg, true);
+ addSavedGPR(MBB, MIB, Reg, true);
}
// ...likewise GPR varargs.
if (IsVarArg)
for (unsigned I = ZFI->getVarArgsFirstGPR(); I < SystemZ::NumArgGPRs; ++I)
- addSavedGPR(MBB, MIB, TM, SystemZ::ArgGPRs[I], true);
+ addSavedGPR(MBB, MIB, SystemZ::ArgGPRs[I], true);
}
// Save FPRs in the normal TargetInstrInfo way.
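[annotation] The mechanical change running through this file (and most of the SystemZ hunks below) is the same: stop caching a SystemZTargetMachine reference at construction time and derive what is needed from the code being compiled. A minimal sketch of the lookup chain used by addSavedGPR above, assuming an MBB in scope:

  // From a basic block, reach the target hooks without a stored TM.
  MachineFunction &MF = *MBB.getParent();
  const TargetMachine &TM = MF.getTarget();
  const TargetRegisterInfo *TRI = TM.getRegisterInfo();
  const TargetInstrInfo *TII = TM.getInstrInfo();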
diff --git a/lib/Target/SystemZ/SystemZFrameLowering.h b/lib/Target/SystemZ/SystemZFrameLowering.h
index 70e25fb..4d5fe6d 100644
--- a/lib/Target/SystemZ/SystemZFrameLowering.h
+++ b/lib/Target/SystemZ/SystemZFrameLowering.h
@@ -10,7 +10,6 @@
#ifndef SYSTEMZFRAMELOWERING_H
#define SYSTEMZFRAMELOWERING_H
-#include "SystemZSubtarget.h"
#include "llvm/ADT/IndexedMap.h"
#include "llvm/Target/TargetFrameLowering.h"
@@ -21,13 +20,8 @@ class SystemZSubtarget;
class SystemZFrameLowering : public TargetFrameLowering {
IndexedMap<unsigned> RegSpillOffsets;
-protected:
- const SystemZTargetMachine &TM;
- const SystemZSubtarget &STI;
-
public:
- SystemZFrameLowering(const SystemZTargetMachine &tm,
- const SystemZSubtarget &sti);
+ SystemZFrameLowering();
// Override TargetFrameLowering.
bool isFPCloseToIncomingSP() const override { return false; }
diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp
index 6fe1fb9..00c65f5 100644
--- a/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -80,9 +80,9 @@ static MachineOperand earlyUseOperand(MachineOperand Op) {
return Op;
}
-SystemZTargetLowering::SystemZTargetLowering(SystemZTargetMachine &tm)
- : TargetLowering(tm, new TargetLoweringObjectFileELF()),
- Subtarget(*tm.getSubtargetImpl()), TM(tm) {
+SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &tm)
+ : TargetLowering(tm, new TargetLoweringObjectFileELF()),
+ Subtarget(tm.getSubtarget<SystemZSubtarget>()) {
MVT PtrVT = getPointerTy();
// Set up the register classes.
@@ -673,11 +673,13 @@ LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
MachineRegisterInfo &MRI = MF.getRegInfo();
SystemZMachineFunctionInfo *FuncInfo =
MF.getInfo<SystemZMachineFunctionInfo>();
- auto *TFL = static_cast<const SystemZFrameLowering *>(TM.getFrameLowering());
+ auto *TFL = static_cast<const SystemZFrameLowering *>(
+ DAG.getTarget().getFrameLowering());
// Assign locations to all of the incoming arguments.
SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CallConv, IsVarArg, MF, TM, ArgLocs, *DAG.getContext());
+ CCState CCInfo(CallConv, IsVarArg, MF, DAG.getTarget(), ArgLocs,
+ *DAG.getContext());
CCInfo.AnalyzeFormalArguments(Ins, CC_SystemZ);
unsigned NumFixedGPRs = 0;
@@ -815,7 +817,8 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
// Analyze the operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ArgLocs;
- CCState ArgCCInfo(CallConv, IsVarArg, MF, TM, ArgLocs, *DAG.getContext());
+ CCState ArgCCInfo(CallConv, IsVarArg, MF, DAG.getTarget(), ArgLocs,
+ *DAG.getContext());
ArgCCInfo.AnalyzeCallOperands(Outs, CC_SystemZ);
// We don't support GuaranteedTailCallOpt, only automatically-detected
@@ -911,6 +914,12 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
Ops.push_back(DAG.getRegister(RegsToPass[I].first,
RegsToPass[I].second.getValueType()));
+ // Add a register mask operand representing the call-preserved registers.
+ const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
+ const uint32_t *Mask = TRI->getCallPreservedMask(CallConv);
+ assert(Mask && "Missing call preserved mask for calling convention");
+ Ops.push_back(DAG.getRegisterMask(Mask));
+
// Glue the call to the argument copies, if any.
if (Glue.getNode())
Ops.push_back(Glue);
@@ -931,7 +940,8 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
// Assign locations to each value returned by this call.
SmallVector<CCValAssign, 16> RetLocs;
- CCState RetCCInfo(CallConv, IsVarArg, MF, TM, RetLocs, *DAG.getContext());
+ CCState RetCCInfo(CallConv, IsVarArg, MF, DAG.getTarget(), RetLocs,
+ *DAG.getContext());
RetCCInfo.AnalyzeCallResult(Ins, RetCC_SystemZ);
// Copy all of the result registers out of their specified physreg.
@@ -962,7 +972,8 @@ SystemZTargetLowering::LowerReturn(SDValue Chain,
// Assign locations to each returned value.
SmallVector<CCValAssign, 16> RetLocs;
- CCState RetCCInfo(CallConv, IsVarArg, MF, TM, RetLocs, *DAG.getContext());
+ CCState RetCCInfo(CallConv, IsVarArg, MF, DAG.getTarget(), RetLocs,
+ *DAG.getContext());
RetCCInfo.AnalyzeReturn(Outs, RetCC_SystemZ);
// Quick exit for void returns
@@ -1786,8 +1797,8 @@ SDValue SystemZTargetLowering::lowerGlobalAddress(GlobalAddressSDNode *Node,
const GlobalValue *GV = Node->getGlobal();
int64_t Offset = Node->getOffset();
EVT PtrVT = getPointerTy();
- Reloc::Model RM = TM.getRelocationModel();
- CodeModel::Model CM = TM.getCodeModel();
+ Reloc::Model RM = DAG.getTarget().getRelocationModel();
+ CodeModel::Model CM = DAG.getTarget().getCodeModel();
SDValue Result;
if (Subtarget.isPC32DBLSymbol(GV, RM, CM)) {
@@ -1824,7 +1835,7 @@ SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node,
SDLoc DL(Node);
const GlobalValue *GV = Node->getGlobal();
EVT PtrVT = getPointerTy();
- TLSModel::Model model = TM.getTLSModel(GV);
+ TLSModel::Model model = DAG.getTarget().getTLSModel(GV);
if (model != TLSModel::LocalExec)
llvm_unreachable("only local-exec TLS mode supported");
@@ -2287,9 +2298,9 @@ SDValue SystemZTargetLowering::lowerATOMIC_LOAD_SUB(SDValue Op,
// Use an addition if the operand is constant and either LAA(G) is
// available or the negative value is in the range of A(G)FHI.
int64_t Value = (-Op2->getAPIntValue()).getSExtValue();
- if (isInt<32>(Value) || TM.getSubtargetImpl()->hasInterlockedAccess1())
+ if (isInt<32>(Value) || Subtarget.hasInterlockedAccess1())
NegSrc2 = DAG.getConstant(Value, MemVT);
- } else if (TM.getSubtargetImpl()->hasInterlockedAccess1())
+ } else if (Subtarget.hasInterlockedAccess1())
// Use LAA(G) if available.
NegSrc2 = DAG.getNode(ISD::SUB, DL, MemVT, DAG.getConstant(0, MemVT),
Src2);
@@ -2602,7 +2613,8 @@ static unsigned forceReg(MachineInstr *MI, MachineOperand &Base,
MachineBasicBlock *
SystemZTargetLowering::emitSelect(MachineInstr *MI,
MachineBasicBlock *MBB) const {
- const SystemZInstrInfo *TII = TM.getInstrInfo();
+ const SystemZInstrInfo *TII = static_cast<const SystemZInstrInfo *>(
+ MBB->getParent()->getTarget().getInstrInfo());
unsigned DestReg = MI->getOperand(0).getReg();
unsigned TrueReg = MI->getOperand(1).getReg();
@@ -2650,7 +2662,8 @@ SystemZTargetLowering::emitCondStore(MachineInstr *MI,
MachineBasicBlock *MBB,
unsigned StoreOpcode, unsigned STOCOpcode,
bool Invert) const {
- const SystemZInstrInfo *TII = TM.getInstrInfo();
+ const SystemZInstrInfo *TII = static_cast<const SystemZInstrInfo *>(
+ MBB->getParent()->getTarget().getInstrInfo());
unsigned SrcReg = MI->getOperand(0).getReg();
MachineOperand Base = MI->getOperand(1);
@@ -2665,7 +2678,7 @@ SystemZTargetLowering::emitCondStore(MachineInstr *MI,
// Use STOCOpcode if possible. We could use different store patterns in
// order to avoid matching the index register, but the performance trade-offs
// might be more complicated in that case.
- if (STOCOpcode && !IndexReg && TM.getSubtargetImpl()->hasLoadStoreOnCond()) {
+ if (STOCOpcode && !IndexReg && Subtarget.hasLoadStoreOnCond()) {
if (Invert)
CCMask ^= CCValid;
BuildMI(*MBB, MI, DL, TII->get(STOCOpcode))
@@ -2717,8 +2730,9 @@ SystemZTargetLowering::emitAtomicLoadBinary(MachineInstr *MI,
unsigned BinOpcode,
unsigned BitSize,
bool Invert) const {
- const SystemZInstrInfo *TII = TM.getInstrInfo();
MachineFunction &MF = *MBB->getParent();
+ const SystemZInstrInfo *TII =
+ static_cast<const SystemZInstrInfo *>(MF.getTarget().getInstrInfo());
MachineRegisterInfo &MRI = MF.getRegInfo();
bool IsSubWord = (BitSize < 32);
@@ -2840,8 +2854,9 @@ SystemZTargetLowering::emitAtomicLoadMinMax(MachineInstr *MI,
unsigned CompareOpcode,
unsigned KeepOldMask,
unsigned BitSize) const {
- const SystemZInstrInfo *TII = TM.getInstrInfo();
MachineFunction &MF = *MBB->getParent();
+ const SystemZInstrInfo *TII =
+ static_cast<const SystemZInstrInfo *>(MF.getTarget().getInstrInfo());
MachineRegisterInfo &MRI = MF.getRegInfo();
bool IsSubWord = (BitSize < 32);
@@ -2951,8 +2966,9 @@ SystemZTargetLowering::emitAtomicLoadMinMax(MachineInstr *MI,
MachineBasicBlock *
SystemZTargetLowering::emitAtomicCmpSwapW(MachineInstr *MI,
MachineBasicBlock *MBB) const {
- const SystemZInstrInfo *TII = TM.getInstrInfo();
MachineFunction &MF = *MBB->getParent();
+ const SystemZInstrInfo *TII =
+ static_cast<const SystemZInstrInfo *>(MF.getTarget().getInstrInfo());
MachineRegisterInfo &MRI = MF.getRegInfo();
// Extract the operands. Base can be a register or a frame index.
@@ -3067,8 +3083,9 @@ MachineBasicBlock *
SystemZTargetLowering::emitExt128(MachineInstr *MI,
MachineBasicBlock *MBB,
bool ClearEven, unsigned SubReg) const {
- const SystemZInstrInfo *TII = TM.getInstrInfo();
MachineFunction &MF = *MBB->getParent();
+ const SystemZInstrInfo *TII =
+ static_cast<const SystemZInstrInfo *>(MF.getTarget().getInstrInfo());
MachineRegisterInfo &MRI = MF.getRegInfo();
DebugLoc DL = MI->getDebugLoc();
@@ -3098,8 +3115,9 @@ MachineBasicBlock *
SystemZTargetLowering::emitMemMemWrapper(MachineInstr *MI,
MachineBasicBlock *MBB,
unsigned Opcode) const {
- const SystemZInstrInfo *TII = TM.getInstrInfo();
MachineFunction &MF = *MBB->getParent();
+ const SystemZInstrInfo *TII =
+ static_cast<const SystemZInstrInfo *>(MF.getTarget().getInstrInfo());
MachineRegisterInfo &MRI = MF.getRegInfo();
DebugLoc DL = MI->getDebugLoc();
@@ -3267,8 +3285,9 @@ MachineBasicBlock *
SystemZTargetLowering::emitStringWrapper(MachineInstr *MI,
MachineBasicBlock *MBB,
unsigned Opcode) const {
- const SystemZInstrInfo *TII = TM.getInstrInfo();
MachineFunction &MF = *MBB->getParent();
+ const SystemZInstrInfo *TII =
+ static_cast<const SystemZInstrInfo *>(MF.getTarget().getInstrInfo());
MachineRegisterInfo &MRI = MF.getRegInfo();
DebugLoc DL = MI->getDebugLoc();
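[annotation] The substantive hunk in this file is the register-mask operand added in LowerCall; the rest is the same de-TM-ification. Instead of listing every call-clobbered register as a Def on the call pseudos (the Defs list removed from SystemZInstrInfo.td further down), the call node now carries the call-preserved mask, and the register allocator treats every register outside the mask as clobbered across the call. The pattern, as it appears inside a TargetLowering::LowerCall implementation:

  // Attach the call-preserved register mask to an in-flight call node.
  // Ops already holds the chain, callee, and argument register copies.
  const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
  const uint32_t *Mask = TRI->getCallPreservedMask(CallConv);
  assert(Mask && "Missing call preserved mask for calling convention");
  Ops.push_back(DAG.getRegisterMask(Mask));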
diff --git a/lib/Target/SystemZ/SystemZISelLowering.h b/lib/Target/SystemZ/SystemZISelLowering.h
index bceb25e..e21b050 100644
--- a/lib/Target/SystemZ/SystemZISelLowering.h
+++ b/lib/Target/SystemZ/SystemZISelLowering.h
@@ -198,7 +198,7 @@ class SystemZTargetMachine;
class SystemZTargetLowering : public TargetLowering {
public:
- explicit SystemZTargetLowering(SystemZTargetMachine &TM);
+ explicit SystemZTargetLowering(const TargetMachine &TM);
// Override TargetLowering.
MVT getScalarShiftAmountTy(EVT LHSTy) const override {
@@ -249,7 +249,6 @@ public:
private:
const SystemZSubtarget &Subtarget;
- const SystemZTargetMachine &TM;
// Implement LowerOperation for individual opcodes.
SDValue lowerSETCC(SDValue Op, SelectionDAG &DAG) const;
diff --git a/lib/Target/SystemZ/SystemZInstrFP.td b/lib/Target/SystemZ/SystemZInstrFP.td
index a1e782c..e8841e1 100644
--- a/lib/Target/SystemZ/SystemZInstrFP.td
+++ b/lib/Target/SystemZ/SystemZInstrFP.td
@@ -133,6 +133,13 @@ def LEDBR : UnaryRRE<"ledb", 0xB344, fround, FP32, FP64>;
def LEXBR : UnaryRRE<"lexb", 0xB346, null_frag, FP128, FP128>;
def LDXBR : UnaryRRE<"ldxb", 0xB345, null_frag, FP128, FP128>;
+def LEDBRA : UnaryRRF4<"ledbra", 0xB344, FP32, FP64>,
+ Requires<[FeatureFPExtension]>;
+def LEXBRA : UnaryRRF4<"lexbra", 0xB346, FP128, FP128>,
+ Requires<[FeatureFPExtension]>;
+def LDXBRA : UnaryRRF4<"ldxbra", 0xB345, FP128, FP128>,
+ Requires<[FeatureFPExtension]>;
+
def : Pat<(f32 (fround FP128:$src)),
(EXTRACT_SUBREG (LEXBR FP128:$src), subreg_hh32)>;
def : Pat<(f64 (fround FP128:$src)),
diff --git a/lib/Target/SystemZ/SystemZInstrFormats.td b/lib/Target/SystemZ/SystemZInstrFormats.td
index add675a..9f59a1c 100644
--- a/lib/Target/SystemZ/SystemZInstrFormats.td
+++ b/lib/Target/SystemZ/SystemZInstrFormats.td
@@ -511,34 +511,24 @@ class InstSS<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern>
// to store. Other stored registers are added as implicit uses.
//
// Unary:
-// One register output operand and one input operand. The input
-// operand may be a register, immediate or memory.
+// One register output operand and one input operand.
//
// Binary:
-// One register output operand and two input operands. The first
-// input operand is always a register and the second may be a register,
-// immediate or memory.
-//
-// Shift:
-// One register output operand and two input operands. The first
-// input operand is a register and the second has the same form as
-// an address (although it isn't actually used to address memory).
+// One register output operand and two input operands.
//
// Compare:
-// Two input operands. The first operand is always a register,
-// the second may be a register, immediate or memory.
+// Two input operands and an implicit CC output operand.
//
// Ternary:
-// One register output operand and three register input operands.
+// One register output operand and three input operands.
//
// LoadAndOp:
-// One output operand and two input operands. The first input operand
-// is a register and the second is an address.
+// One output operand and two input operands, one of which is an address.
+// The instruction both reads from and writes to the address.
//
// CmpSwap:
-// One output operand and three input operands. The first two
-// operands are registers and the third is an address. The instruction
-// both reads from and writes to the address.
+// One output operand and three input operands, one of which is an address.
+// The instruction both reads from and writes to the address.
//
// RotateSelect:
// One output operand and five input operands. The first two operands
@@ -691,7 +681,7 @@ class CondStoreRSY<string mnemonic, bits<16> opcode,
class AsmCondStoreRSY<string mnemonic, bits<16> opcode,
RegisterOperand cls, bits<5> bytes,
AddressingMode mode = bdaddr20only>
- : InstRSY<opcode, (outs), (ins cls:$R1, mode:$BD2, uimm8zx4:$R3),
+ : InstRSY<opcode, (outs), (ins cls:$R1, mode:$BD2, imm32zx4:$R3),
mnemonic#"\t$R1, $BD2, $R3", []>,
Requires<[FeatureLoadStoreOnCond]> {
let mayStore = 1;
@@ -730,7 +720,7 @@ class UnaryRRE<string mnemonic, bits<16> opcode, SDPatternOperator operator,
class UnaryRRF<string mnemonic, bits<16> opcode, RegisterOperand cls1,
RegisterOperand cls2>
- : InstRRF<opcode, (outs cls1:$R1), (ins uimm8zx4:$R3, cls2:$R2),
+ : InstRRF<opcode, (outs cls1:$R1), (ins imm32zx4:$R3, cls2:$R2),
mnemonic#"r\t$R1, $R3, $R2", []> {
let OpKey = mnemonic ## cls1;
let OpType = "reg";
@@ -739,7 +729,7 @@ class UnaryRRF<string mnemonic, bits<16> opcode, RegisterOperand cls1,
class UnaryRRF4<string mnemonic, bits<16> opcode, RegisterOperand cls1,
RegisterOperand cls2>
- : InstRRF<opcode, (outs cls1:$R1), (ins uimm8zx4:$R3, cls2:$R2, uimm8zx4:$R4),
+ : InstRRF<opcode, (outs cls1:$R1), (ins imm32zx4:$R3, cls2:$R2, imm32zx4:$R4),
mnemonic#"\t$R1, $R3, $R2, $R4", []>;
// These instructions are generated by if conversion. The old value of R1
@@ -757,7 +747,7 @@ class CondUnaryRRF<string mnemonic, bits<16> opcode, RegisterOperand cls1,
// mask is the third operand rather than being part of the mnemonic.
class AsmCondUnaryRRF<string mnemonic, bits<16> opcode, RegisterOperand cls1,
RegisterOperand cls2>
- : InstRRF<opcode, (outs cls1:$R1), (ins cls1:$R1src, cls2:$R2, uimm8zx4:$R3),
+ : InstRRF<opcode, (outs cls1:$R1), (ins cls1:$R1src, cls2:$R2, imm32zx4:$R3),
mnemonic#"r\t$R1, $R2, $R3", []>,
Requires<[FeatureLoadStoreOnCond]> {
let Constraints = "$R1 = $R1src";
@@ -823,7 +813,7 @@ class CondUnaryRSY<string mnemonic, bits<16> opcode,
class AsmCondUnaryRSY<string mnemonic, bits<16> opcode,
RegisterOperand cls, bits<5> bytes,
AddressingMode mode = bdaddr20only>
- : InstRSY<opcode, (outs cls:$R1), (ins cls:$R1src, mode:$BD2, uimm8zx4:$R3),
+ : InstRSY<opcode, (outs cls:$R1), (ins cls:$R1src, mode:$BD2, imm32zx4:$R3),
mnemonic#"\t$R1, $BD2, $R3", []>,
Requires<[FeatureLoadStoreOnCond]> {
let mayLoad = 1;
@@ -993,6 +983,33 @@ class BinaryRIL<string mnemonic, bits<12> opcode, SDPatternOperator operator,
let DisableEncoding = "$R1src";
}
+class BinaryRS<string mnemonic, bits<8> opcode, SDPatternOperator operator,
+ RegisterOperand cls>
+ : InstRS<opcode, (outs cls:$R1), (ins cls:$R1src, shift12only:$BD2),
+ mnemonic#"\t$R1, $BD2",
+ [(set cls:$R1, (operator cls:$R1src, shift12only:$BD2))]> {
+ let R3 = 0;
+ let Constraints = "$R1 = $R1src";
+ let DisableEncoding = "$R1src";
+}
+
+class BinaryRSY<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+ RegisterOperand cls>
+ : InstRSY<opcode, (outs cls:$R1), (ins cls:$R3, shift20only:$BD2),
+ mnemonic#"\t$R1, $R3, $BD2",
+ [(set cls:$R1, (operator cls:$R3, shift20only:$BD2))]>;
+
+multiclass BinaryRSAndK<string mnemonic, bits<8> opcode1, bits<16> opcode2,
+ SDPatternOperator operator, RegisterOperand cls> {
+ let NumOpsKey = mnemonic in {
+ let NumOpsValue = "3" in
+ def K : BinaryRSY<mnemonic##"k", opcode2, null_frag, cls>,
+ Requires<[FeatureDistinctOps]>;
+ let NumOpsValue = "2", isConvertibleToThreeAddress = 1 in
+ def "" : BinaryRS<mnemonic, opcode1, operator, cls>;
+ }
+}
+
class BinaryRX<string mnemonic, bits<8> opcode, SDPatternOperator operator,
RegisterOperand cls, SDPatternOperator load, bits<5> bytes,
AddressingMode mode = bdxaddr12only>
@@ -1077,33 +1094,6 @@ multiclass BinarySIPair<string mnemonic, bits<8> siOpcode,
}
}
-class ShiftRS<string mnemonic, bits<8> opcode, SDPatternOperator operator,
- RegisterOperand cls>
- : InstRS<opcode, (outs cls:$R1), (ins cls:$R1src, shift12only:$BD2),
- mnemonic#"\t$R1, $BD2",
- [(set cls:$R1, (operator cls:$R1src, shift12only:$BD2))]> {
- let R3 = 0;
- let Constraints = "$R1 = $R1src";
- let DisableEncoding = "$R1src";
-}
-
-class ShiftRSY<string mnemonic, bits<16> opcode, SDPatternOperator operator,
- RegisterOperand cls>
- : InstRSY<opcode, (outs cls:$R1), (ins cls:$R3, shift20only:$BD2),
- mnemonic#"\t$R1, $R3, $BD2",
- [(set cls:$R1, (operator cls:$R3, shift20only:$BD2))]>;
-
-multiclass ShiftRSAndK<string mnemonic, bits<8> opcode1, bits<16> opcode2,
- SDPatternOperator operator, RegisterOperand cls> {
- let NumOpsKey = mnemonic in {
- let NumOpsValue = "3" in
- def K : ShiftRSY<mnemonic##"k", opcode2, null_frag, cls>,
- Requires<[FeatureDistinctOps]>;
- let NumOpsValue = "2", isConvertibleToThreeAddress = 1 in
- def "" : ShiftRS<mnemonic, opcode1, operator, cls>;
- }
-}
-
class CompareRR<string mnemonic, bits<8> opcode, SDPatternOperator operator,
RegisterOperand cls1, RegisterOperand cls2>
: InstRR<opcode, (outs), (ins cls1:$R1, cls2:$R2),
@@ -1315,22 +1305,23 @@ multiclass CmpSwapRSPair<string mnemonic, bits<8> rsOpcode, bits<16> rsyOpcode,
class RotateSelectRIEf<string mnemonic, bits<16> opcode, RegisterOperand cls1,
RegisterOperand cls2>
: InstRIEf<opcode, (outs cls1:$R1),
- (ins cls1:$R1src, cls2:$R2, uimm8:$I3, uimm8:$I4, uimm8zx6:$I5),
+ (ins cls1:$R1src, cls2:$R2, imm32zx8:$I3, imm32zx8:$I4,
+ imm32zx6:$I5),
mnemonic#"\t$R1, $R2, $I3, $I4, $I5", []> {
let Constraints = "$R1 = $R1src";
let DisableEncoding = "$R1src";
}
class PrefetchRXY<string mnemonic, bits<16> opcode, SDPatternOperator operator>
- : InstRXY<opcode, (outs), (ins uimm8zx4:$R1, bdxaddr20only:$XBD2),
+ : InstRXY<opcode, (outs), (ins imm32zx4:$R1, bdxaddr20only:$XBD2),
mnemonic##"\t$R1, $XBD2",
- [(operator uimm8zx4:$R1, bdxaddr20only:$XBD2)]>;
+ [(operator imm32zx4:$R1, bdxaddr20only:$XBD2)]>;
class PrefetchRILPC<string mnemonic, bits<12> opcode,
SDPatternOperator operator>
- : InstRIL<opcode, (outs), (ins uimm8zx4:$R1, pcrel32:$I2),
+ : InstRIL<opcode, (outs), (ins imm32zx4:$R1, pcrel32:$I2),
mnemonic##"\t$R1, $I2",
- [(operator uimm8zx4:$R1, pcrel32:$I2)]> {
+ [(operator imm32zx4:$R1, pcrel32:$I2)]> {
// We want PC-relative addresses to be tried ahead of BD and BDX addresses.
// However, BDXs have two extra operands and are therefore 6 units more
// complex.
@@ -1450,7 +1441,8 @@ class StoreRXYPseudo<SDPatternOperator operator, RegisterOperand cls,
// of registers.
class RotateSelectRIEfPseudo<RegisterOperand cls1, RegisterOperand cls2>
: Pseudo<(outs cls1:$R1),
- (ins cls1:$R1src, cls2:$R2, uimm8:$I3, uimm8:$I4, uimm8zx6:$I5),
+ (ins cls1:$R1src, cls2:$R2, imm32zx8:$I3, imm32zx8:$I4,
+ imm32zx6:$I5),
[]> {
let Constraints = "$R1 = $R1src";
let DisableEncoding = "$R1src";
@@ -1460,9 +1452,9 @@ class RotateSelectRIEfPseudo<RegisterOperand cls1, RegisterOperand cls2>
// the value of the PSW's 2-bit condition code field.
class SelectWrapper<RegisterOperand cls>
: Pseudo<(outs cls:$dst),
- (ins cls:$src1, cls:$src2, uimm8zx4:$valid, uimm8zx4:$cc),
+ (ins cls:$src1, cls:$src2, imm32zx4:$valid, imm32zx4:$cc),
[(set cls:$dst, (z_select_ccmask cls:$src1, cls:$src2,
- uimm8zx4:$valid, uimm8zx4:$cc))]> {
+ imm32zx4:$valid, imm32zx4:$cc))]> {
let usesCustomInserter = 1;
// Although the instructions used by these nodes do not in themselves
// change CC, the insertion requires new blocks, and CC cannot be live
@@ -1476,14 +1468,14 @@ multiclass CondStores<RegisterOperand cls, SDPatternOperator store,
SDPatternOperator load, AddressingMode mode> {
let Defs = [CC], Uses = [CC], usesCustomInserter = 1 in {
def "" : Pseudo<(outs),
- (ins cls:$new, mode:$addr, uimm8zx4:$valid, uimm8zx4:$cc),
+ (ins cls:$new, mode:$addr, imm32zx4:$valid, imm32zx4:$cc),
[(store (z_select_ccmask cls:$new, (load mode:$addr),
- uimm8zx4:$valid, uimm8zx4:$cc),
+ imm32zx4:$valid, imm32zx4:$cc),
mode:$addr)]>;
def Inv : Pseudo<(outs),
- (ins cls:$new, mode:$addr, uimm8zx4:$valid, uimm8zx4:$cc),
+ (ins cls:$new, mode:$addr, imm32zx4:$valid, imm32zx4:$cc),
[(store (z_select_ccmask (load mode:$addr), cls:$new,
- uimm8zx4:$valid, uimm8zx4:$cc),
+ imm32zx4:$valid, imm32zx4:$cc),
mode:$addr)]>;
}
}
@@ -1611,6 +1603,7 @@ class CompareAliasRI<SDPatternOperator operator, RegisterOperand cls,
// An alias of a RotateSelectRIEf, but with different register sizes.
class RotateSelectAliasRIEf<RegisterOperand cls1, RegisterOperand cls2>
: Alias<6, (outs cls1:$R1),
- (ins cls1:$R1src, cls2:$R2, uimm8:$I3, uimm8:$I4, uimm8zx6:$I5), []> {
+ (ins cls1:$R1src, cls2:$R2, imm32zx8:$I3, imm32zx8:$I4,
+ imm32zx6:$I5), []> {
let Constraints = "$R1 = $R1src";
}
diff --git a/lib/Target/SystemZ/SystemZInstrInfo.cpp b/lib/Target/SystemZ/SystemZInstrInfo.cpp
index 6a18b2d..f58ab47 100644
--- a/lib/Target/SystemZ/SystemZInstrInfo.cpp
+++ b/lib/Target/SystemZ/SystemZInstrInfo.cpp
@@ -40,9 +40,9 @@ static bool isHighReg(unsigned int Reg) {
// Pin the vtable to this file.
void SystemZInstrInfo::anchor() {}
-SystemZInstrInfo::SystemZInstrInfo(SystemZTargetMachine &tm)
+SystemZInstrInfo::SystemZInstrInfo(SystemZSubtarget &sti)
: SystemZGenInstrInfo(SystemZ::ADJCALLSTACKDOWN, SystemZ::ADJCALLSTACKUP),
- RI(tm), TM(tm) {
+ RI(), STI(sti) {
}
// MI is a 128-bit load or store. Split it into two 64-bit loads or stores,
@@ -488,7 +488,7 @@ SystemZInstrInfo::optimizeCompareInstr(MachineInstr *Compare,
bool IsLogical = (Compare->getDesc().TSFlags & SystemZII::IsLogical) != 0;
if (Value == 0 &&
!IsLogical &&
- removeIPMBasedCompare(Compare, SrcReg, MRI, TM.getRegisterInfo()))
+ removeIPMBasedCompare(Compare, SrcReg, MRI, &RI))
return true;
return false;
}
@@ -505,7 +505,7 @@ static unsigned getConditionalMove(unsigned Opcode) {
bool SystemZInstrInfo::isPredicable(MachineInstr *MI) const {
unsigned Opcode = MI->getOpcode();
- if (TM.getSubtargetImpl()->hasLoadStoreOnCond() &&
+ if (STI.hasLoadStoreOnCond() &&
getConditionalMove(Opcode))
return true;
return false;
@@ -537,7 +537,7 @@ PredicateInstruction(MachineInstr *MI,
unsigned CCMask = Pred[1].getImm();
assert(CCMask > 0 && CCMask < 15 && "Invalid predicate");
unsigned Opcode = MI->getOpcode();
- if (TM.getSubtargetImpl()->hasLoadStoreOnCond()) {
+ if (STI.hasLoadStoreOnCond()) {
if (unsigned CondOpcode = getConditionalMove(Opcode)) {
MI->setDesc(get(CondOpcode));
MachineInstrBuilder(*MI->getParent()->getParent(), MI)
@@ -685,7 +685,7 @@ SystemZInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
// We prefer to keep the two-operand form where possible both
// because it tends to be shorter and because some instructions
// have memory forms that can be used during spilling.
- if (TM.getSubtargetImpl()->hasDistinctOps()) {
+ if (STI.hasDistinctOps()) {
MachineOperand &Dest = MI->getOperand(0);
MachineOperand &Src = MI->getOperand(1);
unsigned DestReg = Dest.getReg();
diff --git a/lib/Target/SystemZ/SystemZInstrInfo.h b/lib/Target/SystemZ/SystemZInstrInfo.h
index 09aee5d..83009cb 100644
--- a/lib/Target/SystemZ/SystemZInstrInfo.h
+++ b/lib/Target/SystemZ/SystemZInstrInfo.h
@@ -110,9 +110,10 @@ struct Branch {
};
} // end namespace SystemZII
+class SystemZSubtarget;
class SystemZInstrInfo : public SystemZGenInstrInfo {
const SystemZRegisterInfo RI;
- SystemZTargetMachine &TM;
+ SystemZSubtarget &STI;
void splitMove(MachineBasicBlock::iterator MI, unsigned NewOpcode) const;
void splitAdjDynAlloc(MachineBasicBlock::iterator MI) const;
@@ -130,7 +131,7 @@ class SystemZInstrInfo : public SystemZGenInstrInfo {
virtual void anchor();
public:
- explicit SystemZInstrInfo(SystemZTargetMachine &TM);
+ explicit SystemZInstrInfo(SystemZSubtarget &STI);
// Override TargetInstrInfo.
unsigned isLoadFromStackSlot(const MachineInstr *MI,
diff --git a/lib/Target/SystemZ/SystemZInstrInfo.td b/lib/Target/SystemZ/SystemZInstrInfo.td
index e70df92..f4951ad 100644
--- a/lib/Target/SystemZ/SystemZInstrInfo.td
+++ b/lib/Target/SystemZ/SystemZInstrInfo.td
@@ -63,11 +63,11 @@ let isBranch = 1, isTerminator = 1, Uses = [CC] in {
def BRCL : InstRIL<0xC04, (outs), (ins cond4:$valid, cond4:$R1,
brtarget32:$I2), "jg$R1\t$I2", []>;
}
- def AsmBRC : InstRI<0xA74, (outs), (ins uimm8zx4:$R1, brtarget16:$I2),
+ def AsmBRC : InstRI<0xA74, (outs), (ins imm32zx4:$R1, brtarget16:$I2),
"brc\t$R1, $I2", []>;
- def AsmBRCL : InstRIL<0xC04, (outs), (ins uimm8zx4:$R1, brtarget32:$I2),
+ def AsmBRCL : InstRIL<0xC04, (outs), (ins imm32zx4:$R1, brtarget32:$I2),
"brcl\t$R1, $I2", []>;
- def AsmBCR : InstRR<0x07, (outs), (ins uimm8zx4:$R1, GR64:$R2),
+ def AsmBCR : InstRR<0x07, (outs), (ins imm32zx4:$R1, GR64:$R2),
"bcr\t$R1, $R2", []>;
}
@@ -109,7 +109,7 @@ multiclass CompareBranches<Operand ccmask, string pos1, string pos2> {
}
let isCodeGenOnly = 1 in
defm C : CompareBranches<cond4, "$M3", "">;
-defm AsmC : CompareBranches<uimm8zx4, "", "$M3, ">;
+defm AsmC : CompareBranches<imm32zx4, "", "$M3, ">;
// Define AsmParser mnemonics for each general condition-code mask
// (integer or floating-point)
@@ -233,9 +233,7 @@ defm CondStore64 : CondStores<GR64, nonvolatile_store,
// Call instructions
//===----------------------------------------------------------------------===//
-// The definitions here are for the call-clobbered registers.
-let isCall = 1, Defs = [R0D, R1D, R2D, R3D, R4D, R5D, R14D,
- F0D, F1D, F2D, F3D, F4D, F5D, F6D, F7D, CC] in {
+let isCall = 1, Defs = [R14D, CC] in {
def CallBRASL : Alias<6, (outs), (ins pcrel32:$I2, variable_ops),
[(z_call pcrel32:$I2)]>;
def CallBASR : Alias<2, (outs), (ins ADDR64:$R2, variable_ops),
@@ -855,7 +853,7 @@ let Defs = [CC] in {
}
// AND to memory
- defm NI : BinarySIPair<"ni", 0x94, 0xEB54, null_frag, uimm8>;
+ defm NI : BinarySIPair<"ni", 0x94, 0xEB54, null_frag, imm32zx8>;
// Block AND.
let mayLoad = 1, mayStore = 1 in
@@ -912,7 +910,7 @@ let Defs = [CC] in {
}
// OR to memory
- defm OI : BinarySIPair<"oi", 0x96, 0xEB56, null_frag, uimm8>;
+ defm OI : BinarySIPair<"oi", 0x96, 0xEB56, null_frag, imm32zx8>;
// Block OR.
let mayLoad = 1, mayStore = 1 in
@@ -952,7 +950,7 @@ let Defs = [CC] in {
}
// XOR to memory
- defm XI : BinarySIPair<"xi", 0x97, 0xEB57, null_frag, uimm8>;
+ defm XI : BinarySIPair<"xi", 0x97, 0xEB57, null_frag, imm32zx8>;
// Block XOR.
let mayLoad = 1, mayStore = 1 in
@@ -1015,26 +1013,26 @@ def DLG : BinaryRXY<"dlg", 0xE387, z_udivrem64, GR128, load, 8>;
// Shift left.
let neverHasSideEffects = 1 in {
- defm SLL : ShiftRSAndK<"sll", 0x89, 0xEBDF, shl, GR32>;
- def SLLG : ShiftRSY<"sllg", 0xEB0D, shl, GR64>;
+ defm SLL : BinaryRSAndK<"sll", 0x89, 0xEBDF, shl, GR32>;
+ def SLLG : BinaryRSY<"sllg", 0xEB0D, shl, GR64>;
}
// Logical shift right.
let neverHasSideEffects = 1 in {
- defm SRL : ShiftRSAndK<"srl", 0x88, 0xEBDE, srl, GR32>;
- def SRLG : ShiftRSY<"srlg", 0xEB0C, srl, GR64>;
+ defm SRL : BinaryRSAndK<"srl", 0x88, 0xEBDE, srl, GR32>;
+ def SRLG : BinaryRSY<"srlg", 0xEB0C, srl, GR64>;
}
// Arithmetic shift right.
let Defs = [CC], CCValues = 0xE, CompareZeroCCMask = 0xE in {
- defm SRA : ShiftRSAndK<"sra", 0x8A, 0xEBDC, sra, GR32>;
- def SRAG : ShiftRSY<"srag", 0xEB0A, sra, GR64>;
+ defm SRA : BinaryRSAndK<"sra", 0x8A, 0xEBDC, sra, GR32>;
+ def SRAG : BinaryRSY<"srag", 0xEB0A, sra, GR64>;
}
// Rotate left.
let neverHasSideEffects = 1 in {
- def RLL : ShiftRSY<"rll", 0xEB1D, rotl, GR32>;
- def RLLG : ShiftRSY<"rllg", 0xEB1C, rotl, GR64>;
+ def RLL : BinaryRSY<"rll", 0xEB1D, rotl, GR32>;
+ def RLLG : BinaryRSY<"rllg", 0xEB1C, rotl, GR64>;
}
// Rotate second operand left and insert selected bits into first operand.
@@ -1403,15 +1401,15 @@ def : Pat<(sub GR64:$src1, (azextloadi32 bdxaddr20only:$addr)),
// Optimize sign-extended 1/0 selects to -1/0 selects. This is important
// for vector legalization.
-def : Pat<(sra (shl (i32 (z_select_ccmask 1, 0, uimm8zx4:$valid, uimm8zx4:$cc)),
+def : Pat<(sra (shl (i32 (z_select_ccmask 1, 0, imm32zx4:$valid, imm32zx4:$cc)),
(i32 31)),
(i32 31)),
- (Select32 (LHI -1), (LHI 0), uimm8zx4:$valid, uimm8zx4:$cc)>;
-def : Pat<(sra (shl (i64 (anyext (i32 (z_select_ccmask 1, 0, uimm8zx4:$valid,
- uimm8zx4:$cc)))),
+ (Select32 (LHI -1), (LHI 0), imm32zx4:$valid, imm32zx4:$cc)>;
+def : Pat<(sra (shl (i64 (anyext (i32 (z_select_ccmask 1, 0, imm32zx4:$valid,
+ imm32zx4:$cc)))),
(i32 63)),
(i32 63)),
- (Select64 (LGHI -1), (LGHI 0), uimm8zx4:$valid, uimm8zx4:$cc)>;
+ (Select64 (LGHI -1), (LGHI 0), imm32zx4:$valid, imm32zx4:$cc)>;
// Peepholes for turning scalar operations into block operations.
defm : BlockLoadStore<anyextloadi8, i32, MVCSequence, NCSequence, OCSequence,
diff --git a/lib/Target/SystemZ/SystemZOperands.td b/lib/Target/SystemZ/SystemZOperands.td
index 3ad146c..7be81dc 100644
--- a/lib/Target/SystemZ/SystemZOperands.td
+++ b/lib/Target/SystemZ/SystemZOperands.td
@@ -202,21 +202,6 @@ def S32Imm : ImmediateAsmOperand<"S32Imm">;
def U32Imm : ImmediateAsmOperand<"U32Imm">;
//===----------------------------------------------------------------------===//
-// 8-bit immediates
-//===----------------------------------------------------------------------===//
-
-def uimm8zx4 : Immediate<i8, [{
- return isUInt<4>(N->getZExtValue());
-}], NOOP_SDNodeXForm, "U4Imm">;
-
-def uimm8zx6 : Immediate<i8, [{
- return isUInt<6>(N->getZExtValue());
-}], NOOP_SDNodeXForm, "U6Imm">;
-
-def simm8 : Immediate<i8, [{}], SIMM8, "S8Imm">;
-def uimm8 : Immediate<i8, [{}], UIMM8, "U8Imm">;
-
-//===----------------------------------------------------------------------===//
// i32 immediates
//===----------------------------------------------------------------------===//
@@ -241,6 +226,14 @@ def imm32lh16c : Immediate<i32, [{
}], LH16, "U16Imm">;
// Short immediates
+def imm32zx4 : Immediate<i32, [{
+ return isUInt<4>(N->getZExtValue());
+}], NOOP_SDNodeXForm, "U4Imm">;
+
+def imm32zx6 : Immediate<i32, [{
+ return isUInt<6>(N->getZExtValue());
+}], NOOP_SDNodeXForm, "U6Imm">;
+
def imm32sx8 : Immediate<i32, [{
return isInt<8>(N->getSExtValue());
}], SIMM8, "S8Imm">;
@@ -470,13 +463,13 @@ def AccessReg : AsmOperandClass {
let Name = "AccessReg";
let ParserMethod = "parseAccessReg";
}
-def access_reg : Immediate<i8, [{ return N->getZExtValue() < 16; }],
+def access_reg : Immediate<i32, [{ return N->getZExtValue() < 16; }],
NOOP_SDNodeXForm, "AccessReg"> {
let ParserMatchClass = AccessReg;
}
// A 4-bit condition-code mask.
-def cond4 : PatLeaf<(i8 imm), [{ return (N->getZExtValue() < 16); }]>,
- Operand<i8> {
+def cond4 : PatLeaf<(i32 imm), [{ return (N->getZExtValue() < 16); }]>,
+ Operand<i32> {
let PrintMethod = "printCond4Operand";
}
diff --git a/lib/Target/SystemZ/SystemZOperators.td b/lib/Target/SystemZ/SystemZOperators.td
index a391961..c70e662 100644
--- a/lib/Target/SystemZ/SystemZOperators.td
+++ b/lib/Target/SystemZ/SystemZOperators.td
@@ -19,14 +19,14 @@ def SDT_ZICmp : SDTypeProfile<0, 3,
[SDTCisSameAs<0, 1>,
SDTCisVT<2, i32>]>;
def SDT_ZBRCCMask : SDTypeProfile<0, 3,
- [SDTCisVT<0, i8>,
- SDTCisVT<1, i8>,
+ [SDTCisVT<0, i32>,
+ SDTCisVT<1, i32>,
SDTCisVT<2, OtherVT>]>;
def SDT_ZSelectCCMask : SDTypeProfile<1, 4,
[SDTCisSameAs<0, 1>,
SDTCisSameAs<1, 2>,
- SDTCisVT<3, i8>,
- SDTCisVT<4, i8>]>;
+ SDTCisVT<3, i32>,
+ SDTCisVT<4, i32>]>;
def SDT_ZWrapPtr : SDTypeProfile<1, 1,
[SDTCisSameAs<0, 1>,
SDTCisPtrTy<0>]>;
@@ -37,7 +37,7 @@ def SDT_ZWrapOffset : SDTypeProfile<1, 2,
def SDT_ZAdjDynAlloc : SDTypeProfile<1, 0, [SDTCisVT<0, i64>]>;
def SDT_ZExtractAccess : SDTypeProfile<1, 1,
[SDTCisVT<0, i32>,
- SDTCisVT<1, i8>]>;
+ SDTCisVT<1, i32>]>;
def SDT_ZGR128Binary32 : SDTypeProfile<1, 2,
[SDTCisVT<0, untyped>,
SDTCisVT<1, untyped>,
@@ -77,7 +77,7 @@ def SDT_ZString : SDTypeProfile<1, 3,
SDTCisVT<3, i32>]>;
def SDT_ZI32Intrinsic : SDTypeProfile<1, 0, [SDTCisVT<0, i32>]>;
def SDT_ZPrefetch : SDTypeProfile<0, 2,
- [SDTCisVT<0, i8>,
+ [SDTCisVT<0, i32>,
SDTCisPtrTy<1>]>;
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/SystemZ/SystemZPatterns.td b/lib/Target/SystemZ/SystemZPatterns.td
index c0f94ec..e307f8a 100644
--- a/lib/Target/SystemZ/SystemZPatterns.td
+++ b/lib/Target/SystemZ/SystemZPatterns.td
@@ -101,15 +101,15 @@ multiclass CondStores64<Instruction insn, Instruction insninv,
SDPatternOperator store, SDPatternOperator load,
AddressingMode mode> {
def : Pat<(store (z_select_ccmask GR64:$new, (load mode:$addr),
- uimm8zx4:$valid, uimm8zx4:$cc),
+ imm32zx4:$valid, imm32zx4:$cc),
mode:$addr),
(insn (EXTRACT_SUBREG GR64:$new, subreg_l32), mode:$addr,
- uimm8zx4:$valid, uimm8zx4:$cc)>;
+ imm32zx4:$valid, imm32zx4:$cc)>;
def : Pat<(store (z_select_ccmask (load mode:$addr), GR64:$new,
- uimm8zx4:$valid, uimm8zx4:$cc),
+ imm32zx4:$valid, imm32zx4:$cc),
mode:$addr),
(insninv (EXTRACT_SUBREG GR64:$new, subreg_l32), mode:$addr,
- uimm8zx4:$valid, uimm8zx4:$cc)>;
+ imm32zx4:$valid, imm32zx4:$cc)>;
}
// Try to use MVC instruction INSN for a load of type LOAD followed by a store
diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.cpp b/lib/Target/SystemZ/SystemZRegisterInfo.cpp
index a04d703..f03bcc4 100644
--- a/lib/Target/SystemZ/SystemZRegisterInfo.cpp
+++ b/lib/Target/SystemZ/SystemZRegisterInfo.cpp
@@ -7,31 +7,29 @@
//
//===----------------------------------------------------------------------===//
+#include "SystemZInstrInfo.h"
#include "SystemZRegisterInfo.h"
-#include "SystemZTargetMachine.h"
+#include "SystemZSubtarget.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetFrameLowering.h"
using namespace llvm;
#define GET_REGINFO_TARGET_DESC
#include "SystemZGenRegisterInfo.inc"
-SystemZRegisterInfo::SystemZRegisterInfo(SystemZTargetMachine &tm)
- : SystemZGenRegisterInfo(SystemZ::R14D), TM(tm) {}
+SystemZRegisterInfo::SystemZRegisterInfo()
+ : SystemZGenRegisterInfo(SystemZ::R14D) {}
-const MCPhysReg*
+const MCPhysReg *
SystemZRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
- static const MCPhysReg CalleeSavedRegs[] = {
- SystemZ::R6D, SystemZ::R7D, SystemZ::R8D, SystemZ::R9D,
- SystemZ::R10D, SystemZ::R11D, SystemZ::R12D, SystemZ::R13D,
- SystemZ::R14D, SystemZ::R15D,
- SystemZ::F8D, SystemZ::F9D, SystemZ::F10D, SystemZ::F11D,
- SystemZ::F12D, SystemZ::F13D, SystemZ::F14D, SystemZ::F15D,
- 0
- };
-
- return CalleeSavedRegs;
+ return CSR_SystemZ_SaveList;
+}
+
+const uint32_t *
+SystemZRegisterInfo::getCallPreservedMask(CallingConv::ID CC) const {
+ return CSR_SystemZ_RegMask;
}
BitVector
@@ -63,7 +61,8 @@ SystemZRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
MachineBasicBlock &MBB = *MI->getParent();
MachineFunction &MF = *MBB.getParent();
- auto *TII = static_cast<const SystemZInstrInfo*>(TM.getInstrInfo());
+ auto *TII =
+ static_cast<const SystemZInstrInfo *>(MF.getTarget().getInstrInfo());
const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
DebugLoc DL = MI->getDebugLoc();
diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.h b/lib/Target/SystemZ/SystemZRegisterInfo.h
index e236f71..9bffa46 100644
--- a/lib/Target/SystemZ/SystemZRegisterInfo.h
+++ b/lib/Target/SystemZ/SystemZRegisterInfo.h
@@ -29,15 +29,9 @@ inline unsigned odd128(bool Is32bit) {
}
} // end namespace SystemZ
-class SystemZSubtarget;
-class SystemZInstrInfo;
-
struct SystemZRegisterInfo : public SystemZGenRegisterInfo {
-private:
- SystemZTargetMachine &TM;
-
public:
- SystemZRegisterInfo(SystemZTargetMachine &tm);
+ SystemZRegisterInfo();
// Override TargetRegisterInfo.h.
bool requiresRegisterScavenging(const MachineFunction &MF) const override {
@@ -51,6 +45,7 @@ public:
}
const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF = nullptr) const
override;
+ const uint32_t *getCallPreservedMask(CallingConv::ID CC) const override;
BitVector getReservedRegs(const MachineFunction &MF) const override;
void eliminateFrameIndex(MachineBasicBlock::iterator MI,
int SPAdj, unsigned FIOperandNum,
diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.td b/lib/Target/SystemZ/SystemZRegisterInfo.td
index 93d7c83..47ac20d 100644
--- a/lib/Target/SystemZ/SystemZRegisterInfo.td
+++ b/lib/Target/SystemZ/SystemZRegisterInfo.td
@@ -119,6 +119,29 @@ defm ADDR128 : SystemZRegClass<"ADDR128", untyped, 128, (sub GR128Bit, R0Q)>;
// Floating-point registers
//===----------------------------------------------------------------------===//
+// Maps FPR register numbers to their DWARF encoding.
+class DwarfMapping<int id> { int Id = id; }
+
+def F0Dwarf : DwarfMapping<16>;
+def F2Dwarf : DwarfMapping<17>;
+def F4Dwarf : DwarfMapping<18>;
+def F6Dwarf : DwarfMapping<19>;
+
+def F1Dwarf : DwarfMapping<20>;
+def F3Dwarf : DwarfMapping<21>;
+def F5Dwarf : DwarfMapping<22>;
+def F7Dwarf : DwarfMapping<23>;
+
+def F8Dwarf : DwarfMapping<24>;
+def F10Dwarf : DwarfMapping<25>;
+def F12Dwarf : DwarfMapping<26>;
+def F14Dwarf : DwarfMapping<27>;
+
+def F9Dwarf : DwarfMapping<28>;
+def F11Dwarf : DwarfMapping<29>;
+def F13Dwarf : DwarfMapping<30>;
+def F15Dwarf : DwarfMapping<31>;
+
// Lower 32 bits of one of the 16 64-bit floating-point registers
class FPR32<bits<16> num, string n> : SystemZReg<n> {
let HWEncoding = num;
@@ -142,7 +165,7 @@ class FPR128<bits<16> num, string n, FPR64 low, FPR64 high>
foreach I = 0-15 in {
def F#I#S : FPR32<I, "f"#I>;
def F#I#D : FPR64<I, "f"#I, !cast<FPR32>("F"#I#"S")>,
- DwarfRegNum<[!add(I, 16)]>;
+ DwarfRegNum<[!cast<DwarfMapping>("F"#I#"Dwarf").Id]>;
}
foreach I = [0, 1, 4, 5, 8, 9, 12, 13] in {
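[annotation] The DwarfMapping indirection exists because the FPR DWARF numbers are not linear in the register number: the z/Architecture ABI interleaves even and odd registers, which the old `!add(I, 16)` rule below only got right for F0. Flattened into a table, the mapping defined above is:

  // FPR number -> DWARF register number, per the DwarfMapping defs above.
  static const unsigned FPDwarfNum[16] = {
      16, 20, 17, 21, 18, 22, 19, 23,  // F0-F7
      24, 28, 25, 29, 26, 30, 27, 31   // F8-F15
  };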
diff --git a/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp b/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp
index 97abee3..a3cba64 100644
--- a/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp
+++ b/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp
@@ -18,10 +18,8 @@ using namespace llvm;
#define DEBUG_TYPE "systemz-selectiondag-info"
-SystemZSelectionDAGInfo::
-SystemZSelectionDAGInfo(const SystemZTargetMachine &TM)
- : TargetSelectionDAGInfo(TM) {
-}
+SystemZSelectionDAGInfo::SystemZSelectionDAGInfo(const DataLayout &DL)
+ : TargetSelectionDAGInfo(&DL) {}
SystemZSelectionDAGInfo::~SystemZSelectionDAGInfo() {
}
diff --git a/lib/Target/SystemZ/SystemZSelectionDAGInfo.h b/lib/Target/SystemZ/SystemZSelectionDAGInfo.h
index 79e7fab..e9de146 100644
--- a/lib/Target/SystemZ/SystemZSelectionDAGInfo.h
+++ b/lib/Target/SystemZ/SystemZSelectionDAGInfo.h
@@ -22,7 +22,7 @@ class SystemZTargetMachine;
class SystemZSelectionDAGInfo : public TargetSelectionDAGInfo {
public:
- explicit SystemZSelectionDAGInfo(const SystemZTargetMachine &TM);
+ explicit SystemZSelectionDAGInfo(const DataLayout &DL);
~SystemZSelectionDAGInfo();
SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
diff --git a/lib/Target/SystemZ/SystemZSubtarget.cpp b/lib/Target/SystemZ/SystemZSubtarget.cpp
index a011157..e160bc8 100644
--- a/lib/Target/SystemZ/SystemZSubtarget.cpp
+++ b/lib/Target/SystemZ/SystemZSubtarget.cpp
@@ -20,16 +20,11 @@ using namespace llvm;
#define GET_SUBTARGETINFO_CTOR
#include "SystemZGenSubtargetInfo.inc"
-// Pin the vtabel to this file.
+// Pin the vtable to this file.
void SystemZSubtarget::anchor() {}
-SystemZSubtarget::SystemZSubtarget(const std::string &TT,
- const std::string &CPU,
- const std::string &FS)
- : SystemZGenSubtargetInfo(TT, CPU, FS), HasDistinctOps(false),
- HasLoadStoreOnCond(false), HasHighWord(false), HasFPExtension(false),
- HasFastSerialization(false), HasInterlockedAccess1(false),
- TargetTriple(TT) {
+SystemZSubtarget &
+SystemZSubtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS) {
std::string CPUName = CPU;
if (CPUName.empty())
CPUName = "generic";
@@ -37,11 +32,26 @@ SystemZSubtarget::SystemZSubtarget(const std::string &TT,
if (CPUName == "generic")
CPUName = sys::getHostCPUName();
#endif
-
// Parse features string.
ParseSubtargetFeatures(CPUName, FS);
+ return *this;
}
+SystemZSubtarget::SystemZSubtarget(const std::string &TT,
+ const std::string &CPU,
+ const std::string &FS,
+ const TargetMachine &TM)
+ : SystemZGenSubtargetInfo(TT, CPU, FS), HasDistinctOps(false),
+ HasLoadStoreOnCond(false), HasHighWord(false), HasFPExtension(false),
+ HasFastSerialization(false), HasInterlockedAccess1(false),
+ TargetTriple(TT),
+ // Make sure that global data has at least 16 bits of alignment by
+ // default, so that we can refer to it using LARL. We don't have any
+ // special requirements for stack variables though.
+ DL("E-m:e-i1:8:16-i8:8:16-i64:64-f128:64-a:8:16-n32:64"),
+ InstrInfo(initializeSubtargetDependencies(CPU, FS)), TLInfo(TM),
+ TSInfo(DL), FrameLowering() {}
+
// Return true if GV binds locally under reloc model RM.
static bool bindsLocally(const GlobalValue *GV, Reloc::Model RM) {
// For non-PIC, all symbols bind locally.
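[annotation] The constructor refactoring here is the standard initializeSubtargetDependencies idiom: members such as InstrInfo (added to the subtarget in the header below) must see the parsed feature bits, but member initializers run in declaration order, so feature parsing is funneled through a helper that returns *this from inside the first dependent member's initializer. A reduced, self-contained sketch of the idiom; all names are illustrative, not LLVM API:

  #include <string>

  struct ExampleSubtarget;

  struct ExampleInstrInfo {
    explicit ExampleInstrInfo(const ExampleSubtarget &STI);
  };

  struct ExampleSubtarget {
    bool HasFeatureX = false;

    // Parse the feature string first; returning *this lets this run inside
    // a member initializer, before members that depend on the feature bits.
    ExampleSubtarget &initializeSubtargetDependencies(const std::string &FS) {
      HasFeatureX = FS.find("+feature-x") != std::string::npos;
      return *this;
    }

    ExampleInstrInfo InstrInfo;  // declared after the feature flags

    explicit ExampleSubtarget(const std::string &FS)
        : InstrInfo(initializeSubtargetDependencies(FS)) {}
  };

  ExampleInstrInfo::ExampleInstrInfo(const ExampleSubtarget &STI) {
    // Safe: STI.HasFeatureX was set before this constructor ran.
    (void)STI;
  }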
diff --git a/lib/Target/SystemZ/SystemZSubtarget.h b/lib/Target/SystemZ/SystemZSubtarget.h
index ffca2d8..4e8c710 100644
--- a/lib/Target/SystemZ/SystemZSubtarget.h
+++ b/lib/Target/SystemZ/SystemZSubtarget.h
@@ -14,6 +14,12 @@
#ifndef SYSTEMZSUBTARGET_H
#define SYSTEMZSUBTARGET_H
+#include "SystemZFrameLowering.h"
+#include "SystemZISelLowering.h"
+#include "SystemZInstrInfo.h"
+#include "SystemZRegisterInfo.h"
+#include "SystemZSelectionDAGInfo.h"
+#include "llvm/IR/DataLayout.h"
#include "llvm/ADT/Triple.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include <string>
@@ -37,10 +43,26 @@ protected:
private:
Triple TargetTriple;
-
+ const DataLayout DL;
+ SystemZInstrInfo InstrInfo;
+ SystemZTargetLowering TLInfo;
+ SystemZSelectionDAGInfo TSInfo;
+ SystemZFrameLowering FrameLowering;
+
+ SystemZSubtarget &initializeSubtargetDependencies(StringRef CPU,
+ StringRef FS);
public:
SystemZSubtarget(const std::string &TT, const std::string &CPU,
- const std::string &FS);
+ const std::string &FS, const TargetMachine &TM);
+
+ const TargetFrameLowering *getFrameLowering() const { return &FrameLowering; }
+ const SystemZInstrInfo *getInstrInfo() const { return &InstrInfo; }
+ const DataLayout *getDataLayout() const { return &DL; }
+ const SystemZRegisterInfo *getRegisterInfo() const {
+ return &InstrInfo.getRegisterInfo();
+ }
+ const SystemZTargetLowering *getTargetLowering() const { return &TLInfo; }
+ const TargetSelectionDAGInfo *getSelectionDAGInfo() const { return &TSInfo; }
// This is important for reducing register pressure in vector code.
bool useAA() const override { return true; }
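[annotation] Since the data-layout string now lives in the subtarget rather than the TargetMachine, a gloss on its components may help; the string is taken from the hunk above, component meanings per the LLVM data-layout reference:

  #include "llvm/IR/DataLayout.h"

  llvm::DataLayout DL("E-m:e-i1:8:16-i8:8:16-i64:64-f128:64-a:8:16-n32:64");
  // E         big-endian
  // m:e       ELF-style symbol mangling
  // i1:8:16   i1 has 8-bit ABI alignment, 16-bit preferred
  // i8:8:16   likewise for i8 -- this is the ">= 16-bit alignment" that
  //           keeps global data addressable via LARL
  // i64:64    64-bit alignment for i64
  // f128:64   fp128 needs only 64-bit alignment
  // a:8:16    aggregates: 8-bit ABI, 16-bit preferred alignment
  // n32:64    native integer widths are 32 and 64 bits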
diff --git a/lib/Target/SystemZ/SystemZTargetMachine.cpp b/lib/Target/SystemZ/SystemZTargetMachine.cpp
index 4c9ce29..0122e99 100644
--- a/lib/Target/SystemZ/SystemZTargetMachine.cpp
+++ b/lib/Target/SystemZ/SystemZTargetMachine.cpp
@@ -22,17 +22,10 @@ extern "C" void LLVMInitializeSystemZTarget() {
SystemZTargetMachine::SystemZTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
const TargetOptions &Options,
- Reloc::Model RM,
- CodeModel::Model CM,
+ Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL)
- : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
- Subtarget(TT, CPU, FS),
- // Make sure that global data has at least 16 bits of alignment by default,
- // so that we can refer to it using LARL. We don't have any special
- // requirements for stack variables though.
- DL("E-m:e-i1:8:16-i8:8:16-i64:64-f128:64-a:8:16-n32:64"),
- InstrInfo(*this), TLInfo(*this), TSInfo(*this),
- FrameLowering(*this, Subtarget) {
+ : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
+ Subtarget(TT, CPU, FS, *this) {
initAsmInfo();
}
@@ -65,7 +58,8 @@ bool SystemZPassConfig::addInstSelector() {
}
bool SystemZPassConfig::addPreSched2() {
- if (getSystemZTargetMachine().getSubtargetImpl()->hasLoadStoreOnCond())
+ if (getOptLevel() != CodeGenOpt::None &&
+ getSystemZTargetMachine().getSubtargetImpl()->hasLoadStoreOnCond())
addPass(&IfConverterID);
return true;
}
diff --git a/lib/Target/SystemZ/SystemZTargetMachine.h b/lib/Target/SystemZ/SystemZTargetMachine.h
index 1db717b..ded07e9 100644
--- a/lib/Target/SystemZ/SystemZTargetMachine.h
+++ b/lib/Target/SystemZ/SystemZTargetMachine.h
@@ -15,25 +15,15 @@
#ifndef SYSTEMZTARGETMACHINE_H
#define SYSTEMZTARGETMACHINE_H
-#include "SystemZFrameLowering.h"
-#include "SystemZISelLowering.h"
-#include "SystemZInstrInfo.h"
-#include "SystemZRegisterInfo.h"
-#include "SystemZSelectionDAGInfo.h"
#include "SystemZSubtarget.h"
-#include "llvm/IR/DataLayout.h"
-#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetMachine.h"
namespace llvm {
+class TargetFrameLowering;
+
class SystemZTargetMachine : public LLVMTargetMachine {
SystemZSubtarget Subtarget;
- const DataLayout DL;
- SystemZInstrInfo InstrInfo;
- SystemZTargetLowering TLInfo;
- SystemZSelectionDAGInfo TSInfo;
- SystemZFrameLowering FrameLowering;
public:
SystemZTargetMachine(const Target &T, StringRef TT, StringRef CPU,
@@ -43,25 +33,25 @@ public:
// Override TargetMachine.
const TargetFrameLowering *getFrameLowering() const override {
- return &FrameLowering;
+ return getSubtargetImpl()->getFrameLowering();
}
const SystemZInstrInfo *getInstrInfo() const override {
- return &InstrInfo;
+ return getSubtargetImpl()->getInstrInfo();
}
const SystemZSubtarget *getSubtargetImpl() const override {
return &Subtarget;
}
const DataLayout *getDataLayout() const override {
- return &DL;
+ return getSubtargetImpl()->getDataLayout();
}
const SystemZRegisterInfo *getRegisterInfo() const override {
- return &InstrInfo.getRegisterInfo();
+ return getSubtargetImpl()->getRegisterInfo();
}
const SystemZTargetLowering *getTargetLowering() const override {
- return &TLInfo;
+ return getSubtargetImpl()->getTargetLowering();
}
const TargetSelectionDAGInfo *getSelectionDAGInfo() const override {
- return &TSInfo;
+ return getSubtargetImpl()->getSelectionDAGInfo();
}
// Override LLVMTargetMachine
diff --git a/lib/Target/TargetMachine.cpp b/lib/Target/TargetMachine.cpp
index 8365f64..95c8cb6 100644
--- a/lib/Target/TargetMachine.cpp
+++ b/lib/Target/TargetMachine.cpp
@@ -88,8 +88,8 @@ CodeModel::Model TargetMachine::getCodeModel() const {
}
/// Get the IR-specified TLS model for Var.
-static TLSModel::Model getSelectedTLSModel(const GlobalVariable *Var) {
- switch (Var->getThreadLocalMode()) {
+static TLSModel::Model getSelectedTLSModel(const GlobalValue *GV) {
+ switch (GV->getThreadLocalMode()) {
case GlobalVariable::NotThreadLocal:
llvm_unreachable("getSelectedTLSModel for non-TLS variable");
break;
@@ -127,13 +127,10 @@ TLSModel::Model TargetMachine::getTLSModel(const GlobalValue *GV) const {
Model = TLSModel::InitialExec;
}
- const GlobalVariable *Var = dyn_cast<GlobalVariable>(GV);
- if (Var) {
- // If the user specified a more specific model, use that.
- TLSModel::Model SelectedModel = getSelectedTLSModel(Var);
- if (SelectedModel > Model)
- return SelectedModel;
- }
+ // If the user specified a more specific model, use that.
+ TLSModel::Model SelectedModel = getSelectedTLSModel(GV);
+ if (SelectedModel > Model)
+ return SelectedModel;
return Model;
}
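[annotation] This simplification works because GlobalValue carries the thread-local mode directly (not just GlobalVariable), making the dyn_cast guard dead weight, and because TLSModel::Model is ordered from most to least general in llvm/Support/CodeGen.h (GeneralDynamic < LocalDynamic < InitialExec < LocalExec), so taking the larger of the linkage-derived model and the IR-specified one can only tighten the model. A sketch of the comparison's meaning; refineTLSModel is a hypothetical helper, not LLVM API:

  // "More specific model wins": e.g. linkage analysis yields InitialExec
  // but the IR says localexec -> LocalExec is returned.
  static TLSModel::Model refineTLSModel(TLSModel::Model FromLinkage,
                                        TLSModel::Model FromIR) {
    return FromIR > FromLinkage ? FromIR : FromLinkage;
  }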
diff --git a/lib/Target/TargetSubtargetInfo.cpp b/lib/Target/TargetSubtargetInfo.cpp
index 3ca13da..87b6b66 100644
--- a/lib/Target/TargetSubtargetInfo.cpp
+++ b/lib/Target/TargetSubtargetInfo.cpp
@@ -39,10 +39,23 @@ bool TargetSubtargetInfo::useMachineScheduler() const {
return enableMachineScheduler();
}
+bool TargetSubtargetInfo::enableAtomicExpandLoadLinked() const {
+ return true;
+}
+
bool TargetSubtargetInfo::enableMachineScheduler() const {
return false;
}
+bool TargetSubtargetInfo::enableRALocalReassignment(
+ CodeGenOpt::Level OptLevel) const {
+ return true;
+}
+
+bool TargetSubtargetInfo::enablePostMachineScheduler() const {
+ return false;
+}
+
bool TargetSubtargetInfo::enablePostRAScheduler(
CodeGenOpt::Level OptLevel,
AntiDepBreakMode& Mode,
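[annotation] All of the bodies added here are just defaults; the hooks only matter once a target's subtarget overrides them. A hypothetical override, to show the intended usage (MyTargetSubtarget is illustrative, not a real target):

  class MyTargetSubtarget : public TargetSubtargetInfo {
  public:
    // Opt in to the new post-RA MachineScheduler path.
    bool enablePostMachineScheduler() const override { return true; }
    // Keep the register allocator's local reassignment only when optimizing.
    bool enableRALocalReassignment(CodeGenOpt::Level OptLevel) const override {
      return OptLevel != CodeGenOpt::None;
    }
  };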
diff --git a/lib/Target/X86/Android.mk b/lib/Target/X86/Android.mk
index 0d0a9ca..e2c4be7 100644
--- a/lib/Target/X86/Android.mk
+++ b/lib/Target/X86/Android.mk
@@ -12,6 +12,7 @@ x86_codegen_TBLGEN_TABLES := \
x86_codegen_SRC_FILES := \
X86AsmPrinter.cpp \
+ X86AtomicExpandPass.cpp \
X86CodeEmitter.cpp \
X86FastISel.cpp \
X86FixupLEAs.cpp \
diff --git a/lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp b/lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp
index f3e6b3f..a365f62 100644
--- a/lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp
+++ b/lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp
@@ -20,6 +20,7 @@
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCTargetAsmParser.h"
#include "llvm/MC/MCTargetOptions.h"
#include "llvm/Support/CommandLine.h"
@@ -36,8 +37,8 @@ bool IsStackReg(unsigned Reg) {
}
std::string FuncName(unsigned AccessSize, bool IsWrite) {
- return std::string("__sanitizer_sanitize_") + (IsWrite ? "store" : "load") +
- (utostr(AccessSize));
+ return std::string("__asan_report_") + (IsWrite ? "store" : "load") +
+ utostr(AccessSize);
}
class X86AddressSanitizer : public X86AsmInstrumentation {
@@ -47,47 +48,55 @@ public:
// X86AsmInstrumentation implementation:
virtual void InstrumentInstruction(
- const MCInst &Inst, SmallVectorImpl<MCParsedAsmOperand *> &Operands,
- MCContext &Ctx, const MCInstrInfo &MII, MCStreamer &Out) override {
+ const MCInst &Inst, OperandVector &Operands, MCContext &Ctx,
+ const MCInstrInfo &MII, MCStreamer &Out) override {
InstrumentMOV(Inst, Operands, Ctx, MII, Out);
}
// Should be implemented differently in x86_32 and x86_64 subclasses.
- virtual void InstrumentMemOperandImpl(X86Operand *Op, unsigned AccessSize,
- bool IsWrite, MCContext &Ctx,
- MCStreamer &Out) = 0;
+ virtual void InstrumentMemOperandSmallImpl(
+ X86Operand &Op, unsigned AccessSize, bool IsWrite, MCContext &Ctx,
+ MCStreamer &Out) = 0;
+ virtual void InstrumentMemOperandLargeImpl(
+ X86Operand &Op, unsigned AccessSize, bool IsWrite, MCContext &Ctx,
+ MCStreamer &Out) = 0;
- void InstrumentMemOperand(MCParsedAsmOperand *Op, unsigned AccessSize,
+ void InstrumentMemOperand(MCParsedAsmOperand &Op, unsigned AccessSize,
bool IsWrite, MCContext &Ctx, MCStreamer &Out);
- void InstrumentMOV(const MCInst &Inst,
- SmallVectorImpl<MCParsedAsmOperand *> &Operands,
+ void InstrumentMOV(const MCInst &Inst, OperandVector &Operands,
MCContext &Ctx, const MCInstrInfo &MII, MCStreamer &Out);
void EmitInstruction(MCStreamer &Out, const MCInst &Inst) {
Out.EmitInstruction(Inst, STI);
}
+ void EmitLabel(MCStreamer &Out, MCSymbol *Label) { Out.EmitLabel(Label); }
+
protected:
const MCSubtargetInfo &STI;
};
void X86AddressSanitizer::InstrumentMemOperand(
- MCParsedAsmOperand *Op, unsigned AccessSize, bool IsWrite, MCContext &Ctx,
+ MCParsedAsmOperand &Op, unsigned AccessSize, bool IsWrite, MCContext &Ctx,
MCStreamer &Out) {
- assert(Op && Op->isMem() && "Op should be a memory operand.");
+ assert(Op.isMem() && "Op should be a memory operand.");
assert((AccessSize & (AccessSize - 1)) == 0 && AccessSize <= 16 &&
"AccessSize should be a power of two, less or equal than 16.");
- X86Operand *MemOp = static_cast<X86Operand *>(Op);
+ X86Operand &MemOp = static_cast<X86Operand &>(Op);
// FIXME: get rid of this limitation.
- if (IsStackReg(MemOp->getMemBaseReg()) || IsStackReg(MemOp->getMemIndexReg()))
+ if (IsStackReg(MemOp.getMemBaseReg()) || IsStackReg(MemOp.getMemIndexReg()))
return;
- InstrumentMemOperandImpl(MemOp, AccessSize, IsWrite, Ctx, Out);
+ // FIXME: take into account load/store alignment.
+ if (AccessSize < 8)
+ InstrumentMemOperandSmallImpl(MemOp, AccessSize, IsWrite, Ctx, Out);
+ else
+ InstrumentMemOperandLargeImpl(MemOp, AccessSize, IsWrite, Ctx, Out);
}
void X86AddressSanitizer::InstrumentMOV(
- const MCInst &Inst, SmallVectorImpl<MCParsedAsmOperand *> &Operands,
- MCContext &Ctx, const MCInstrInfo &MII, MCStreamer &Out) {
+ const MCInst &Inst, OperandVector &Operands, MCContext &Ctx,
+ const MCInstrInfo &MII, MCStreamer &Out) {
// Access size in bytes.
unsigned AccessSize = 0;
@@ -124,107 +133,351 @@ void X86AddressSanitizer::InstrumentMOV(
const bool IsWrite = MII.get(Inst.getOpcode()).mayStore();
for (unsigned Ix = 0; Ix < Operands.size(); ++Ix) {
- MCParsedAsmOperand *Op = Operands[Ix];
- if (Op && Op->isMem())
+ assert(Operands[Ix]);
+ MCParsedAsmOperand &Op = *Operands[Ix];
+ if (Op.isMem())
InstrumentMemOperand(Op, AccessSize, IsWrite, Ctx, Out);
}
}
class X86AddressSanitizer32 : public X86AddressSanitizer {
public:
+ static const long kShadowOffset = 0x20000000;
+
X86AddressSanitizer32(const MCSubtargetInfo &STI)
: X86AddressSanitizer(STI) {}
virtual ~X86AddressSanitizer32() {}
- virtual void InstrumentMemOperandImpl(X86Operand *Op, unsigned AccessSize,
- bool IsWrite, MCContext &Ctx,
- MCStreamer &Out) override;
+ virtual void InstrumentMemOperandSmallImpl(
+ X86Operand &Op, unsigned AccessSize, bool IsWrite, MCContext &Ctx,
+ MCStreamer &Out) override;
+ virtual void InstrumentMemOperandLargeImpl(
+ X86Operand &Op, unsigned AccessSize, bool IsWrite, MCContext &Ctx,
+ MCStreamer &Out) override;
+
+ private:
+ void EmitCallAsanReport(MCContext &Ctx, MCStreamer &Out, unsigned AccessSize,
+ bool IsWrite, unsigned AddressReg) {
+ EmitInstruction(Out, MCInstBuilder(X86::CLD));
+ EmitInstruction(Out, MCInstBuilder(X86::MMX_EMMS));
+
+ EmitInstruction(Out, MCInstBuilder(X86::AND64ri8).addReg(X86::ESP)
+ .addReg(X86::ESP).addImm(-16));
+ EmitInstruction(Out, MCInstBuilder(X86::PUSH32r).addReg(AddressReg));
+
+
+ const std::string& Fn = FuncName(AccessSize, IsWrite);
+ MCSymbol *FnSym = Ctx.GetOrCreateSymbol(StringRef(Fn));
+ const MCSymbolRefExpr *FnExpr =
+ MCSymbolRefExpr::Create(FnSym, MCSymbolRefExpr::VK_PLT, Ctx);
+ EmitInstruction(Out, MCInstBuilder(X86::CALLpcrel32).addExpr(FnExpr));
+ }
};
-void X86AddressSanitizer32::InstrumentMemOperandImpl(
- X86Operand *Op, unsigned AccessSize, bool IsWrite, MCContext &Ctx,
+void X86AddressSanitizer32::InstrumentMemOperandSmallImpl(
+ X86Operand &Op, unsigned AccessSize, bool IsWrite, MCContext &Ctx,
MCStreamer &Out) {
- // FIXME: emit .cfi directives for correct stack unwinding.
EmitInstruction(Out, MCInstBuilder(X86::PUSH32r).addReg(X86::EAX));
+ EmitInstruction(Out, MCInstBuilder(X86::PUSH32r).addReg(X86::ECX));
+ EmitInstruction(Out, MCInstBuilder(X86::PUSH32r).addReg(X86::EDX));
+ EmitInstruction(Out, MCInstBuilder(X86::PUSHF32));
+
{
MCInst Inst;
Inst.setOpcode(X86::LEA32r);
Inst.addOperand(MCOperand::CreateReg(X86::EAX));
+ Op.addMemOperands(Inst, 5);
+ EmitInstruction(Out, Inst);
+ }
+
+ EmitInstruction(
+ Out, MCInstBuilder(X86::MOV32rr).addReg(X86::ECX).addReg(X86::EAX));
+ EmitInstruction(Out, MCInstBuilder(X86::SHR32ri).addReg(X86::ECX)
+ .addReg(X86::ECX).addImm(3));
+
+ {
+ MCInst Inst;
+ Inst.setOpcode(X86::MOV8rm);
+ Inst.addOperand(MCOperand::CreateReg(X86::CL));
+ const MCExpr *Disp = MCConstantExpr::Create(kShadowOffset, Ctx);
+ std::unique_ptr<X86Operand> Op(
+ X86Operand::CreateMem(0, Disp, X86::ECX, 0, 1, SMLoc(), SMLoc()));
+ Op->addMemOperands(Inst, 5);
+ EmitInstruction(Out, Inst);
+ }
+
+ EmitInstruction(Out,
+ MCInstBuilder(X86::TEST8rr).addReg(X86::CL).addReg(X86::CL));
+ MCSymbol *DoneSym = Ctx.CreateTempSymbol();
+ const MCExpr *DoneExpr = MCSymbolRefExpr::Create(DoneSym, Ctx);
+ EmitInstruction(Out, MCInstBuilder(X86::JE_4).addExpr(DoneExpr));
+
+ EmitInstruction(
+ Out, MCInstBuilder(X86::MOV32rr).addReg(X86::EDX).addReg(X86::EAX));
+ EmitInstruction(Out, MCInstBuilder(X86::AND32ri).addReg(X86::EDX)
+ .addReg(X86::EDX).addImm(7));
+
+ switch (AccessSize) {
+ case 1:
+ break;
+ case 2: {
+ MCInst Inst;
+ Inst.setOpcode(X86::LEA32r);
+ Inst.addOperand(MCOperand::CreateReg(X86::EDX));
+
+ const MCExpr *Disp = MCConstantExpr::Create(1, Ctx);
+ std::unique_ptr<X86Operand> Op(
+ X86Operand::CreateMem(0, Disp, X86::EDX, 0, 1, SMLoc(), SMLoc()));
Op->addMemOperands(Inst, 5);
EmitInstruction(Out, Inst);
+ break;
}
+ case 4:
+ EmitInstruction(Out, MCInstBuilder(X86::ADD32ri8).addReg(X86::EDX)
+ .addReg(X86::EDX).addImm(3));
+ break;
+ default:
+ assert(false && "Incorrect access size");
+ break;
+ }
+
+ EmitInstruction(
+ Out, MCInstBuilder(X86::MOVSX32rr8).addReg(X86::ECX).addReg(X86::CL));
+ EmitInstruction(
+ Out, MCInstBuilder(X86::CMP32rr).addReg(X86::EDX).addReg(X86::ECX));
+ EmitInstruction(Out, MCInstBuilder(X86::JL_4).addExpr(DoneExpr));
+
+ EmitCallAsanReport(Ctx, Out, AccessSize, IsWrite, X86::EAX);
+ EmitLabel(Out, DoneSym);
+
+ EmitInstruction(Out, MCInstBuilder(X86::POPF32));
+ EmitInstruction(Out, MCInstBuilder(X86::POP32r).addReg(X86::EDX));
+ EmitInstruction(Out, MCInstBuilder(X86::POP32r).addReg(X86::ECX));
+ EmitInstruction(Out, MCInstBuilder(X86::POP32r).addReg(X86::EAX));
+}
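The sequence above is the standard ASan fast path for 1-, 2- and 4-byte accesses: shift the address right by 3, load one shadow byte at kShadowOffset (0x20000000 on 32-bit x86; the 64-bit class below uses 0x7fff8000), and for a nonzero shadow compare the access's last byte offset within its 8-byte granule against it. A minimal C++ restatement of what the emitted instructions compute, assuming the usual shadow encoding (0 = granule fully addressable, k in 1..7 = only the first k bytes addressable):

    #include <cstdint>

    // Sketch of the emitted check; Addr plays the role of %eax, Shadow of %cl.
    bool isPoisoned(uintptr_t Addr, unsigned AccessSize, uintptr_t ShadowOffset) {
      int8_t Shadow = *reinterpret_cast<const int8_t *>((Addr >> 3) + ShadowOffset);
      if (Shadow == 0)
        return false;                         // JE done: whole granule addressable
      int32_t Last = (Addr & 7) + AccessSize - 1; // +1 via LEA, +3 via ADD above
      return Last >= Shadow;                  // JL done when Last < Shadow
    }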
+
+void X86AddressSanitizer32::InstrumentMemOperandLargeImpl(
+ X86Operand &Op, unsigned AccessSize, bool IsWrite, MCContext &Ctx,
+ MCStreamer &Out) {
EmitInstruction(Out, MCInstBuilder(X86::PUSH32r).addReg(X86::EAX));
+ EmitInstruction(Out, MCInstBuilder(X86::PUSH32r).addReg(X86::ECX));
+ EmitInstruction(Out, MCInstBuilder(X86::PUSHF32));
+
{
- const std::string Func = FuncName(AccessSize, IsWrite);
- const MCSymbol *FuncSym = Ctx.GetOrCreateSymbol(StringRef(Func));
- const MCSymbolRefExpr *FuncExpr =
- MCSymbolRefExpr::Create(FuncSym, MCSymbolRefExpr::VK_PLT, Ctx);
- EmitInstruction(Out, MCInstBuilder(X86::CALLpcrel32).addExpr(FuncExpr));
+ MCInst Inst;
+ Inst.setOpcode(X86::LEA32r);
+ Inst.addOperand(MCOperand::CreateReg(X86::EAX));
+ Op.addMemOperands(Inst, 5);
+ EmitInstruction(Out, Inst);
}
- EmitInstruction(Out, MCInstBuilder(X86::POP32r).addReg(X86::EAX));
+ EmitInstruction(
+ Out, MCInstBuilder(X86::MOV32rr).addReg(X86::ECX).addReg(X86::EAX));
+ EmitInstruction(Out, MCInstBuilder(X86::SHR32ri).addReg(X86::ECX)
+ .addReg(X86::ECX).addImm(3));
+ {
+ MCInst Inst;
+ switch (AccessSize) {
+ case 8:
+ Inst.setOpcode(X86::CMP8mi);
+ break;
+ case 16:
+ Inst.setOpcode(X86::CMP16mi);
+ break;
+ default:
+ assert(false && "Incorrect access size");
+ break;
+ }
+ const MCExpr *Disp = MCConstantExpr::Create(kShadowOffset, Ctx);
+ std::unique_ptr<X86Operand> Op(
+ X86Operand::CreateMem(0, Disp, X86::ECX, 0, 1, SMLoc(), SMLoc()));
+ Op->addMemOperands(Inst, 5);
+ Inst.addOperand(MCOperand::CreateImm(0));
+ EmitInstruction(Out, Inst);
+ }
+ MCSymbol *DoneSym = Ctx.CreateTempSymbol();
+ const MCExpr *DoneExpr = MCSymbolRefExpr::Create(DoneSym, Ctx);
+ EmitInstruction(Out, MCInstBuilder(X86::JE_4).addExpr(DoneExpr));
+
+ EmitCallAsanReport(Ctx, Out, AccessSize, IsWrite, X86::EAX);
+ EmitLabel(Out, DoneSym);
+
+ EmitInstruction(Out, MCInstBuilder(X86::POPF32));
+ EmitInstruction(Out, MCInstBuilder(X86::POP32r).addReg(X86::ECX));
EmitInstruction(Out, MCInstBuilder(X86::POP32r).addReg(X86::EAX));
}
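For 8- and 16-byte accesses the fast path is simpler: such an access is valid only if every covered 8-byte granule is fully addressable, so the corresponding shadow must be exactly zero. That is why the code above tests one shadow byte (CMP8mi) or two adjacent shadow bytes at once (CMP16mi) against zero. A sketch under the same shadow-mapping assumption (and natural alignment of the access):

    #include <cstdint>

    bool isPoisonedLarge(uintptr_t Addr, unsigned AccessSize,
                         uintptr_t ShadowOffset) {
      uintptr_t S = (Addr >> 3) + ShadowOffset;
      if (AccessSize == 8)
        return *reinterpret_cast<const uint8_t *>(S) != 0;  // CMP8mi
      return *reinterpret_cast<const uint16_t *>(S) != 0;   // CMP16mi, 16 bytes
    }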
class X86AddressSanitizer64 : public X86AddressSanitizer {
public:
+ static const long kShadowOffset = 0x7fff8000;
+
X86AddressSanitizer64(const MCSubtargetInfo &STI)
: X86AddressSanitizer(STI) {}
virtual ~X86AddressSanitizer64() {}
- virtual void InstrumentMemOperandImpl(X86Operand *Op, unsigned AccessSize,
- bool IsWrite, MCContext &Ctx,
- MCStreamer &Out) override;
-};
+ virtual void InstrumentMemOperandSmallImpl(
+ X86Operand &Op, unsigned AccessSize, bool IsWrite, MCContext &Ctx,
+ MCStreamer &Out) override;
+ virtual void InstrumentMemOperandLargeImpl(
+ X86Operand &Op, unsigned AccessSize, bool IsWrite, MCContext &Ctx,
+ MCStreamer &Out) override;
-void X86AddressSanitizer64::InstrumentMemOperandImpl(X86Operand *Op,
- unsigned AccessSize,
- bool IsWrite,
- MCContext &Ctx,
- MCStreamer &Out) {
- // FIXME: emit .cfi directives for correct stack unwinding.
-
- // Set %rsp below current red zone (128 bytes wide) using LEA instruction to
- // preserve flags.
- {
+private:
+ void EmitAdjustRSP(MCContext &Ctx, MCStreamer &Out, long Offset) {
MCInst Inst;
Inst.setOpcode(X86::LEA64r);
Inst.addOperand(MCOperand::CreateReg(X86::RSP));
- const MCExpr *Disp = MCConstantExpr::Create(-128, Ctx);
+ const MCExpr *Disp = MCConstantExpr::Create(Offset, Ctx);
std::unique_ptr<X86Operand> Op(
X86Operand::CreateMem(0, Disp, X86::RSP, 0, 1, SMLoc(), SMLoc()));
Op->addMemOperands(Inst, 5);
EmitInstruction(Out, Inst);
}
+
+ void EmitCallAsanReport(MCContext &Ctx, MCStreamer &Out, unsigned AccessSize,
+ bool IsWrite) {
+ EmitInstruction(Out, MCInstBuilder(X86::CLD));
+ EmitInstruction(Out, MCInstBuilder(X86::MMX_EMMS));
+
+ EmitInstruction(Out, MCInstBuilder(X86::AND64ri8).addReg(X86::RSP)
+ .addReg(X86::RSP).addImm(-16));
+
+ const std::string &Fn = FuncName(AccessSize, IsWrite);
+ MCSymbol *FnSym = Ctx.GetOrCreateSymbol(StringRef(Fn));
+ const MCSymbolRefExpr *FnExpr =
+ MCSymbolRefExpr::Create(FnSym, MCSymbolRefExpr::VK_PLT, Ctx);
+ EmitInstruction(Out, MCInstBuilder(X86::CALL64pcrel32).addExpr(FnExpr));
+ }
+};
+
+void X86AddressSanitizer64::InstrumentMemOperandSmallImpl(
+ X86Operand &Op, unsigned AccessSize, bool IsWrite, MCContext &Ctx,
+ MCStreamer &Out) {
+ EmitAdjustRSP(Ctx, Out, -128);
+ EmitInstruction(Out, MCInstBuilder(X86::PUSH64r).addReg(X86::RAX));
+ EmitInstruction(Out, MCInstBuilder(X86::PUSH64r).addReg(X86::RCX));
EmitInstruction(Out, MCInstBuilder(X86::PUSH64r).addReg(X86::RDI));
+ EmitInstruction(Out, MCInstBuilder(X86::PUSHF64));
{
MCInst Inst;
Inst.setOpcode(X86::LEA64r);
Inst.addOperand(MCOperand::CreateReg(X86::RDI));
- Op->addMemOperands(Inst, 5);
+ Op.addMemOperands(Inst, 5);
EmitInstruction(Out, Inst);
}
+ EmitInstruction(
+ Out, MCInstBuilder(X86::MOV64rr).addReg(X86::RAX).addReg(X86::RDI));
+ EmitInstruction(Out, MCInstBuilder(X86::SHR64ri).addReg(X86::RAX)
+ .addReg(X86::RAX).addImm(3));
{
- const std::string Func = FuncName(AccessSize, IsWrite);
- const MCSymbol *FuncSym = Ctx.GetOrCreateSymbol(StringRef(Func));
- const MCSymbolRefExpr *FuncExpr =
- MCSymbolRefExpr::Create(FuncSym, MCSymbolRefExpr::VK_PLT, Ctx);
- EmitInstruction(Out, MCInstBuilder(X86::CALL64pcrel32).addExpr(FuncExpr));
+ MCInst Inst;
+ Inst.setOpcode(X86::MOV8rm);
+ Inst.addOperand(MCOperand::CreateReg(X86::AL));
+ const MCExpr *Disp = MCConstantExpr::Create(kShadowOffset, Ctx);
+ std::unique_ptr<X86Operand> Op(
+ X86Operand::CreateMem(0, Disp, X86::RAX, 0, 1, SMLoc(), SMLoc()));
+ Op->addMemOperands(Inst, 5);
+ EmitInstruction(Out, Inst);
+ }
+
+ EmitInstruction(Out,
+ MCInstBuilder(X86::TEST8rr).addReg(X86::AL).addReg(X86::AL));
+ MCSymbol *DoneSym = Ctx.CreateTempSymbol();
+ const MCExpr *DoneExpr = MCSymbolRefExpr::Create(DoneSym, Ctx);
+ EmitInstruction(Out, MCInstBuilder(X86::JE_4).addExpr(DoneExpr));
+
+ EmitInstruction(
+ Out, MCInstBuilder(X86::MOV32rr).addReg(X86::ECX).addReg(X86::EDI));
+ EmitInstruction(Out, MCInstBuilder(X86::AND32ri).addReg(X86::ECX)
+ .addReg(X86::ECX).addImm(7));
+
+ switch (AccessSize) {
+ case 1:
+ break;
+ case 2: {
+ MCInst Inst;
+ Inst.setOpcode(X86::LEA32r);
+ Inst.addOperand(MCOperand::CreateReg(X86::ECX));
+
+ const MCExpr *Disp = MCConstantExpr::Create(1, Ctx);
+ std::unique_ptr<X86Operand> Op(
+ X86Operand::CreateMem(0, Disp, X86::ECX, 0, 1, SMLoc(), SMLoc()));
+ Op->addMemOperands(Inst, 5);
+ EmitInstruction(Out, Inst);
+ break;
}
+ case 4:
+ EmitInstruction(Out, MCInstBuilder(X86::ADD32ri8).addReg(X86::ECX)
+ .addReg(X86::ECX).addImm(3));
+ break;
+ default:
+ assert(false && "Incorrect access size");
+ break;
+ }
+
+ EmitInstruction(
+ Out, MCInstBuilder(X86::MOVSX32rr8).addReg(X86::EAX).addReg(X86::AL));
+ EmitInstruction(
+ Out, MCInstBuilder(X86::CMP32rr).addReg(X86::ECX).addReg(X86::EAX));
+ EmitInstruction(Out, MCInstBuilder(X86::JL_4).addExpr(DoneExpr));
+
+ EmitCallAsanReport(Ctx, Out, AccessSize, IsWrite);
+ EmitLabel(Out, DoneSym);
+
+ EmitInstruction(Out, MCInstBuilder(X86::POPF64));
EmitInstruction(Out, MCInstBuilder(X86::POP64r).addReg(X86::RDI));
+ EmitInstruction(Out, MCInstBuilder(X86::POP64r).addReg(X86::RCX));
+ EmitInstruction(Out, MCInstBuilder(X86::POP64r).addReg(X86::RAX));
+ EmitAdjustRSP(Ctx, Out, 128);
+}
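The EmitAdjustRSP(-128)/EmitAdjustRSP(128) pair bracketing this sequence exists because the SysV x86-64 ABI grants leaf code a 128-byte red zone below %rsp; the instrumentation must step below it before pushing anything so it does not clobber the caller's live data. LEA is used for the adjustment rather than SUB/ADD because it leaves EFLAGS intact until PUSHF64 has saved them. Modeled in plain C++ (illustrative only, not LLVM API):

    #include <cstdint>

    constexpr int64_t kRedZoneSize = 128; // SysV x86-64 ABI
    // lea Offset(%rsp), %rsp — same effect as an add, but the flags survive.
    inline uint64_t adjustRSP(uint64_t Rsp, int64_t Offset) {
      return Rsp + Offset;
    }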
+
+void X86AddressSanitizer64::InstrumentMemOperandLargeImpl(
+ X86Operand &Op, unsigned AccessSize, bool IsWrite, MCContext &Ctx,
+ MCStreamer &Out) {
+ EmitAdjustRSP(Ctx, Out, -128);
+ EmitInstruction(Out, MCInstBuilder(X86::PUSH64r).addReg(X86::RAX));
+ EmitInstruction(Out, MCInstBuilder(X86::PUSHF64));
- // Restore old %rsp value.
{
MCInst Inst;
Inst.setOpcode(X86::LEA64r);
- Inst.addOperand(MCOperand::CreateReg(X86::RSP));
-
- const MCExpr *Disp = MCConstantExpr::Create(128, Ctx);
+ Inst.addOperand(MCOperand::CreateReg(X86::RAX));
+ Op.addMemOperands(Inst, 5);
+ EmitInstruction(Out, Inst);
+ }
+ EmitInstruction(Out, MCInstBuilder(X86::SHR64ri).addReg(X86::RAX)
+ .addReg(X86::RAX).addImm(3));
+ {
+ MCInst Inst;
+ switch (AccessSize) {
+ case 8:
+ Inst.setOpcode(X86::CMP8mi);
+ break;
+ case 16:
+ Inst.setOpcode(X86::CMP16mi);
+ break;
+ default:
+ assert(false && "Incorrect access size");
+ break;
+ }
+ const MCExpr *Disp = MCConstantExpr::Create(kShadowOffset, Ctx);
std::unique_ptr<X86Operand> Op(
- X86Operand::CreateMem(0, Disp, X86::RSP, 0, 1, SMLoc(), SMLoc()));
+ X86Operand::CreateMem(0, Disp, X86::RAX, 0, 1, SMLoc(), SMLoc()));
Op->addMemOperands(Inst, 5);
+ Inst.addOperand(MCOperand::CreateImm(0));
EmitInstruction(Out, Inst);
}
+
+ MCSymbol *DoneSym = Ctx.CreateTempSymbol();
+ const MCExpr *DoneExpr = MCSymbolRefExpr::Create(DoneSym, Ctx);
+ EmitInstruction(Out, MCInstBuilder(X86::JE_4).addExpr(DoneExpr));
+
+ EmitCallAsanReport(Ctx, Out, AccessSize, IsWrite);
+ EmitLabel(Out, DoneSym);
+
+ EmitInstruction(Out, MCInstBuilder(X86::POPF64));
+ EmitInstruction(Out, MCInstBuilder(X86::POP64r).addReg(X86::RAX));
+ EmitAdjustRSP(Ctx, Out, 128);
}
} // End anonymous namespace
@@ -233,8 +486,8 @@ X86AsmInstrumentation::X86AsmInstrumentation() {}
X86AsmInstrumentation::~X86AsmInstrumentation() {}
void X86AsmInstrumentation::InstrumentInstruction(
- const MCInst &Inst, SmallVectorImpl<MCParsedAsmOperand *> &Operands,
- MCContext &Ctx, const MCInstrInfo &MII, MCStreamer &Out) {}
+ const MCInst &Inst, OperandVector &Operands, MCContext &Ctx,
+ const MCInstrInfo &MII, MCStreamer &Out) {}
X86AsmInstrumentation *
CreateX86AsmInstrumentation(const MCTargetOptions &MCOptions,
diff --git a/lib/Target/X86/AsmParser/X86AsmInstrumentation.h b/lib/Target/X86/AsmParser/X86AsmInstrumentation.h
index 0369b14..1bc3c09 100644
--- a/lib/Target/X86/AsmParser/X86AsmInstrumentation.h
+++ b/lib/Target/X86/AsmParser/X86AsmInstrumentation.h
@@ -12,6 +12,8 @@
#include "llvm/ADT/SmallVector.h"
+#include <memory>
+
namespace llvm {
class MCContext;
@@ -35,10 +37,9 @@ public:
// Instruments Inst. Should be called just before the original
// instruction is sent to Out.
virtual void InstrumentInstruction(
- const MCInst &Inst, SmallVectorImpl<MCParsedAsmOperand *> &Operands,
- MCContext &Ctx,
- const MCInstrInfo &MII,
- MCStreamer &Out);
+ const MCInst &Inst,
+ SmallVectorImpl<std::unique_ptr<MCParsedAsmOperand>> &Operands,
+ MCContext &Ctx, const MCInstrInfo &MII, MCStreamer &Out);
protected:
friend X86AsmInstrumentation *
diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp
index d3e695e..f0765ed 100644
--- a/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -235,6 +235,7 @@ private:
IES_RSHIFT,
IES_PLUS,
IES_MINUS,
+ IES_NOT,
IES_MULTIPLY,
IES_DIVIDE,
IES_LBRAC,
@@ -372,6 +373,7 @@ private:
State = IES_ERROR;
break;
case IES_PLUS:
+ case IES_NOT:
case IES_MULTIPLY:
case IES_DIVIDE:
case IES_LPAREN:
@@ -401,6 +403,19 @@ private:
}
PrevState = CurrState;
}
+ void onNot() {
+ IntelExprState CurrState = State;
+ switch (State) {
+ default:
+ State = IES_ERROR;
+ break;
+ case IES_PLUS:
+ case IES_NOT:
+ State = IES_NOT;
+ break;
+ }
+ PrevState = CurrState;
+ }
void onRegister(unsigned Reg) {
IntelExprState CurrState = State;
switch (State) {
@@ -438,6 +453,7 @@ private:
break;
case IES_PLUS:
case IES_MINUS:
+ case IES_NOT:
State = IES_INTEGER;
Sym = SymRef;
SymName = SymRefName;
@@ -453,6 +469,7 @@ private:
break;
case IES_PLUS:
case IES_MINUS:
+ case IES_NOT:
case IES_OR:
case IES_AND:
case IES_LSHIFT:
@@ -476,11 +493,22 @@ private:
PrevState == IES_OR || PrevState == IES_AND ||
PrevState == IES_LSHIFT || PrevState == IES_RSHIFT ||
PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE ||
- PrevState == IES_LPAREN || PrevState == IES_LBRAC) &&
+ PrevState == IES_LPAREN || PrevState == IES_LBRAC ||
+ PrevState == IES_NOT) &&
CurrState == IES_MINUS) {
// Unary minus. No need to pop the minus operand because it was never
// pushed.
IC.pushOperand(IC_IMM, -TmpInt); // Push -Imm.
+ } else if ((PrevState == IES_PLUS || PrevState == IES_MINUS ||
+ PrevState == IES_OR || PrevState == IES_AND ||
+ PrevState == IES_LSHIFT || PrevState == IES_RSHIFT ||
+ PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE ||
+ PrevState == IES_LPAREN || PrevState == IES_LBRAC ||
+ PrevState == IES_NOT) &&
+ CurrState == IES_NOT) {
+ // Unary not. No need to pop the not operand because it was never
+ // pushed.
+ IC.pushOperand(IC_IMM, ~TmpInt); // Push ~Imm.
} else {
IC.pushOperand(IC_IMM, TmpInt);
}
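The new IES_NOT state lets `~` fold directly into the immediate, the same way unary minus already did: when the next integer arrives it is pushed pre-complemented, so no operator node is ever needed. A worked example:

    #include <cstdint>

    // "mov eax, ~2" — onNot() fires, then onInteger(2) pushes ~2 directly.
    constexpr int64_t Pushed = ~int64_t(2);
    static_assert(Pushed == -3, "unary ~ folded into the immediate");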
@@ -561,6 +589,7 @@ private:
break;
case IES_PLUS:
case IES_MINUS:
+ case IES_NOT:
case IES_OR:
case IES_AND:
case IES_LSHIFT:
@@ -568,13 +597,14 @@ private:
case IES_MULTIPLY:
case IES_DIVIDE:
case IES_LPAREN:
- // FIXME: We don't handle this type of unary minus, yet.
+ // FIXME: We don't yet handle this type of unary minus or unary not.
if ((PrevState == IES_PLUS || PrevState == IES_MINUS ||
PrevState == IES_OR || PrevState == IES_AND ||
PrevState == IES_LSHIFT || PrevState == IES_RSHIFT ||
PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE ||
- PrevState == IES_LPAREN || PrevState == IES_LBRAC) &&
- CurrState == IES_MINUS) {
+ PrevState == IES_LPAREN || PrevState == IES_LBRAC ||
+ PrevState == IES_NOT) &&
+ (CurrState == IES_MINUS || CurrState == IES_NOT)) {
State = IES_ERROR;
break;
}
@@ -618,52 +648,52 @@ private:
return Error(L, Msg, Ranges, MatchingInlineAsm);
}
- X86Operand *ErrorOperand(SMLoc Loc, StringRef Msg) {
+ std::nullptr_t ErrorOperand(SMLoc Loc, StringRef Msg) {
Error(Loc, Msg);
return nullptr;
}
- X86Operand *DefaultMemSIOperand(SMLoc Loc);
- X86Operand *DefaultMemDIOperand(SMLoc Loc);
- X86Operand *ParseOperand();
- X86Operand *ParseATTOperand();
- X86Operand *ParseIntelOperand();
- X86Operand *ParseIntelOffsetOfOperator();
+ std::unique_ptr<X86Operand> DefaultMemSIOperand(SMLoc Loc);
+ std::unique_ptr<X86Operand> DefaultMemDIOperand(SMLoc Loc);
+ std::unique_ptr<X86Operand> ParseOperand();
+ std::unique_ptr<X86Operand> ParseATTOperand();
+ std::unique_ptr<X86Operand> ParseIntelOperand();
+ std::unique_ptr<X86Operand> ParseIntelOffsetOfOperator();
bool ParseIntelDotOperator(const MCExpr *Disp, const MCExpr *&NewDisp);
- X86Operand *ParseIntelOperator(unsigned OpKind);
- X86Operand *ParseIntelSegmentOverride(unsigned SegReg, SMLoc Start, unsigned Size);
- X86Operand *ParseIntelMemOperand(int64_t ImmDisp, SMLoc StartLoc,
- unsigned Size);
+ std::unique_ptr<X86Operand> ParseIntelOperator(unsigned OpKind);
+ std::unique_ptr<X86Operand>
+ ParseIntelSegmentOverride(unsigned SegReg, SMLoc Start, unsigned Size);
+ std::unique_ptr<X86Operand>
+ ParseIntelMemOperand(int64_t ImmDisp, SMLoc StartLoc, unsigned Size);
bool ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End);
- X86Operand *ParseIntelBracExpression(unsigned SegReg, SMLoc Start,
- int64_t ImmDisp, unsigned Size);
+ std::unique_ptr<X86Operand> ParseIntelBracExpression(unsigned SegReg,
+ SMLoc Start,
+ int64_t ImmDisp,
+ unsigned Size);
bool ParseIntelIdentifier(const MCExpr *&Val, StringRef &Identifier,
InlineAsmIdentifierInfo &Info,
bool IsUnevaluatedOperand, SMLoc &End);
- X86Operand *ParseMemOperand(unsigned SegReg, SMLoc StartLoc);
+ std::unique_ptr<X86Operand> ParseMemOperand(unsigned SegReg, SMLoc StartLoc);
- X86Operand *CreateMemForInlineAsm(unsigned SegReg, const MCExpr *Disp,
- unsigned BaseReg, unsigned IndexReg,
- unsigned Scale, SMLoc Start, SMLoc End,
- unsigned Size, StringRef Identifier,
- InlineAsmIdentifierInfo &Info);
+ std::unique_ptr<X86Operand>
+ CreateMemForInlineAsm(unsigned SegReg, const MCExpr *Disp, unsigned BaseReg,
+ unsigned IndexReg, unsigned Scale, SMLoc Start,
+ SMLoc End, unsigned Size, StringRef Identifier,
+ InlineAsmIdentifierInfo &Info);
bool ParseDirectiveWord(unsigned Size, SMLoc L);
bool ParseDirectiveCode(StringRef IDVal, SMLoc L);
- bool processInstruction(MCInst &Inst,
- const SmallVectorImpl<MCParsedAsmOperand*> &Ops);
+ bool processInstruction(MCInst &Inst, const OperandVector &Ops);
/// Wrapper around MCStreamer::EmitInstruction(). Possibly adds
/// instrumentation around Inst.
- void EmitInstruction(MCInst &Inst,
- SmallVectorImpl<MCParsedAsmOperand *> &Operands,
- MCStreamer &Out);
+ void EmitInstruction(MCInst &Inst, OperandVector &Operands, MCStreamer &Out);
bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
- SmallVectorImpl<MCParsedAsmOperand*> &Operands,
- MCStreamer &Out, unsigned &ErrorInfo,
+ OperandVector &Operands, MCStreamer &Out,
+ unsigned &ErrorInfo,
bool MatchingInlineAsm) override;
/// doSrcDstMatch - Returns true if operands are matching in their
@@ -674,8 +704,8 @@ private:
/// Parses AVX512-specific operand primitives: masked registers ({%k<NUM>}, {z})
/// and memory broadcasting ({1to<NUM>}), updating the Operands vector if required.
/// \return \c true if no parsing errors occurred, \c false otherwise.
- bool HandleAVX512Operand(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
- const MCParsedAsmOperand &Op);
+ bool HandleAVX512Operand(OperandVector &Operands,
+ const MCParsedAsmOperand &Op);
bool is64BitMode() const {
// FIXME: Can tablegen auto-generate this?
@@ -725,9 +755,8 @@ public:
bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
- bool
- ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc,
- SmallVectorImpl<MCParsedAsmOperand*> &Operands) override;
+ bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
+ SMLoc NameLoc, OperandVector &Operands) override;
bool ParseDirective(AsmToken DirectiveID) override;
};
@@ -908,7 +937,7 @@ bool X86AsmParser::ParseRegister(unsigned &RegNo,
return false;
}
-X86Operand *X86AsmParser::DefaultMemSIOperand(SMLoc Loc) {
+std::unique_ptr<X86Operand> X86AsmParser::DefaultMemSIOperand(SMLoc Loc) {
unsigned basereg =
is64BitMode() ? X86::RSI : (is32BitMode() ? X86::ESI : X86::SI);
const MCExpr *Disp = MCConstantExpr::Create(0, getContext());
@@ -916,7 +945,7 @@ X86Operand *X86AsmParser::DefaultMemSIOperand(SMLoc Loc) {
/*IndexReg=*/0, /*Scale=*/1, Loc, Loc, 0);
}
-X86Operand *X86AsmParser::DefaultMemDIOperand(SMLoc Loc) {
+std::unique_ptr<X86Operand> X86AsmParser::DefaultMemDIOperand(SMLoc Loc) {
unsigned basereg =
is64BitMode() ? X86::RDI : (is32BitMode() ? X86::EDI : X86::DI);
const MCExpr *Disp = MCConstantExpr::Create(0, getContext());
@@ -924,7 +953,7 @@ X86Operand *X86AsmParser::DefaultMemDIOperand(SMLoc Loc) {
/*IndexReg=*/0, /*Scale=*/1, Loc, Loc, 0);
}
-X86Operand *X86AsmParser::ParseOperand() {
+std::unique_ptr<X86Operand> X86AsmParser::ParseOperand() {
if (isParsingIntelSyntax())
return ParseIntelOperand();
return ParseATTOperand();
@@ -946,12 +975,10 @@ static unsigned getIntelMemOperandSize(StringRef OpStr) {
return Size;
}
-X86Operand *
-X86AsmParser::CreateMemForInlineAsm(unsigned SegReg, const MCExpr *Disp,
- unsigned BaseReg, unsigned IndexReg,
- unsigned Scale, SMLoc Start, SMLoc End,
- unsigned Size, StringRef Identifier,
- InlineAsmIdentifierInfo &Info){
+std::unique_ptr<X86Operand> X86AsmParser::CreateMemForInlineAsm(
+ unsigned SegReg, const MCExpr *Disp, unsigned BaseReg, unsigned IndexReg,
+ unsigned Scale, SMLoc Start, SMLoc End, unsigned Size, StringRef Identifier,
+ InlineAsmIdentifierInfo &Info) {
// If this is not a VarDecl, then assume it is a FuncDecl or some other label
// reference. We need an 'r' constraint here, so we need to create a register
// operand to ensure proper matching. Just pick a GPR based on the size of
@@ -1064,7 +1091,8 @@ bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
if (SM.getStopOnLBrac() && getLexer().getKind() == AsmToken::LBrac)
break;
- switch (getLexer().getKind()) {
+ AsmToken::TokenKind TK = getLexer().getKind();
+ switch (TK) {
default: {
if (SM.isValidEndState()) {
Done = true;
@@ -1076,13 +1104,14 @@ bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
Done = true;
break;
}
+ case AsmToken::String:
case AsmToken::Identifier: {
// This could be a register or a symbolic displacement.
unsigned TmpReg;
const MCExpr *Val;
SMLoc IdentLoc = Tok.getLoc();
StringRef Identifier = Tok.getString();
- if(!ParseRegister(TmpReg, IdentLoc, End)) {
+ if (TK != AsmToken::String && !ParseRegister(TmpReg, IdentLoc, End)) {
SM.onRegister(TmpReg);
UpdateLocLex = false;
break;
@@ -1142,6 +1171,7 @@ bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
}
case AsmToken::Plus: SM.onPlus(); break;
case AsmToken::Minus: SM.onMinus(); break;
+ case AsmToken::Tilde: SM.onNot(); break;
case AsmToken::Star: SM.onStar(); break;
case AsmToken::Slash: SM.onDivide(); break;
case AsmToken::Pipe: SM.onOr(); break;
@@ -1164,9 +1194,9 @@ bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
return false;
}
-X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg, SMLoc Start,
- int64_t ImmDisp,
- unsigned Size) {
+std::unique_ptr<X86Operand>
+X86AsmParser::ParseIntelBracExpression(unsigned SegReg, SMLoc Start,
+ int64_t ImmDisp, unsigned Size) {
const AsmToken &Tok = Parser.getTok();
SMLoc BracLoc = Tok.getLoc(), End = Tok.getEndLoc();
if (getLexer().isNot(AsmToken::LBrac))
@@ -1270,9 +1300,9 @@ bool X86AsmParser::ParseIntelIdentifier(const MCExpr *&Val,
}
/// \brief Parse an Intel-style segment override.
-X86Operand *X86AsmParser::ParseIntelSegmentOverride(unsigned SegReg,
- SMLoc Start,
- unsigned Size) {
+std::unique_ptr<X86Operand>
+X86AsmParser::ParseIntelSegmentOverride(unsigned SegReg, SMLoc Start,
+ unsigned Size) {
assert(SegReg != 0 && "Tried to parse a segment override without a segment!");
const AsmToken &Tok = Parser.getTok(); // Eat colon.
if (Tok.isNot(AsmToken::Colon))
@@ -1321,8 +1351,9 @@ X86Operand *X86AsmParser::ParseIntelSegmentOverride(unsigned SegReg,
}
/// ParseIntelMemOperand - Parse an Intel-style memory operand.
-X86Operand *X86AsmParser::ParseIntelMemOperand(int64_t ImmDisp, SMLoc Start,
- unsigned Size) {
+std::unique_ptr<X86Operand> X86AsmParser::ParseIntelMemOperand(int64_t ImmDisp,
+ SMLoc Start,
+ unsigned Size) {
const AsmToken &Tok = Parser.getTok();
SMLoc End;
@@ -1425,7 +1456,7 @@ bool X86AsmParser::ParseIntelDotOperator(const MCExpr *Disp,
/// Parse the 'offset' operator. This operator is used to specify the
/// location rather than the content of a variable.
-X86Operand *X86AsmParser::ParseIntelOffsetOfOperator() {
+std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOffsetOfOperator() {
const AsmToken &Tok = Parser.getTok();
SMLoc OffsetOfLoc = Tok.getLoc();
Parser.Lex(); // Eat offset.
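In C++ terms, `offset` is Intel syntax's address-of operator; the distinction the comment draws maps onto:

    int myVar = 42;
    int *addr = &myVar; // "mov eax, offset myVar": the location of myVar
    int val = myVar;    // "mov eax, myVar":        the content of myVar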
@@ -1462,7 +1493,7 @@ enum IntelOperatorKind {
/// variable. A variable's size is the product of its LENGTH and TYPE. The
/// TYPE operator returns the size of a C or C++ type or variable. If the
/// variable is an array, TYPE returns the size of a single element.
-X86Operand *X86AsmParser::ParseIntelOperator(unsigned OpKind) {
+std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOperator(unsigned OpKind) {
const AsmToken &Tok = Parser.getTok();
SMLoc TypeLoc = Tok.getLoc();
Parser.Lex(); // Eat operator.
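A worked example of the operators documented above, for a variable declared as `int arr[10]` and assuming a 4-byte int:

    int arr[10];
    static_assert(sizeof(arr) / sizeof(arr[0]) == 10, "LENGTH arr");
    static_assert(sizeof(arr[0]) == 4, "TYPE arr (4-byte int assumed)");
    static_assert(sizeof(arr) == 40, "SIZE arr = LENGTH * TYPE");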
@@ -1495,7 +1526,7 @@ X86Operand *X86AsmParser::ParseIntelOperator(unsigned OpKind) {
return X86Operand::CreateImm(Imm, Start, End);
}
-X86Operand *X86AsmParser::ParseIntelOperand() {
+std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOperand() {
const AsmToken &Tok = Parser.getTok();
SMLoc Start, End;
@@ -1523,7 +1554,7 @@ X86Operand *X86AsmParser::ParseIntelOperand() {
// Immediate.
if (getLexer().is(AsmToken::Integer) || getLexer().is(AsmToken::Minus) ||
- getLexer().is(AsmToken::LParen)) {
+ getLexer().is(AsmToken::Tilde) || getLexer().is(AsmToken::LParen)) {
AsmToken StartTok = Tok;
IntelExprStateMachine SM(/*Imm=*/0, /*StopOnLBrac=*/true,
/*AddImmPrefix=*/false);
@@ -1577,7 +1608,7 @@ X86Operand *X86AsmParser::ParseIntelOperand() {
return ParseIntelMemOperand(/*Disp=*/0, Start, Size);
}
-X86Operand *X86AsmParser::ParseATTOperand() {
+std::unique_ptr<X86Operand> X86AsmParser::ParseATTOperand() {
switch (getLexer().getKind()) {
default:
// Parse a memory operand with no segment register.
@@ -1613,9 +1644,8 @@ X86Operand *X86AsmParser::ParseATTOperand() {
}
}
-bool
-X86AsmParser::HandleAVX512Operand(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
- const MCParsedAsmOperand &Op) {
+bool X86AsmParser::HandleAVX512Operand(OperandVector &Operands,
+ const MCParsedAsmOperand &Op) {
if (STI.getFeatureBits() & X86::FeatureAVX512) {
if (getLexer().is(AsmToken::LCurly)) {
// Eat "{" and mark the current place.
@@ -1653,8 +1683,8 @@ X86AsmParser::HandleAVX512Operand(SmallVectorImpl<MCParsedAsmOperand*> &Operands
} else {
// Parse mask register {%k1}
Operands.push_back(X86Operand::CreateToken("{", consumedToken));
- if (X86Operand *Op = ParseOperand()) {
- Operands.push_back(Op);
+ if (std::unique_ptr<X86Operand> Op = ParseOperand()) {
+ Operands.push_back(std::move(Op));
if (!getLexer().is(AsmToken::RCurly))
return !ErrorAndEatStatement(getLexer().getLoc(),
"Expected } at this point");
@@ -1682,7 +1712,8 @@ X86AsmParser::HandleAVX512Operand(SmallVectorImpl<MCParsedAsmOperand*> &Operands
/// ParseMemOperand: segment: disp(basereg, indexreg, scale). The '%ds:' prefix
/// has already been parsed if present.
-X86Operand *X86AsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) {
+std::unique_ptr<X86Operand> X86AsmParser::ParseMemOperand(unsigned SegReg,
+ SMLoc MemStart) {
// We have to disambiguate a parenthesized expression "(4+5)" from the start
// of a memory operand with a missing displacement "(%ebx)" or "(,%eax)". The
@@ -1845,9 +1876,8 @@ X86Operand *X86AsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) {
MemStart, MemEnd);
}
-bool X86AsmParser::
-ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc,
- SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
+ SMLoc NameLoc, OperandVector &Operands) {
InstInfo = &Info;
StringRef PatchedName = Name;
@@ -1940,9 +1970,9 @@ ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc,
// Read the operands.
while (1) {
- if (X86Operand *Op = ParseOperand()) {
- Operands.push_back(Op);
- if (!HandleAVX512Operand(Operands, *Op))
+ if (std::unique_ptr<X86Operand> Op = ParseOperand()) {
+ Operands.push_back(std::move(Op));
+ if (!HandleAVX512Operand(Operands, *Operands.back()))
return true;
} else {
Parser.eatToEndOfStatement();
@@ -1973,27 +2003,25 @@ ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc,
// documented form in various unofficial manuals, so a lot of code uses it.
if ((Name == "outb" || Name == "outw" || Name == "outl" || Name == "out") &&
Operands.size() == 3) {
- X86Operand &Op = *(X86Operand*)Operands.back();
+ X86Operand &Op = (X86Operand &)*Operands.back();
if (Op.isMem() && Op.Mem.SegReg == 0 &&
isa<MCConstantExpr>(Op.Mem.Disp) &&
cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
Op.Mem.BaseReg == MatchRegisterName("dx") && Op.Mem.IndexReg == 0) {
SMLoc Loc = Op.getEndLoc();
Operands.back() = X86Operand::CreateReg(Op.Mem.BaseReg, Loc, Loc);
- delete &Op;
}
}
// Same hack for "in[bwl]? (%dx), %al" -> "inb %dx, %al".
if ((Name == "inb" || Name == "inw" || Name == "inl" || Name == "in") &&
Operands.size() == 3) {
- X86Operand &Op = *(X86Operand*)Operands.begin()[1];
+ X86Operand &Op = (X86Operand &)*Operands[1];
if (Op.isMem() && Op.Mem.SegReg == 0 &&
isa<MCConstantExpr>(Op.Mem.Disp) &&
cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
Op.Mem.BaseReg == MatchRegisterName("dx") && Op.Mem.IndexReg == 0) {
SMLoc Loc = Op.getEndLoc();
- Operands.begin()[1] = X86Operand::CreateReg(Op.Mem.BaseReg, Loc, Loc);
- delete &Op;
+ Operands[1] = X86Operand::CreateReg(Op.Mem.BaseReg, Loc, Loc);
}
}
@@ -2060,8 +2088,8 @@ ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc,
Operands.push_back(DefaultMemSIOperand(NameLoc));
}
} else if (Operands.size() == 3) {
- X86Operand &Op = *(X86Operand*)Operands.begin()[1];
- X86Operand &Op2 = *(X86Operand*)Operands.begin()[2];
+ X86Operand &Op = (X86Operand &)*Operands[1];
+ X86Operand &Op2 = (X86Operand &)*Operands[2];
if (!doSrcDstMatch(Op, Op2))
return Error(Op.getStartLoc(),
"mismatching source and destination index registers");
@@ -2076,10 +2104,8 @@ ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc,
(Name == "smov" || Name == "smovb" || Name == "smovw" ||
Name == "smovl" || Name == "smovd" || Name == "smovq"))) {
if (Operands.size() == 1) {
- if (Name == "movsd") {
- delete Operands.back();
+ if (Name == "movsd")
Operands.back() = X86Operand::CreateToken("movsl", NameLoc);
- }
if (isParsingIntelSyntax()) {
Operands.push_back(DefaultMemDIOperand(NameLoc));
Operands.push_back(DefaultMemSIOperand(NameLoc));
@@ -2088,8 +2114,8 @@ ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc,
Operands.push_back(DefaultMemDIOperand(NameLoc));
}
} else if (Operands.size() == 3) {
- X86Operand &Op = *(X86Operand*)Operands.begin()[1];
- X86Operand &Op2 = *(X86Operand*)Operands.begin()[2];
+ X86Operand &Op = (X86Operand &)*Operands[1];
+ X86Operand &Op2 = (X86Operand &)*Operands[2];
if (!doSrcDstMatch(Op, Op2))
return Error(Op.getStartLoc(),
"mismatching source and destination index registers");
@@ -2105,31 +2131,26 @@ ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc,
Operands.size() == 3) {
if (isParsingIntelSyntax()) {
// Intel syntax
- X86Operand *Op1 = static_cast<X86Operand*>(Operands[2]);
- if (Op1->isImm() && isa<MCConstantExpr>(Op1->getImm()) &&
- cast<MCConstantExpr>(Op1->getImm())->getValue() == 1) {
- delete Operands[2];
+ X86Operand &Op1 = static_cast<X86Operand &>(*Operands[2]);
+ if (Op1.isImm() && isa<MCConstantExpr>(Op1.getImm()) &&
+ cast<MCConstantExpr>(Op1.getImm())->getValue() == 1)
Operands.pop_back();
- }
} else {
- X86Operand *Op1 = static_cast<X86Operand*>(Operands[1]);
- if (Op1->isImm() && isa<MCConstantExpr>(Op1->getImm()) &&
- cast<MCConstantExpr>(Op1->getImm())->getValue() == 1) {
- delete Operands[1];
+ X86Operand &Op1 = static_cast<X86Operand &>(*Operands[1]);
+ if (Op1.isImm() && isa<MCConstantExpr>(Op1.getImm()) &&
+ cast<MCConstantExpr>(Op1.getImm())->getValue() == 1)
Operands.erase(Operands.begin() + 1);
- }
}
}
// Transforms "int $3" into "int3" as a size optimization. We can't write an
// instalias with an immediate operand yet.
if (Name == "int" && Operands.size() == 2) {
- X86Operand *Op1 = static_cast<X86Operand*>(Operands[1]);
- if (Op1->isImm() && isa<MCConstantExpr>(Op1->getImm()) &&
- cast<MCConstantExpr>(Op1->getImm())->getValue() == 3) {
- delete Operands[1];
+ X86Operand &Op1 = static_cast<X86Operand &>(*Operands[1]);
+ if (Op1.isImm() && isa<MCConstantExpr>(Op1.getImm()) &&
+ cast<MCConstantExpr>(Op1.getImm())->getValue() == 3) {
Operands.erase(Operands.begin() + 1);
- static_cast<X86Operand*>(Operands[0])->setTokenValue("int3");
+ static_cast<X86Operand &>(*Operands[0]).setTokenValue("int3");
}
}
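The size optimization is exactly one byte: `int $3` assembles to the two-byte CD 03, while the dedicated breakpoint opcode `int3` is the single byte CC, which is also the form debuggers expect.

    constexpr unsigned char Int3 = 0xCC;             // int3
    constexpr unsigned char IntImm[] = {0xCD, 0x03}; // int $3
    static_assert(sizeof(IntImm) - sizeof(Int3) == 1, "saves one byte");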
@@ -2175,9 +2196,7 @@ static bool convert64i32to64ri8(MCInst &Inst, unsigned Opcode,
return convertToSExti8(Inst, Opcode, X86::RAX, isCmp);
}
-bool X86AsmParser::
-processInstruction(MCInst &Inst,
- const SmallVectorImpl<MCParsedAsmOperand*> &Ops) {
+bool X86AsmParser::processInstruction(MCInst &Inst, const OperandVector &Ops) {
switch (Inst.getOpcode()) {
default: return false;
case X86::AND16i16: return convert16i16to16ri8(Inst, X86::AND16ri8);
@@ -2258,51 +2277,47 @@ processInstruction(MCInst &Inst,
static const char *getSubtargetFeatureName(unsigned Val);
-void X86AsmParser::EmitInstruction(
- MCInst &Inst, SmallVectorImpl<MCParsedAsmOperand *> &Operands,
- MCStreamer &Out) {
+void X86AsmParser::EmitInstruction(MCInst &Inst, OperandVector &Operands,
+ MCStreamer &Out) {
Instrumentation->InstrumentInstruction(Inst, Operands, getContext(), MII,
Out);
Out.EmitInstruction(Inst, STI);
}
-bool X86AsmParser::
-MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
- SmallVectorImpl<MCParsedAsmOperand*> &Operands,
- MCStreamer &Out, unsigned &ErrorInfo,
- bool MatchingInlineAsm) {
+bool X86AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
+ OperandVector &Operands,
+ MCStreamer &Out, unsigned &ErrorInfo,
+ bool MatchingInlineAsm) {
assert(!Operands.empty() && "Unexpect empty operand list!");
- X86Operand *Op = static_cast<X86Operand*>(Operands[0]);
- assert(Op->isToken() && "Leading operand should always be a mnemonic!");
+ X86Operand &Op = static_cast<X86Operand &>(*Operands[0]);
+ assert(Op.isToken() && "Leading operand should always be a mnemonic!");
ArrayRef<SMRange> EmptyRanges = None;
// First, handle aliases that expand to multiple instructions.
// FIXME: This should be replaced with a real .td file alias mechanism.
// Also, MatchInstructionImpl should actually *do* the EmitInstruction
// call.
- if (Op->getToken() == "fstsw" || Op->getToken() == "fstcw" ||
- Op->getToken() == "fstsww" || Op->getToken() == "fstcww" ||
- Op->getToken() == "finit" || Op->getToken() == "fsave" ||
- Op->getToken() == "fstenv" || Op->getToken() == "fclex") {
+ if (Op.getToken() == "fstsw" || Op.getToken() == "fstcw" ||
+ Op.getToken() == "fstsww" || Op.getToken() == "fstcww" ||
+ Op.getToken() == "finit" || Op.getToken() == "fsave" ||
+ Op.getToken() == "fstenv" || Op.getToken() == "fclex") {
MCInst Inst;
Inst.setOpcode(X86::WAIT);
Inst.setLoc(IDLoc);
if (!MatchingInlineAsm)
EmitInstruction(Inst, Operands, Out);
- const char *Repl =
- StringSwitch<const char*>(Op->getToken())
- .Case("finit", "fninit")
- .Case("fsave", "fnsave")
- .Case("fstcw", "fnstcw")
- .Case("fstcww", "fnstcw")
- .Case("fstenv", "fnstenv")
- .Case("fstsw", "fnstsw")
- .Case("fstsww", "fnstsw")
- .Case("fclex", "fnclex")
- .Default(nullptr);
+ const char *Repl = StringSwitch<const char *>(Op.getToken())
+ .Case("finit", "fninit")
+ .Case("fsave", "fnsave")
+ .Case("fstcw", "fnstcw")
+ .Case("fstcww", "fnstcw")
+ .Case("fstenv", "fnstenv")
+ .Case("fstsw", "fnstsw")
+ .Case("fstsww", "fnstsw")
+ .Case("fclex", "fnclex")
+ .Default(nullptr);
assert(Repl && "Unknown wait-prefixed instruction");
- delete Operands[0];
Operands[0] = X86Operand::CreateToken(Repl, IDLoc);
}
@@ -2355,11 +2370,11 @@ MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
// following hack.
// Change the operand to point to a temporary token.
- StringRef Base = Op->getToken();
+ StringRef Base = Op.getToken();
SmallString<16> Tmp;
Tmp += Base;
Tmp += ' ';
- Op->setTokenValue(Tmp.str());
+ Op.setTokenValue(Tmp.str());
// If this instruction starts with an 'f', then it is a floating point stack
// instruction. These come in up to three forms for 32-bit, 64-bit, and
@@ -2400,7 +2415,7 @@ MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
ErrorInfoMissingFeature = ErrorInfoIgnore;
// Restore the old token.
- Op->setTokenValue(Base);
+ Op.setTokenValue(Base);
// If exactly one matched, then we treat that as a successful match (and the
// instruction will already have been filled in correctly, since the failing
@@ -2450,8 +2465,8 @@ MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
if ((Match1 == Match_MnemonicFail) && (Match2 == Match_MnemonicFail) &&
(Match3 == Match_MnemonicFail) && (Match4 == Match_MnemonicFail)) {
if (!WasOriginallyInvalidOperand) {
- ArrayRef<SMRange> Ranges = MatchingInlineAsm ? EmptyRanges :
- Op->getLocRange();
+ ArrayRef<SMRange> Ranges =
+ MatchingInlineAsm ? EmptyRanges : Op.getLocRange();
return Error(IDLoc, "invalid instruction mnemonic '" + Base + "'",
Ranges, MatchingInlineAsm);
}
@@ -2462,10 +2477,10 @@ MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
return Error(IDLoc, "too few operands for instruction",
EmptyRanges, MatchingInlineAsm);
- X86Operand *Operand = (X86Operand*)Operands[ErrorInfo];
- if (Operand->getStartLoc().isValid()) {
- SMRange OperandRange = Operand->getLocRange();
- return Error(Operand->getStartLoc(), "invalid operand for instruction",
+ X86Operand &Operand = (X86Operand &)*Operands[ErrorInfo];
+ if (Operand.getStartLoc().isValid()) {
+ SMRange OperandRange = Operand.getLocRange();
+ return Error(Operand.getStartLoc(), "invalid operand for instruction",
OperandRange, MatchingInlineAsm);
}
}
diff --git a/lib/Target/X86/AsmParser/X86Operand.h b/lib/Target/X86/AsmParser/X86Operand.h
index de3be38..1bbfc11 100644
--- a/lib/Target/X86/AsmParser/X86Operand.h
+++ b/lib/Target/X86/AsmParser/X86Operand.h
@@ -13,6 +13,7 @@
#include "X86AsmParserCommon.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
+#include "llvm/ADT/STLExtras.h"
namespace llvm {
@@ -410,20 +411,19 @@ struct X86Operand : public MCParsedAsmOperand {
Inst.addOperand(MCOperand::CreateReg(getMemSegReg()));
}
- static X86Operand *CreateToken(StringRef Str, SMLoc Loc) {
+ static std::unique_ptr<X86Operand> CreateToken(StringRef Str, SMLoc Loc) {
SMLoc EndLoc = SMLoc::getFromPointer(Loc.getPointer() + Str.size());
- X86Operand *Res = new X86Operand(Token, Loc, EndLoc);
+ auto Res = llvm::make_unique<X86Operand>(Token, Loc, EndLoc);
Res->Tok.Data = Str.data();
Res->Tok.Length = Str.size();
return Res;
}
- static X86Operand *CreateReg(unsigned RegNo, SMLoc StartLoc, SMLoc EndLoc,
- bool AddressOf = false,
- SMLoc OffsetOfLoc = SMLoc(),
- StringRef SymName = StringRef(),
- void *OpDecl = nullptr) {
- X86Operand *Res = new X86Operand(Register, StartLoc, EndLoc);
+ static std::unique_ptr<X86Operand>
+ CreateReg(unsigned RegNo, SMLoc StartLoc, SMLoc EndLoc,
+ bool AddressOf = false, SMLoc OffsetOfLoc = SMLoc(),
+ StringRef SymName = StringRef(), void *OpDecl = nullptr) {
+ auto Res = llvm::make_unique<X86Operand>(Register, StartLoc, EndLoc);
Res->Reg.RegNo = RegNo;
Res->AddressOf = AddressOf;
Res->OffsetOfLoc = OffsetOfLoc;
@@ -432,17 +432,18 @@ struct X86Operand : public MCParsedAsmOperand {
return Res;
}
- static X86Operand *CreateImm(const MCExpr *Val, SMLoc StartLoc, SMLoc EndLoc){
- X86Operand *Res = new X86Operand(Immediate, StartLoc, EndLoc);
+ static std::unique_ptr<X86Operand> CreateImm(const MCExpr *Val,
+ SMLoc StartLoc, SMLoc EndLoc) {
+ auto Res = llvm::make_unique<X86Operand>(Immediate, StartLoc, EndLoc);
Res->Imm.Val = Val;
return Res;
}
/// Create an absolute memory operand.
- static X86Operand *CreateMem(const MCExpr *Disp, SMLoc StartLoc, SMLoc EndLoc,
- unsigned Size = 0, StringRef SymName = StringRef(),
- void *OpDecl = nullptr) {
- X86Operand *Res = new X86Operand(Memory, StartLoc, EndLoc);
+ static std::unique_ptr<X86Operand>
+ CreateMem(const MCExpr *Disp, SMLoc StartLoc, SMLoc EndLoc, unsigned Size = 0,
+ StringRef SymName = StringRef(), void *OpDecl = nullptr) {
+ auto Res = llvm::make_unique<X86Operand>(Memory, StartLoc, EndLoc);
Res->Mem.SegReg = 0;
Res->Mem.Disp = Disp;
Res->Mem.BaseReg = 0;
@@ -456,12 +457,11 @@ struct X86Operand : public MCParsedAsmOperand {
}
/// Create a generalized memory operand.
- static X86Operand *CreateMem(unsigned SegReg, const MCExpr *Disp,
- unsigned BaseReg, unsigned IndexReg,
- unsigned Scale, SMLoc StartLoc, SMLoc EndLoc,
- unsigned Size = 0,
- StringRef SymName = StringRef(),
- void *OpDecl = nullptr) {
+ static std::unique_ptr<X86Operand>
+ CreateMem(unsigned SegReg, const MCExpr *Disp, unsigned BaseReg,
+ unsigned IndexReg, unsigned Scale, SMLoc StartLoc, SMLoc EndLoc,
+ unsigned Size = 0, StringRef SymName = StringRef(),
+ void *OpDecl = nullptr) {
// We should never just have a displacement; that should be parsed as an
// absolute memory operand.
assert((SegReg || BaseReg || IndexReg) && "Invalid memory operand!");
@@ -469,7 +469,7 @@ struct X86Operand : public MCParsedAsmOperand {
// The scale should always be one of {1,2,4,8}.
assert(((Scale == 1 || Scale == 2 || Scale == 4 || Scale == 8)) &&
"Invalid scale!");
- X86Operand *Res = new X86Operand(Memory, StartLoc, EndLoc);
+ auto Res = llvm::make_unique<X86Operand>(Memory, StartLoc, EndLoc);
Res->Mem.SegReg = SegReg;
Res->Mem.Disp = Disp;
Res->Mem.BaseReg = BaseReg;
diff --git a/lib/Target/X86/CMakeLists.txt b/lib/Target/X86/CMakeLists.txt
index c54fbc1..a09767e 100644
--- a/lib/Target/X86/CMakeLists.txt
+++ b/lib/Target/X86/CMakeLists.txt
@@ -14,6 +14,7 @@ add_public_tablegen_target(X86CommonTableGen)
set(sources
X86AsmPrinter.cpp
+ X86AtomicExpandPass.cpp
X86CodeEmitter.cpp
X86FastISel.cpp
X86FloatingPoint.cpp
diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp
index 804606d..55587d4 100644
--- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp
+++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp
@@ -1620,7 +1620,8 @@ static int readVVVV(struct InternalInstruction* insn) {
int vvvv;
if (insn->vectorExtensionType == TYPE_EVEX)
- vvvv = vvvvFromEVEX3of4(insn->vectorExtensionPrefix[2]);
+ vvvv = (v2FromEVEX4of4(insn->vectorExtensionPrefix[3]) << 4 |
+ vvvvFromEVEX3of4(insn->vectorExtensionPrefix[2]));
else if (insn->vectorExtensionType == TYPE_VEX_3B)
vvvv = vvvvFromVEX3of3(insn->vectorExtensionPrefix[2]);
else if (insn->vectorExtensionType == TYPE_VEX_2B)
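The decoder bug being fixed: EVEX encodes a fifth register-select bit, V', separately from the four vvvv bits, and the old code dropped it, so non-destructive source registers 16-31 (zmm16 and up) decoded as their low aliases. A sketch of the combined field, assuming the standard EVEX layout (inverted vvvv in bits [6:3] of the third prefix byte, inverted V' in bit 3 of the fourth):

    #include <cstdint>

    unsigned evexVvvv(uint8_t P1, uint8_t P2) {
      unsigned Vvvv = (~P1 >> 3) & 0xF; // vvvvFromEVEX3of4
      unsigned V2 = (~P2 >> 3) & 0x1;   // v2FromEVEX4of4
      return (V2 << 4) | Vvvv;          // e.g. zmm17 -> (1 << 4) | 1 = 17
    }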
diff --git a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
index bf30a8e..23bca0d 100644
--- a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
@@ -73,11 +73,12 @@ public:
};
class X86AsmBackend : public MCAsmBackend {
- StringRef CPU;
+ const StringRef CPU;
bool HasNopl;
+ const uint64_t MaxNopLength;
public:
X86AsmBackend(const Target &T, StringRef _CPU)
- : MCAsmBackend(), CPU(_CPU) {
+ : MCAsmBackend(), CPU(_CPU), MaxNopLength(_CPU == "slm" ? 7 : 15) {
HasNopl = CPU != "generic" && CPU != "i386" && CPU != "i486" &&
CPU != "i586" && CPU != "pentium" && CPU != "pentium-mmx" &&
CPU != "i686" && CPU != "k6" && CPU != "k6-2" && CPU != "k6-3" &&
@@ -331,7 +332,7 @@ bool X86AsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const {
// Emit nops of at most MaxNopLength bytes each (15 in general, 7 for slm,
// where longer nops are slow), then emit a nop of the remaining length.
do {
- const uint8_t ThisNopLength = (uint8_t) std::min(Count, (uint64_t) 15);
+ const uint8_t ThisNopLength = (uint8_t) std::min(Count, MaxNopLength);
const uint8_t Prefixes = ThisNopLength <= 10 ? 0 : ThisNopLength - 10;
for (uint8_t i = 0; i < Prefixes; i++)
OW->Write8(0x66);
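With the new MaxNopLength, padding is chopped into the largest nops the CPU model tolerates before a final remainder nop. A self-contained sketch of the splitting arithmetic, assuming Count > 0 as in the surrounding loop:

    #include <algorithm>
    #include <cstdint>
    #include <vector>

    std::vector<uint8_t> nopChunks(uint64_t Count, uint64_t MaxNopLength) {
      std::vector<uint8_t> Chunks;
      do {
        uint8_t Len = static_cast<uint8_t>(std::min(Count, MaxNopLength));
        Chunks.push_back(Len);
        Count -= Len;
      } while (Count != 0);
      return Chunks; // e.g. Count = 17, Max = 7 -> {7, 7, 3}
    }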
@@ -365,6 +366,17 @@ public:
}
};
+class ELFX86_X32AsmBackend : public ELFX86AsmBackend {
+public:
+ ELFX86_X32AsmBackend(const Target &T, uint8_t OSABI, StringRef CPU)
+ : ELFX86AsmBackend(T, OSABI, CPU) {}
+
+ MCObjectWriter *createObjectWriter(raw_ostream &OS) const override {
+ return createX86ELFObjectWriter(OS, /*IsELF64*/ false, OSABI,
+ ELF::EM_X86_64);
+ }
+};
+
class ELFX86_64AsmBackend : public ELFX86AsmBackend {
public:
ELFX86_64AsmBackend(const Target &T, uint8_t OSABI, StringRef CPU)
@@ -717,11 +729,10 @@ public:
};
class DarwinX86_32AsmBackend : public DarwinX86AsmBackend {
- bool SupportsCU;
public:
DarwinX86_32AsmBackend(const Target &T, const MCRegisterInfo &MRI,
- StringRef CPU, bool SupportsCU)
- : DarwinX86AsmBackend(T, MRI, CPU, false), SupportsCU(SupportsCU) {}
+ StringRef CPU)
+ : DarwinX86AsmBackend(T, MRI, CPU, false) {}
MCObjectWriter *createObjectWriter(raw_ostream &OS) const override {
return createX86MachObjectWriter(OS, /*Is64Bit=*/false,
@@ -732,20 +743,16 @@ public:
/// \brief Generate the compact unwind encoding for the CFI instructions.
uint32_t generateCompactUnwindEncoding(
ArrayRef<MCCFIInstruction> Instrs) const override {
- return SupportsCU ? generateCompactUnwindEncodingImpl(Instrs) : 0;
+ return generateCompactUnwindEncodingImpl(Instrs);
}
};
class DarwinX86_64AsmBackend : public DarwinX86AsmBackend {
- bool SupportsCU;
const MachO::CPUSubTypeX86 Subtype;
public:
DarwinX86_64AsmBackend(const Target &T, const MCRegisterInfo &MRI,
- StringRef CPU, bool SupportsCU,
- MachO::CPUSubTypeX86 st)
- : DarwinX86AsmBackend(T, MRI, CPU, true), SupportsCU(SupportsCU),
- Subtype(st) {
- }
+ StringRef CPU, MachO::CPUSubTypeX86 st)
+ : DarwinX86AsmBackend(T, MRI, CPU, true), Subtype(st) {}
MCObjectWriter *createObjectWriter(raw_ostream &OS) const override {
return createX86MachObjectWriter(OS, /*Is64Bit=*/true,
@@ -788,7 +795,7 @@ public:
/// \brief Generate the compact unwind encoding for the CFI instructions.
uint32_t generateCompactUnwindEncoding(
ArrayRef<MCCFIInstruction> Instrs) const override {
- return SupportsCU ? generateCompactUnwindEncodingImpl(Instrs) : 0;
+ return generateCompactUnwindEncodingImpl(Instrs);
}
};
@@ -801,9 +808,7 @@ MCAsmBackend *llvm::createX86_32AsmBackend(const Target &T,
Triple TheTriple(TT);
if (TheTriple.isOSBinFormatMachO())
- return new DarwinX86_32AsmBackend(T, MRI, CPU,
- TheTriple.isMacOSX() &&
- !TheTriple.isMacOSXVersionLT(10, 7));
+ return new DarwinX86_32AsmBackend(T, MRI, CPU);
if (TheTriple.isOSWindows() && !TheTriple.isOSBinFormatELF())
return new WindowsX86AsmBackend(T, false, CPU);
@@ -823,14 +828,15 @@ MCAsmBackend *llvm::createX86_64AsmBackend(const Target &T,
StringSwitch<MachO::CPUSubTypeX86>(TheTriple.getArchName())
.Case("x86_64h", MachO::CPU_SUBTYPE_X86_64_H)
.Default(MachO::CPU_SUBTYPE_X86_64_ALL);
- return new DarwinX86_64AsmBackend(T, MRI, CPU,
- TheTriple.isMacOSX() &&
- !TheTriple.isMacOSXVersionLT(10, 7), CS);
+ return new DarwinX86_64AsmBackend(T, MRI, CPU, CS);
}
if (TheTriple.isOSWindows() && !TheTriple.isOSBinFormatELF())
return new WindowsX86AsmBackend(T, true, CPU);
uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TheTriple.getOS());
+
+ if (TheTriple.getEnvironment() == Triple::GNUX32)
+ return new ELFX86_X32AsmBackend(T, OSABI, CPU);
return new ELFX86_64AsmBackend(T, OSABI, CPU);
}
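The x32 ABI pairs the full x86-64 instruction set with 32-bit pointers, so its object files are 32-bit ELF containers (hence `IsELF64` false above) that still carry the EM_X86_64 machine type; the gnux32 environment in the triple is what routes to the new backend. A sketch of the discriminating predicate, using the same Triple API:

    #include "llvm/ADT/Triple.h"

    bool isX32Target(const llvm::Triple &T) {
      return T.getArch() == llvm::Triple::x86_64 &&
             T.getEnvironment() == llvm::Triple::GNUX32;
    }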
diff --git a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
index 39480ea..83b2777 100644
--- a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
@@ -74,8 +74,9 @@ X86MCAsmInfoDarwin::X86MCAsmInfoDarwin(const Triple &T) {
// FIXME: this should not depend on the target OS version, but on the ld64
// version in use. From at least >= ld64-97.17 (Xcode 3.2.6) the abs-ified
- // FDE relocs may be used.
- DwarfFDESymbolsUseAbsDiff = T.isMacOSX() && !T.isMacOSXVersionLT(10, 6);
+ // FDE relocs may be used. We also use them for the iOS simulator.
+ DwarfFDESymbolsUseAbsDiff = (T.isMacOSX() && !T.isMacOSXVersionLT(10, 6))
+ || T.isiOS();
UseIntegratedAssembler = true;
}
@@ -142,8 +143,11 @@ getNonexecutableStackSection(MCContext &Ctx) const {
void X86MCAsmInfoMicrosoft::anchor() { }
X86MCAsmInfoMicrosoft::X86MCAsmInfoMicrosoft(const Triple &Triple) {
- if (Triple.getArch() == Triple::x86_64)
+ if (Triple.getArch() == Triple::x86_64) {
PrivateGlobalPrefix = ".L";
+ PointerSize = 8;
+ ExceptionsType = ExceptionHandling::WinEH;
+ }
AssemblerDialect = AsmWriterFlavor;
@@ -157,17 +161,18 @@ X86MCAsmInfoMicrosoft::X86MCAsmInfoMicrosoft(const Triple &Triple) {
void X86MCAsmInfoGNUCOFF::anchor() { }
X86MCAsmInfoGNUCOFF::X86MCAsmInfoGNUCOFF(const Triple &Triple) {
+ assert(Triple.isOSWindows() && "Windows is the only supported COFF target");
if (Triple.getArch() == Triple::x86_64) {
PrivateGlobalPrefix = ".L";
PointerSize = 8;
+ ExceptionsType = ExceptionHandling::WinEH;
+ } else {
+ ExceptionsType = ExceptionHandling::DwarfCFI;
}
AssemblerDialect = AsmWriterFlavor;
TextAlignFillValue = 0x90;
- // Exceptions handling
- ExceptionsType = ExceptionHandling::DwarfCFI;
-
UseIntegratedAssembler = true;
}
diff --git a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
index e63036c..5e29e5c 100644
--- a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
@@ -197,14 +197,13 @@ void X86_MC::DetectFamilyModel(unsigned EAX, unsigned &Family,
}
}
-unsigned X86_MC::getDwarfRegFlavour(StringRef TT, bool isEH) {
- Triple TheTriple(TT);
- if (TheTriple.getArch() == Triple::x86_64)
+unsigned X86_MC::getDwarfRegFlavour(Triple TT, bool isEH) {
+ if (TT.getArch() == Triple::x86_64)
return DWARFFlavour::X86_64;
- if (TheTriple.isOSDarwin())
+ if (TT.isOSDarwin())
return isEH ? DWARFFlavour::X86_32_DarwinEH : DWARFFlavour::X86_32_Generic;
- if (TheTriple.isOSCygMing())
+ if (TT.isOSCygMing())
// Unsupported for now; just a quick fallback.
return DWARFFlavour::X86_32_Generic;
return DWARFFlavour::X86_32_Generic;
@@ -251,8 +250,8 @@ static MCRegisterInfo *createX86MCRegisterInfo(StringRef TT) {
MCRegisterInfo *X = new MCRegisterInfo();
InitX86MCRegisterInfo(X, RA,
- X86_MC::getDwarfRegFlavour(TT, false),
- X86_MC::getDwarfRegFlavour(TT, true),
+ X86_MC::getDwarfRegFlavour(TheTriple, false),
+ X86_MC::getDwarfRegFlavour(TheTriple, true),
RA);
X86_MC::InitLLVM2SEHRegisterMapping(X);
return X;
diff --git a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h
index 8fe40fd..ebe74cf 100644
--- a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h
+++ b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h
@@ -28,6 +28,7 @@ class MCSubtargetInfo;
class MCRelocationInfo;
class MCStreamer;
class Target;
+class Triple;
class StringRef;
class raw_ostream;
@@ -64,7 +65,7 @@ namespace X86_MC {
void DetectFamilyModel(unsigned EAX, unsigned &Family, unsigned &Model);
- unsigned getDwarfRegFlavour(StringRef TT, bool isEH);
+ unsigned getDwarfRegFlavour(Triple TT, bool isEH);
void InitLLVM2SEHRegisterMapping(MCRegisterInfo *MRI);
diff --git a/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp b/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp
index c62fd0a..7fa4180 100644
--- a/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp
@@ -19,12 +19,12 @@ public:
raw_ostream &OS)
: MCWinCOFFStreamer(C, AB, *CE, OS) { }
- void EmitWin64EHHandlerData() override;
+ void EmitWinEHHandlerData() override;
void FinishImpl() override;
};
-void X86WinCOFFStreamer::EmitWin64EHHandlerData() {
- MCStreamer::EmitWin64EHHandlerData();
+void X86WinCOFFStreamer::EmitWinEHHandlerData() {
+ MCStreamer::EmitWinEHHandlerData();
// We have to emit the unwind info now, because this directive
// actually switches to the .xdata section!
diff --git a/lib/Target/X86/X86.h b/lib/Target/X86/X86.h
index 64e8ea8..d5522ed 100644
--- a/lib/Target/X86/X86.h
+++ b/lib/Target/X86/X86.h
@@ -24,6 +24,10 @@ class ImmutablePass;
class JITCodeEmitter;
class X86TargetMachine;
+/// createX86AtomicExpandPass - This pass expands atomic operations that
+/// cannot be handled natively into loops built around cmpxchg.
+FunctionPass *createX86AtomicExpandPass(const X86TargetMachine *TM);
+
/// createX86ISelDag - This pass converts a legalized DAG into a
/// X86-specific DAG, ready for instruction scheduling.
///
diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td
index 6912b57..93f516a 100644
--- a/lib/Target/X86/X86.td
+++ b/lib/Target/X86/X86.td
@@ -168,6 +168,8 @@ def FeatureLEAUsesAG : SubtargetFeature<"lea-uses-ag", "LEAUsesAG", "true",
"LEA instruction needs inputs at AG stage">;
def FeatureSlowLEA : SubtargetFeature<"slow-lea", "SlowLEA", "true",
"LEA instruction with certain arguments is slow">;
+def FeatureSlowIncDec : SubtargetFeature<"slow-incdec", "SlowIncDec", "true",
+ "INC and DEC instructions are slower than ADD and SUB">;
//===----------------------------------------------------------------------===//
// X86 processors supported.
@@ -228,7 +230,7 @@ def : ProcessorModel<"slm", SLMModel, [ProcIntelSLM,
FeaturePCLMUL, FeatureAES,
FeatureCallRegIndirect,
FeaturePRFCHW,
- FeatureSlowLEA,
+ FeatureSlowLEA, FeatureSlowIncDec,
FeatureSlowBTMem, FeatureFastUAMem]>;
// "Arrandale" along with corei3 and corei5
def : ProcessorModel<"corei7", SandyBridgeModel,
@@ -271,7 +273,8 @@ def : ProcessorModel<"knl", HaswellModel,
FeatureCMPXCHG16B, FeatureFastUAMem, FeaturePOPCNT,
FeatureAES, FeaturePCLMUL, FeatureRDRAND, FeatureF16C,
FeatureFSGSBase, FeatureMOVBE, FeatureLZCNT, FeatureBMI,
- FeatureBMI2, FeatureFMA, FeatureRTM, FeatureHLE]>;
+ FeatureBMI2, FeatureFMA, FeatureRTM, FeatureHLE,
+ FeatureSlowIncDec]>;
def : Proc<"k6", [FeatureMMX]>;
def : Proc<"k6-2", [Feature3DNow]>;
diff --git a/lib/Target/X86/X86AtomicExpandPass.cpp b/lib/Target/X86/X86AtomicExpandPass.cpp
new file mode 100644
index 0000000..61eefbb
--- /dev/null
+++ b/lib/Target/X86/X86AtomicExpandPass.cpp
@@ -0,0 +1,287 @@
+//===-- X86AtomicExpandPass.cpp - Expand illegal atomic instructions -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a pass (at IR level) to replace atomic instructions which
+// cannot be implemented as a single instruction with cmpxchg-based loops.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86.h"
+#include "X86TargetMachine.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "x86-atomic-expand"
+
+namespace {
+ class X86AtomicExpandPass : public FunctionPass {
+ const X86TargetMachine *TM;
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ explicit X86AtomicExpandPass(const X86TargetMachine *TM)
+ : FunctionPass(ID), TM(TM) {}
+
+ bool runOnFunction(Function &F) override;
+ bool expandAtomicInsts(Function &F);
+
+ bool needsCmpXchgNb(Type *MemType);
+
+ /// There are four kinds of atomic operations. Two never need expanding:
+ /// cmpxchg is what we expand the others *to*, and loads are easily handled
+ /// by ISelLowering. Atomicrmw and store can need expanding in some
+ /// circumstances.
+ bool shouldExpand(Instruction *Inst);
+
+ /// 128-bit atomic stores (64-bit on i686) need to be implemented in terms
+ /// of trivial cmpxchg16b (cmpxchg8b on i686) loops. A simple store isn't
+ /// necessarily atomic.
+ bool shouldExpandStore(StoreInst *SI);
+
+ /// Only some atomicrmw instructions need expanding -- some operations
+ /// (e.g. max) have absolutely no architectural support; some (e.g. or) have
+ /// limited support but can't return the previous value; some (e.g. add)
+ /// have complete support in the instruction set.
+ ///
+ /// Also, naturally, 128-bit operations always need to be expanded.
+ bool shouldExpandAtomicRMW(AtomicRMWInst *AI);
+
+ bool expandAtomicRMW(AtomicRMWInst *AI);
+ bool expandAtomicStore(StoreInst *SI);
+ };
+}
+
+char X86AtomicExpandPass::ID = 0;
+
+FunctionPass *llvm::createX86AtomicExpandPass(const X86TargetMachine *TM) {
+ return new X86AtomicExpandPass(TM);
+}
+
+bool X86AtomicExpandPass::runOnFunction(Function &F) {
+ SmallVector<Instruction *, 1> AtomicInsts;
+
+ // Changing control-flow while iterating through it is a bad idea, so gather a
+ // list of all atomic instructions before we start.
+ for (BasicBlock &BB : F)
+ for (Instruction &Inst : BB) {
+ if (isa<AtomicRMWInst>(&Inst) ||
+ (isa<StoreInst>(&Inst) && cast<StoreInst>(&Inst)->isAtomic()))
+ AtomicInsts.push_back(&Inst);
+ }
+
+ bool MadeChange = false;
+ for (Instruction *Inst : AtomicInsts) {
+ if (!shouldExpand(Inst))
+ continue;
+
+ if (AtomicRMWInst *AI = dyn_cast<AtomicRMWInst>(Inst))
+ MadeChange |= expandAtomicRMW(AI);
+ if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
+ MadeChange |= expandAtomicStore(SI);
+
+ assert(MadeChange && "Atomic inst not expanded when it should be?");
+ Inst->eraseFromParent();
+ }
+
+ return MadeChange;
+}
+
+/// Returns true if operations on the given type will need to use either
+/// cmpxchg8b or cmpxchg16b. This occurs if the type is one step up from the
+/// native width, and the instructions are available (otherwise we leave them
+/// alone to become __sync_fetch_and_... calls).
+bool X86AtomicExpandPass::needsCmpXchgNb(llvm::Type *MemType) {
+ const X86Subtarget &Subtarget = TM->getSubtarget<X86Subtarget>();
+ if (!Subtarget.hasCmpxchg16b())
+ return false;
+
+ unsigned CmpXchgNbWidth = Subtarget.is64Bit() ? 128 : 64;
+
+  unsigned OpWidth = MemType->getPrimitiveSizeInBits();
+  return OpWidth == CmpXchgNbWidth;
+}
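+// For example (a sketch, assuming a 64-bit subtarget with the cx16 feature):
+// an i128 operation returns true here and gets a cmpxchg16b loop, while an
+// i64 operation is native width and returns false.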
+
+bool X86AtomicExpandPass::shouldExpandAtomicRMW(AtomicRMWInst *AI) {
+ const X86Subtarget &Subtarget = TM->getSubtarget<X86Subtarget>();
+ unsigned NativeWidth = Subtarget.is64Bit() ? 64 : 32;
+
+ if (needsCmpXchgNb(AI->getType()))
+ return true;
+
+ if (AI->getType()->getPrimitiveSizeInBits() > NativeWidth)
+ return false;
+
+ AtomicRMWInst::BinOp Op = AI->getOperation();
+ switch (Op) {
+ default:
+ llvm_unreachable("Unknown atomic operation");
+ case AtomicRMWInst::Xchg:
+ case AtomicRMWInst::Add:
+ case AtomicRMWInst::Sub:
+ // It's better to use xadd, xsub or xchg for these in all cases.
+ return false;
+ case AtomicRMWInst::Or:
+ case AtomicRMWInst::And:
+ case AtomicRMWInst::Xor:
+ // If the atomicrmw's result isn't actually used, we can just add a "lock"
+ // prefix to a normal instruction for these operations.
+ return !AI->use_empty();
+ case AtomicRMWInst::Nand:
+ case AtomicRMWInst::Max:
+ case AtomicRMWInst::Min:
+ case AtomicRMWInst::UMax:
+ case AtomicRMWInst::UMin:
+ // These always require a non-trivial set of data operations on x86. We must
+ // use a cmpxchg loop.
+ return true;
+ }
+}
+
+bool X86AtomicExpandPass::shouldExpandStore(StoreInst *SI) {
+  return needsCmpXchgNb(SI->getValueOperand()->getType());
+}
+
+bool X86AtomicExpandPass::shouldExpand(Instruction *Inst) {
+ if (AtomicRMWInst *AI = dyn_cast<AtomicRMWInst>(Inst))
+ return shouldExpandAtomicRMW(AI);
+ if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
+ return shouldExpandStore(SI);
+ return false;
+}
+
+/// Emit IR to implement the given atomicrmw operation on values in registers,
+/// returning the new value.
+static Value *performAtomicOp(AtomicRMWInst::BinOp Op, IRBuilder<> &Builder,
+ Value *Loaded, Value *Inc) {
+ Value *NewVal;
+ switch (Op) {
+ case AtomicRMWInst::Xchg:
+ return Inc;
+ case AtomicRMWInst::Add:
+ return Builder.CreateAdd(Loaded, Inc, "new");
+ case AtomicRMWInst::Sub:
+ return Builder.CreateSub(Loaded, Inc, "new");
+ case AtomicRMWInst::And:
+ return Builder.CreateAnd(Loaded, Inc, "new");
+ case AtomicRMWInst::Nand:
+ return Builder.CreateNot(Builder.CreateAnd(Loaded, Inc), "new");
+ case AtomicRMWInst::Or:
+ return Builder.CreateOr(Loaded, Inc, "new");
+ case AtomicRMWInst::Xor:
+ return Builder.CreateXor(Loaded, Inc, "new");
+ case AtomicRMWInst::Max:
+ NewVal = Builder.CreateICmpSGT(Loaded, Inc);
+ return Builder.CreateSelect(NewVal, Loaded, Inc, "new");
+ case AtomicRMWInst::Min:
+ NewVal = Builder.CreateICmpSLE(Loaded, Inc);
+ return Builder.CreateSelect(NewVal, Loaded, Inc, "new");
+ case AtomicRMWInst::UMax:
+ NewVal = Builder.CreateICmpUGT(Loaded, Inc);
+ return Builder.CreateSelect(NewVal, Loaded, Inc, "new");
+ case AtomicRMWInst::UMin:
+ NewVal = Builder.CreateICmpULE(Loaded, Inc);
+ return Builder.CreateSelect(NewVal, Loaded, Inc, "new");
+ default:
+ break;
+ }
+ llvm_unreachable("Unknown atomic op");
+}
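+// For instance (a sketch), AtomicRMWInst::Max emits
+//   %cmp = icmp sgt iN %loaded, %inc
+//   %new = select i1 %cmp, iN %loaded, iN %inc
+// because x86 has no single instruction for an atomic signed max.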
+
+bool X86AtomicExpandPass::expandAtomicRMW(AtomicRMWInst *AI) {
+ AtomicOrdering Order =
+ AI->getOrdering() == Unordered ? Monotonic : AI->getOrdering();
+ Value *Addr = AI->getPointerOperand();
+ BasicBlock *BB = AI->getParent();
+ Function *F = BB->getParent();
+ LLVMContext &Ctx = F->getContext();
+
+ // Given: atomicrmw some_op iN* %addr, iN %incr ordering
+ //
+ // The standard expansion we produce is:
+ // [...]
+ // %init_loaded = load atomic iN* %addr
+ // br label %loop
+ // loop:
+ // %loaded = phi iN [ %init_loaded, %entry ], [ %new_loaded, %loop ]
+ // %new = some_op iN %loaded, %incr
+ // %pair = cmpxchg iN* %addr, iN %loaded, iN %new
+ // %new_loaded = extractvalue { iN, i1 } %pair, 0
+ // %success = extractvalue { iN, i1 } %pair, 1
+ // br i1 %success, label %atomicrmw.end, label %loop
+ // atomicrmw.end:
+ // [...]
+ BasicBlock *ExitBB = BB->splitBasicBlock(AI, "atomicrmw.end");
+ BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);
+
+ // This grabs the DebugLoc from AI.
+ IRBuilder<> Builder(AI);
+
+ // The split call above "helpfully" added a branch at the end of BB (to the
+ // wrong place), but we want a load. It's easiest to just remove
+ // the branch entirely.
+ std::prev(BB->end())->eraseFromParent();
+ Builder.SetInsertPoint(BB);
+ LoadInst *InitLoaded = Builder.CreateLoad(Addr);
+  // setAlignment takes bytes, not bits; atomics require natural alignment.
+  InitLoaded->setAlignment(AI->getType()->getPrimitiveSizeInBits() / 8);
+ Builder.CreateBr(LoopBB);
+
+ // Start the main loop block now that we've taken care of the preliminaries.
+ Builder.SetInsertPoint(LoopBB);
+ PHINode *Loaded = Builder.CreatePHI(AI->getType(), 2, "loaded");
+ Loaded->addIncoming(InitLoaded, BB);
+
+ Value *NewVal =
+ performAtomicOp(AI->getOperation(), Builder, Loaded, AI->getValOperand());
+
+ Value *Pair = Builder.CreateAtomicCmpXchg(
+ Addr, Loaded, NewVal, Order,
+ AtomicCmpXchgInst::getStrongestFailureOrdering(Order));
+ Value *NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");
+ Loaded->addIncoming(NewLoaded, LoopBB);
+
+ Value *Success = Builder.CreateExtractValue(Pair, 1, "success");
+ Builder.CreateCondBr(Success, ExitBB, LoopBB);
+
+ AI->replaceAllUsesWith(NewLoaded);
+
+ return true;
+}
+
+bool X86AtomicExpandPass::expandAtomicStore(StoreInst *SI) {
+  // An atomic store might need cmpxchg16b (or cmpxchg8b on 32-bit x86) to
+  // execute. Express this in terms of the usual expansion to "atomicrmw xchg".
+ IRBuilder<> Builder(SI);
+ AtomicOrdering Order =
+ SI->getOrdering() == Unordered ? Monotonic : SI->getOrdering();
+ AtomicRMWInst *AI =
+ Builder.CreateAtomicRMW(AtomicRMWInst::Xchg, SI->getPointerOperand(),
+ SI->getValueOperand(), Order);
+
+ // Now we have an appropriate swap instruction, lower it as usual.
+ if (shouldExpandAtomicRMW(AI)) {
+ expandAtomicRMW(AI);
+ AI->eraseFromParent();
+ return true;
+ }
+
+  // Even without further expansion, replacing the store with an atomicrmw
+  // xchg is itself a change, so report success.
+  return true;
+}
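+// Illustrative IR (a sketch, assuming a 64-bit target with cmpxchg16b):
+//   store atomic i128 %v, i128* %p seq_cst, align 16
+// is first rewritten to
+//   atomicrmw xchg i128* %p, i128 %v seq_cst
+// and then expanded by expandAtomicRMW into the cmpxchg loop shown earlier.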
diff --git a/lib/Target/X86/X86CodeEmitter.cpp b/lib/Target/X86/X86CodeEmitter.cpp
index 76718d0..a3ae7ee 100644
--- a/lib/Target/X86/X86CodeEmitter.cpp
+++ b/lib/Target/X86/X86CodeEmitter.cpp
@@ -1113,9 +1113,14 @@ void Emitter<CodeEmitter>::emitInstruction(MachineInstr &MI,
case TargetOpcode::INLINEASM:
// We allow inline assembler nodes with empty bodies - they can
// implicitly define registers, which is ok for JIT.
- if (MI.getOperand(0).getSymbolName()[0])
+ if (MI.getOperand(0).getSymbolName()[0]) {
+ DebugLoc DL = MI.getDebugLoc();
+ DL.print(MI.getParent()->getParent()->getFunction()->getContext(),
+ llvm::errs());
report_fatal_error("JIT does not support inline asm!");
+ }
break;
+ case TargetOpcode::DBG_VALUE:
case TargetOpcode::CFI_INSTRUCTION:
break;
case TargetOpcode::GC_LABEL:
@@ -1126,6 +1131,16 @@ void Emitter<CodeEmitter>::emitInstruction(MachineInstr &MI,
case TargetOpcode::IMPLICIT_DEF:
case TargetOpcode::KILL:
break;
+
+ case X86::SEH_PushReg:
+ case X86::SEH_SaveReg:
+ case X86::SEH_SaveXMM:
+ case X86::SEH_StackAlloc:
+ case X86::SEH_SetFrame:
+ case X86::SEH_PushFrame:
+ case X86::SEH_EndPrologue:
+ break;
+
case X86::MOVPC32r: {
// This emits the "call" portion of this pseudo instruction.
MCE.emitByte(BaseOpcode);
diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp
index 56bcfa3..ce554ba 100644
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@@ -16,10 +16,12 @@
#include "X86.h"
#include "X86CallingConv.h"
#include "X86InstrBuilder.h"
+#include "X86InstrInfo.h"
#include "X86MachineFunctionInfo.h"
#include "X86RegisterInfo.h"
#include "X86Subtarget.h"
#include "X86TargetMachine.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
@@ -78,12 +80,14 @@ public:
private:
bool X86FastEmitCompare(const Value *LHS, const Value *RHS, EVT VT);
- bool X86FastEmitLoad(EVT VT, const X86AddressMode &AM, unsigned &RR);
+ bool X86FastEmitLoad(EVT VT, const X86AddressMode &AM, MachineMemOperand *MMO,
+ unsigned &ResultReg);
bool X86FastEmitStore(EVT VT, const Value *Val, const X86AddressMode &AM,
- bool Aligned = false);
- bool X86FastEmitStore(EVT VT, unsigned ValReg, const X86AddressMode &AM,
- bool Aligned = false);
+ MachineMemOperand *MMO = nullptr, bool Aligned = false);
+ bool X86FastEmitStore(EVT VT, unsigned ValReg, bool ValIsKill,
+ const X86AddressMode &AM,
+ MachineMemOperand *MMO = nullptr, bool Aligned = false);
bool X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT, unsigned Src, EVT SrcVT,
unsigned &ResultReg);
@@ -107,6 +111,12 @@ private:
bool X86SelectDivRem(const Instruction *I);
+ bool X86FastEmitCMoveSelect(MVT RetVT, const Instruction *I);
+
+ bool X86FastEmitSSESelect(MVT RetVT, const Instruction *I);
+
+ bool X86FastEmitPseudoSelect(MVT RetVT, const Instruction *I);
+
bool X86SelectSelect(const Instruction *I);
bool X86SelectTrunc(const Instruction *I);
@@ -147,10 +157,182 @@ private:
bool TryEmitSmallMemcpy(X86AddressMode DestAM,
X86AddressMode SrcAM, uint64_t Len);
+
+ bool foldX86XALUIntrinsic(X86::CondCode &CC, const Instruction *I,
+ const Value *Cond);
};
} // end anonymous namespace.
+static CmpInst::Predicate optimizeCmpPredicate(const CmpInst *CI) {
+ // If both operands are the same, then try to optimize or fold the cmp.
+ CmpInst::Predicate Predicate = CI->getPredicate();
+ if (CI->getOperand(0) != CI->getOperand(1))
+ return Predicate;
+
+ switch (Predicate) {
+ default: llvm_unreachable("Invalid predicate!");
+ case CmpInst::FCMP_FALSE: Predicate = CmpInst::FCMP_FALSE; break;
+ case CmpInst::FCMP_OEQ: Predicate = CmpInst::FCMP_ORD; break;
+ case CmpInst::FCMP_OGT: Predicate = CmpInst::FCMP_FALSE; break;
+ case CmpInst::FCMP_OGE: Predicate = CmpInst::FCMP_ORD; break;
+ case CmpInst::FCMP_OLT: Predicate = CmpInst::FCMP_FALSE; break;
+ case CmpInst::FCMP_OLE: Predicate = CmpInst::FCMP_ORD; break;
+ case CmpInst::FCMP_ONE: Predicate = CmpInst::FCMP_FALSE; break;
+ case CmpInst::FCMP_ORD: Predicate = CmpInst::FCMP_ORD; break;
+ case CmpInst::FCMP_UNO: Predicate = CmpInst::FCMP_UNO; break;
+ case CmpInst::FCMP_UEQ: Predicate = CmpInst::FCMP_TRUE; break;
+ case CmpInst::FCMP_UGT: Predicate = CmpInst::FCMP_UNO; break;
+ case CmpInst::FCMP_UGE: Predicate = CmpInst::FCMP_TRUE; break;
+ case CmpInst::FCMP_ULT: Predicate = CmpInst::FCMP_UNO; break;
+ case CmpInst::FCMP_ULE: Predicate = CmpInst::FCMP_TRUE; break;
+ case CmpInst::FCMP_UNE: Predicate = CmpInst::FCMP_UNO; break;
+ case CmpInst::FCMP_TRUE: Predicate = CmpInst::FCMP_TRUE; break;
+
+ case CmpInst::ICMP_EQ: Predicate = CmpInst::FCMP_TRUE; break;
+ case CmpInst::ICMP_NE: Predicate = CmpInst::FCMP_FALSE; break;
+ case CmpInst::ICMP_UGT: Predicate = CmpInst::FCMP_FALSE; break;
+ case CmpInst::ICMP_UGE: Predicate = CmpInst::FCMP_TRUE; break;
+ case CmpInst::ICMP_ULT: Predicate = CmpInst::FCMP_FALSE; break;
+ case CmpInst::ICMP_ULE: Predicate = CmpInst::FCMP_TRUE; break;
+ case CmpInst::ICMP_SGT: Predicate = CmpInst::FCMP_FALSE; break;
+ case CmpInst::ICMP_SGE: Predicate = CmpInst::FCMP_TRUE; break;
+ case CmpInst::ICMP_SLT: Predicate = CmpInst::FCMP_FALSE; break;
+ case CmpInst::ICMP_SLE: Predicate = CmpInst::FCMP_TRUE; break;
+ }
+
+ return Predicate;
+}
+
+static std::pair<X86::CondCode, bool>
+getX86ConditionCode(CmpInst::Predicate Predicate) {
+ X86::CondCode CC = X86::COND_INVALID;
+ bool NeedSwap = false;
+ switch (Predicate) {
+ default: break;
+ // Floating-point Predicates
+ case CmpInst::FCMP_UEQ: CC = X86::COND_E; break;
+ case CmpInst::FCMP_OLT: NeedSwap = true; // fall-through
+ case CmpInst::FCMP_OGT: CC = X86::COND_A; break;
+ case CmpInst::FCMP_OLE: NeedSwap = true; // fall-through
+ case CmpInst::FCMP_OGE: CC = X86::COND_AE; break;
+ case CmpInst::FCMP_UGT: NeedSwap = true; // fall-through
+ case CmpInst::FCMP_ULT: CC = X86::COND_B; break;
+ case CmpInst::FCMP_UGE: NeedSwap = true; // fall-through
+ case CmpInst::FCMP_ULE: CC = X86::COND_BE; break;
+ case CmpInst::FCMP_ONE: CC = X86::COND_NE; break;
+ case CmpInst::FCMP_UNO: CC = X86::COND_P; break;
+ case CmpInst::FCMP_ORD: CC = X86::COND_NP; break;
+ case CmpInst::FCMP_OEQ: // fall-through
+ case CmpInst::FCMP_UNE: CC = X86::COND_INVALID; break;
+
+ // Integer Predicates
+ case CmpInst::ICMP_EQ: CC = X86::COND_E; break;
+ case CmpInst::ICMP_NE: CC = X86::COND_NE; break;
+ case CmpInst::ICMP_UGT: CC = X86::COND_A; break;
+ case CmpInst::ICMP_UGE: CC = X86::COND_AE; break;
+ case CmpInst::ICMP_ULT: CC = X86::COND_B; break;
+ case CmpInst::ICMP_ULE: CC = X86::COND_BE; break;
+ case CmpInst::ICMP_SGT: CC = X86::COND_G; break;
+ case CmpInst::ICMP_SGE: CC = X86::COND_GE; break;
+ case CmpInst::ICMP_SLT: CC = X86::COND_L; break;
+ case CmpInst::ICMP_SLE: CC = X86::COND_LE; break;
+ }
+
+ return std::make_pair(CC, NeedSwap);
+}
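+// For example, ICMP_SLT maps straight to X86::COND_L with no operand swap,
+// while FCMP_OLT swaps its operands and uses X86::COND_A, because ucomiss and
+// ucomisd only set the unsigned-style flags (ZF/PF/CF).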
+
+static std::pair<unsigned, bool>
+getX86SSEConditionCode(CmpInst::Predicate Predicate) {
+ unsigned CC;
+ bool NeedSwap = false;
+
+ // SSE Condition code mapping:
+ // 0 - EQ
+ // 1 - LT
+ // 2 - LE
+ // 3 - UNORD
+ // 4 - NEQ
+ // 5 - NLT
+ // 6 - NLE
+ // 7 - ORD
+ switch (Predicate) {
+ default: llvm_unreachable("Unexpected predicate");
+ case CmpInst::FCMP_OEQ: CC = 0; break;
+ case CmpInst::FCMP_OGT: NeedSwap = true; // fall-through
+ case CmpInst::FCMP_OLT: CC = 1; break;
+ case CmpInst::FCMP_OGE: NeedSwap = true; // fall-through
+ case CmpInst::FCMP_OLE: CC = 2; break;
+ case CmpInst::FCMP_UNO: CC = 3; break;
+ case CmpInst::FCMP_UNE: CC = 4; break;
+ case CmpInst::FCMP_ULE: NeedSwap = true; // fall-through
+ case CmpInst::FCMP_UGE: CC = 5; break;
+ case CmpInst::FCMP_ULT: NeedSwap = true; // fall-through
+ case CmpInst::FCMP_UGT: CC = 6; break;
+ case CmpInst::FCMP_ORD: CC = 7; break;
+ case CmpInst::FCMP_UEQ:
+ case CmpInst::FCMP_ONE: CC = 8; break;
+ }
+
+ return std::make_pair(CC, NeedSwap);
+}
+
+/// \brief Check if it is possible to fold the condition from the XALU intrinsic
+/// into the user. The condition code will only be updated on success.
+bool X86FastISel::foldX86XALUIntrinsic(X86::CondCode &CC, const Instruction *I,
+ const Value *Cond) {
+ if (!isa<ExtractValueInst>(Cond))
+ return false;
+
+ const auto *EV = cast<ExtractValueInst>(Cond);
+ if (!isa<IntrinsicInst>(EV->getAggregateOperand()))
+ return false;
+
+ const auto *II = cast<IntrinsicInst>(EV->getAggregateOperand());
+ MVT RetVT;
+ const Function *Callee = II->getCalledFunction();
+ Type *RetTy =
+ cast<StructType>(Callee->getReturnType())->getTypeAtIndex(0U);
+ if (!isTypeLegal(RetTy, RetVT))
+ return false;
+
+ if (RetVT != MVT::i32 && RetVT != MVT::i64)
+ return false;
+
+ X86::CondCode TmpCC;
+ switch (II->getIntrinsicID()) {
+ default: return false;
+ case Intrinsic::sadd_with_overflow:
+ case Intrinsic::ssub_with_overflow:
+ case Intrinsic::smul_with_overflow:
+ case Intrinsic::umul_with_overflow: TmpCC = X86::COND_O; break;
+ case Intrinsic::uadd_with_overflow:
+ case Intrinsic::usub_with_overflow: TmpCC = X86::COND_B; break;
+ }
+
+ // Check if both instructions are in the same basic block.
+ if (II->getParent() != I->getParent())
+ return false;
+
+  // Make sure nothing is in the way between the intrinsic and its user.
+ BasicBlock::const_iterator Start = I;
+ BasicBlock::const_iterator End = II;
+ for (auto Itr = std::prev(Start); Itr != End; --Itr) {
+ // We only expect extractvalue instructions between the intrinsic and the
+ // instruction to be selected.
+ if (!isa<ExtractValueInst>(Itr))
+ return false;
+
+ // Check that the extractvalue operand comes from the intrinsic.
+ const auto *EVI = cast<ExtractValueInst>(Itr);
+ if (EVI->getAggregateOperand() != II)
+ return false;
+ }
+
+ CC = TmpCC;
+ return true;
+}
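+// A foldable pattern looks like this (sketch; only extractvalues may sit
+// between the intrinsic and the instruction being selected):
+//   %res  = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 %b)
+//   %sum  = extractvalue { i32, i1 } %res, 0
+//   %obit = extractvalue { i32, i1 } %res, 1
+//   br i1 %obit, label %overflow, label %cont
+// Here CC becomes X86::COND_O and the branch can reuse the flags of the add.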
+
bool X86FastISel::isTypeLegal(Type *Ty, MVT &VT, bool AllowI1) {
EVT evt = TLI.getValueType(Ty, /*HandleUnknown=*/true);
if (evt == MVT::Other || !evt.isSimple())
@@ -180,7 +362,7 @@ bool X86FastISel::isTypeLegal(Type *Ty, MVT &VT, bool AllowI1) {
/// The address is either pre-computed, i.e. Ptr, or a GlobalAddress, i.e. GV.
/// Return true and the result register by reference if it is possible.
bool X86FastISel::X86FastEmitLoad(EVT VT, const X86AddressMode &AM,
- unsigned &ResultReg) {
+ MachineMemOperand *MMO, unsigned &ResultReg) {
// Get opcode and regclass of the output for the given load instruction.
unsigned Opc = 0;
const TargetRegisterClass *RC = nullptr;
@@ -228,8 +410,11 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, const X86AddressMode &AM,
}
ResultReg = createResultReg(RC);
- addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
- DbgLoc, TII.get(Opc), ResultReg), AM);
+ MachineInstrBuilder MIB =
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg);
+ addFullAddress(MIB, AM);
+ if (MMO)
+ MIB->addMemOperand(*FuncInfo.MF, MMO);
return true;
}
@@ -237,9 +422,9 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, const X86AddressMode &AM,
/// type VT. The address is either pre-computed, consisted of a base ptr, Ptr
/// and a displacement offset, or a GlobalAddress,
/// i.e. V. Return true if it is possible.
-bool
-X86FastISel::X86FastEmitStore(EVT VT, unsigned ValReg,
- const X86AddressMode &AM, bool Aligned) {
+bool X86FastISel::X86FastEmitStore(EVT VT, unsigned ValReg, bool ValIsKill,
+ const X86AddressMode &AM,
+ MachineMemOperand *MMO, bool Aligned) {
// Get opcode and regclass of the output for the given store instruction.
unsigned Opc = 0;
switch (VT.getSimpleVT().SimpleTy) {
@@ -249,7 +434,8 @@ X86FastISel::X86FastEmitStore(EVT VT, unsigned ValReg,
// Mask out all but lowest bit.
unsigned AndResult = createResultReg(&X86::GR8RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
- TII.get(X86::AND8ri), AndResult).addReg(ValReg).addImm(1);
+ TII.get(X86::AND8ri), AndResult)
+ .addReg(ValReg, getKillRegState(ValIsKill)).addImm(1);
ValReg = AndResult;
}
// FALLTHROUGH, handling i1 as i8.
@@ -288,13 +474,18 @@ X86FastISel::X86FastEmitStore(EVT VT, unsigned ValReg,
break;
}
- addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
- DbgLoc, TII.get(Opc)), AM).addReg(ValReg);
+ MachineInstrBuilder MIB =
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc));
+ addFullAddress(MIB, AM).addReg(ValReg, getKillRegState(ValIsKill));
+ if (MMO)
+ MIB->addMemOperand(*FuncInfo.MF, MMO);
+
return true;
}
bool X86FastISel::X86FastEmitStore(EVT VT, const Value *Val,
- const X86AddressMode &AM, bool Aligned) {
+ const X86AddressMode &AM,
+ MachineMemOperand *MMO, bool Aligned) {
// Handle 'null' like i32/i64 0.
if (isa<ConstantPointerNull>(Val))
Val = Constant::getNullValue(DL.getIntPtrType(Val->getContext()));
@@ -317,10 +508,12 @@ bool X86FastISel::X86FastEmitStore(EVT VT, const Value *Val,
}
if (Opc) {
- addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
- DbgLoc, TII.get(Opc)), AM)
- .addImm(Signed ? (uint64_t) CI->getSExtValue() :
- CI->getZExtValue());
+ MachineInstrBuilder MIB =
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc));
+ addFullAddress(MIB, AM).addImm(Signed ? (uint64_t) CI->getSExtValue()
+ : CI->getZExtValue());
+ if (MMO)
+ MIB->addMemOperand(*FuncInfo.MF, MMO);
return true;
}
}
@@ -329,7 +522,8 @@ bool X86FastISel::X86FastEmitStore(EVT VT, const Value *Val,
if (ValReg == 0)
return false;
- return X86FastEmitStore(VT, ValReg, AM, Aligned);
+ bool ValKill = hasTrivialKill(Val);
+ return X86FastEmitStore(VT, ValReg, ValKill, AM, MMO, Aligned);
}
/// X86FastEmitExtend - Emit a machine instruction to extend a value Src of
@@ -355,17 +549,8 @@ bool X86FastISel::handleConstantAddresses(const Value *V, X86AddressMode &AM) {
return false;
// Can't handle TLS yet.
- if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
- if (GVar->isThreadLocal())
- return false;
-
- // Can't handle TLS yet, part 2 (this is slightly crazy, but this is how
- // it works...).
- if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
- if (const GlobalVariable *GVar =
- dyn_cast_or_null<GlobalVariable>(GA->getAliasee()))
- if (GVar->isThreadLocal())
- return false;
+ if (GV->isThreadLocal())
+ return false;
// RIP-relative addresses can't have additional register operands, so if
// we've already folded stuff into the addressing mode, just force the
@@ -696,7 +881,7 @@ bool X86FastISel::X86SelectCallAddress(const Value *V, X86AddressMode &AM) {
(AM.Base.Reg != 0 || AM.IndexReg != 0))
return false;
- // Can't handle DbgLocLImport.
+ // Can't handle DLL Import.
if (GV->hasDLLImportStorageClass())
return false;
@@ -749,19 +934,24 @@ bool X86FastISel::X86SelectStore(const Instruction *I) {
if (S->isAtomic())
return false;
- unsigned SABIAlignment =
- DL.getABITypeAlignment(S->getValueOperand()->getType());
- bool Aligned = S->getAlignment() == 0 || S->getAlignment() >= SABIAlignment;
+ const Value *Val = S->getValueOperand();
+ const Value *Ptr = S->getPointerOperand();
MVT VT;
- if (!isTypeLegal(I->getOperand(0)->getType(), VT, /*AllowI1=*/true))
+ if (!isTypeLegal(Val->getType(), VT, /*AllowI1=*/true))
return false;
+ unsigned Alignment = S->getAlignment();
+ unsigned ABIAlignment = DL.getABITypeAlignment(Val->getType());
+ if (Alignment == 0) // Ensure that codegen never sees alignment 0
+ Alignment = ABIAlignment;
+ bool Aligned = Alignment >= ABIAlignment;
+
X86AddressMode AM;
- if (!X86SelectAddress(I->getOperand(1), AM))
+ if (!X86SelectAddress(Ptr, AM))
return false;
- return X86FastEmitStore(VT, I->getOperand(0), AM, Aligned);
+ return X86FastEmitStore(VT, Val, AM, createMachineMemOperandFor(I), Aligned);
}
/// X86SelectRet - Select and emit code to implement ret instructions.
@@ -896,25 +1086,29 @@ bool X86FastISel::X86SelectRet(const Instruction *I) {
/// X86SelectLoad - Select and emit code to implement load instructions.
///
-bool X86FastISel::X86SelectLoad(const Instruction *I) {
+bool X86FastISel::X86SelectLoad(const Instruction *I) {
+ const LoadInst *LI = cast<LoadInst>(I);
+
// Atomic loads need special handling.
- if (cast<LoadInst>(I)->isAtomic())
+ if (LI->isAtomic())
return false;
MVT VT;
- if (!isTypeLegal(I->getType(), VT, /*AllowI1=*/true))
+ if (!isTypeLegal(LI->getType(), VT, /*AllowI1=*/true))
return false;
+ const Value *Ptr = LI->getPointerOperand();
+
X86AddressMode AM;
- if (!X86SelectAddress(I->getOperand(0), AM))
+ if (!X86SelectAddress(Ptr, AM))
return false;
unsigned ResultReg = 0;
- if (X86FastEmitLoad(VT, AM, ResultReg)) {
- UpdateValueMap(I, ResultReg);
- return true;
- }
- return false;
+ if (!X86FastEmitLoad(VT, AM, createMachineMemOperandFor(LI), ResultReg))
+ return false;
+
+ UpdateValueMap(I, ResultReg);
+ return true;
}
static unsigned X86ChooseCmpOpcode(EVT VT, const X86Subtarget *Subtarget) {
@@ -994,73 +1188,89 @@ bool X86FastISel::X86SelectCmp(const Instruction *I) {
if (!isTypeLegal(I->getOperand(0)->getType(), VT))
return false;
- unsigned ResultReg = createResultReg(&X86::GR8RegClass);
- unsigned SetCCOpc;
- bool SwapArgs; // false -> compare Op0, Op1. true -> compare Op1, Op0.
- switch (CI->getPredicate()) {
- case CmpInst::FCMP_OEQ: {
- if (!X86FastEmitCompare(CI->getOperand(0), CI->getOperand(1), VT))
+ // Try to optimize or fold the cmp.
+ CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
+ unsigned ResultReg = 0;
+ switch (Predicate) {
+ default: break;
+ case CmpInst::FCMP_FALSE: {
+ ResultReg = createResultReg(&X86::GR32RegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV32r0),
+ ResultReg);
+ ResultReg = FastEmitInst_extractsubreg(MVT::i8, ResultReg, /*Kill=*/true,
+ X86::sub_8bit);
+ if (!ResultReg)
return false;
+ break;
+ }
+ case CmpInst::FCMP_TRUE: {
+ ResultReg = createResultReg(&X86::GR8RegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV8ri),
+ ResultReg).addImm(1);
+ break;
+ }
+ }
- unsigned EReg = createResultReg(&X86::GR8RegClass);
- unsigned NPReg = createResultReg(&X86::GR8RegClass);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::SETEr), EReg);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
- TII.get(X86::SETNPr), NPReg);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
- TII.get(X86::AND8rr), ResultReg).addReg(NPReg).addReg(EReg);
+ if (ResultReg) {
UpdateValueMap(I, ResultReg);
return true;
}
- case CmpInst::FCMP_UNE: {
- if (!X86FastEmitCompare(CI->getOperand(0), CI->getOperand(1), VT))
+
+ const Value *LHS = CI->getOperand(0);
+ const Value *RHS = CI->getOperand(1);
+
+ // The optimizer might have replaced fcmp oeq %x, %x with fcmp ord %x, 0.0.
+ // We don't have to materialize a zero constant for this case and can just use
+ // %x again on the RHS.
+ if (Predicate == CmpInst::FCMP_ORD || Predicate == CmpInst::FCMP_UNO) {
+ const auto *RHSC = dyn_cast<ConstantFP>(RHS);
+ if (RHSC && RHSC->isNullValue())
+ RHS = LHS;
+ }
+
+ // FCMP_OEQ and FCMP_UNE cannot be checked with a single instruction.
+ static unsigned SETFOpcTable[2][3] = {
+ { X86::SETEr, X86::SETNPr, X86::AND8rr },
+ { X86::SETNEr, X86::SETPr, X86::OR8rr }
+ };
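+  // (After ucomis*, OEQ holds iff ZF==1 && PF==0, hence SETE/SETNP combined
+  // with AND; UNE holds iff ZF==0 || PF==1, hence SETNE/SETP combined with
+  // OR.)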
+ unsigned *SETFOpc = nullptr;
+ switch (Predicate) {
+ default: break;
+ case CmpInst::FCMP_OEQ: SETFOpc = &SETFOpcTable[0][0]; break;
+ case CmpInst::FCMP_UNE: SETFOpc = &SETFOpcTable[1][0]; break;
+ }
+
+ ResultReg = createResultReg(&X86::GR8RegClass);
+ if (SETFOpc) {
+ if (!X86FastEmitCompare(LHS, RHS, VT))
return false;
- unsigned NEReg = createResultReg(&X86::GR8RegClass);
- unsigned PReg = createResultReg(&X86::GR8RegClass);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::SETNEr), NEReg);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::SETPr), PReg);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::OR8rr),ResultReg)
- .addReg(PReg).addReg(NEReg);
+ unsigned FlagReg1 = createResultReg(&X86::GR8RegClass);
+ unsigned FlagReg2 = createResultReg(&X86::GR8RegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SETFOpc[0]),
+ FlagReg1);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SETFOpc[1]),
+ FlagReg2);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SETFOpc[2]),
+ ResultReg).addReg(FlagReg1).addReg(FlagReg2);
UpdateValueMap(I, ResultReg);
return true;
}
- case CmpInst::FCMP_OGT: SwapArgs = false; SetCCOpc = X86::SETAr; break;
- case CmpInst::FCMP_OGE: SwapArgs = false; SetCCOpc = X86::SETAEr; break;
- case CmpInst::FCMP_OLT: SwapArgs = true; SetCCOpc = X86::SETAr; break;
- case CmpInst::FCMP_OLE: SwapArgs = true; SetCCOpc = X86::SETAEr; break;
- case CmpInst::FCMP_ONE: SwapArgs = false; SetCCOpc = X86::SETNEr; break;
- case CmpInst::FCMP_ORD: SwapArgs = false; SetCCOpc = X86::SETNPr; break;
- case CmpInst::FCMP_UNO: SwapArgs = false; SetCCOpc = X86::SETPr; break;
- case CmpInst::FCMP_UEQ: SwapArgs = false; SetCCOpc = X86::SETEr; break;
- case CmpInst::FCMP_UGT: SwapArgs = true; SetCCOpc = X86::SETBr; break;
- case CmpInst::FCMP_UGE: SwapArgs = true; SetCCOpc = X86::SETBEr; break;
- case CmpInst::FCMP_ULT: SwapArgs = false; SetCCOpc = X86::SETBr; break;
- case CmpInst::FCMP_ULE: SwapArgs = false; SetCCOpc = X86::SETBEr; break;
-
- case CmpInst::ICMP_EQ: SwapArgs = false; SetCCOpc = X86::SETEr; break;
- case CmpInst::ICMP_NE: SwapArgs = false; SetCCOpc = X86::SETNEr; break;
- case CmpInst::ICMP_UGT: SwapArgs = false; SetCCOpc = X86::SETAr; break;
- case CmpInst::ICMP_UGE: SwapArgs = false; SetCCOpc = X86::SETAEr; break;
- case CmpInst::ICMP_ULT: SwapArgs = false; SetCCOpc = X86::SETBr; break;
- case CmpInst::ICMP_ULE: SwapArgs = false; SetCCOpc = X86::SETBEr; break;
- case CmpInst::ICMP_SGT: SwapArgs = false; SetCCOpc = X86::SETGr; break;
- case CmpInst::ICMP_SGE: SwapArgs = false; SetCCOpc = X86::SETGEr; break;
- case CmpInst::ICMP_SLT: SwapArgs = false; SetCCOpc = X86::SETLr; break;
- case CmpInst::ICMP_SLE: SwapArgs = false; SetCCOpc = X86::SETLEr; break;
- default:
- return false;
- }
- const Value *Op0 = CI->getOperand(0), *Op1 = CI->getOperand(1);
+ X86::CondCode CC;
+ bool SwapArgs;
+ std::tie(CC, SwapArgs) = getX86ConditionCode(Predicate);
+ assert(CC <= X86::LAST_VALID_COND && "Unexpected condition code.");
+ unsigned Opc = X86::getSETFromCond(CC);
+
if (SwapArgs)
- std::swap(Op0, Op1);
+ std::swap(LHS, RHS);
- // Emit a compare of Op0/Op1.
- if (!X86FastEmitCompare(Op0, Op1, VT))
+ // Emit a compare of LHS/RHS.
+ if (!X86FastEmitCompare(LHS, RHS, VT))
return false;
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SetCCOpc), ResultReg);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg);
UpdateValueMap(I, ResultReg);
return true;
}
@@ -1126,73 +1336,88 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) {
// Fold the common case of a conditional branch with a comparison
// in the same block (values defined on other blocks may not have
// initialized registers).
+ X86::CondCode CC;
if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
if (CI->hasOneUse() && CI->getParent() == I->getParent()) {
EVT VT = TLI.getValueType(CI->getOperand(0)->getType());
+ // Try to optimize or fold the cmp.
+ CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
+ switch (Predicate) {
+ default: break;
+ case CmpInst::FCMP_FALSE: FastEmitBranch(FalseMBB, DbgLoc); return true;
+ case CmpInst::FCMP_TRUE: FastEmitBranch(TrueMBB, DbgLoc); return true;
+ }
+
+ const Value *CmpLHS = CI->getOperand(0);
+ const Value *CmpRHS = CI->getOperand(1);
+
+      // The optimizer might have replaced fcmp oeq %x, %x with fcmp ord %x,
+      // 0.0. We don't have to materialize a zero constant for this case and
+      // can just use %x again on the RHS.
+ if (Predicate == CmpInst::FCMP_ORD || Predicate == CmpInst::FCMP_UNO) {
+ const auto *CmpRHSC = dyn_cast<ConstantFP>(CmpRHS);
+ if (CmpRHSC && CmpRHSC->isNullValue())
+ CmpRHS = CmpLHS;
+ }
+
// Try to take advantage of fallthrough opportunities.
- CmpInst::Predicate Predicate = CI->getPredicate();
if (FuncInfo.MBB->isLayoutSuccessor(TrueMBB)) {
std::swap(TrueMBB, FalseMBB);
Predicate = CmpInst::getInversePredicate(Predicate);
}
- bool SwapArgs; // false -> compare Op0, Op1. true -> compare Op1, Op0.
- unsigned BranchOpc; // Opcode to jump on, e.g. "X86::JA"
-
+    // FCMP_OEQ and FCMP_UNE cannot be expressed with a single flag/condition
+    // code check. Instead two branch instructions are required to check all
+    // the flags. First we change the predicate to a supported condition code,
+    // which is checked by the first branch. Later on we will emit the second
+    // branch.
+ bool NeedExtraBranch = false;
switch (Predicate) {
+ default: break;
case CmpInst::FCMP_OEQ:
- std::swap(TrueMBB, FalseMBB);
- Predicate = CmpInst::FCMP_UNE;
- // FALL THROUGH
- case CmpInst::FCMP_UNE: SwapArgs = false; BranchOpc = X86::JNE_4; break;
- case CmpInst::FCMP_OGT: SwapArgs = false; BranchOpc = X86::JA_4; break;
- case CmpInst::FCMP_OGE: SwapArgs = false; BranchOpc = X86::JAE_4; break;
- case CmpInst::FCMP_OLT: SwapArgs = true; BranchOpc = X86::JA_4; break;
- case CmpInst::FCMP_OLE: SwapArgs = true; BranchOpc = X86::JAE_4; break;
- case CmpInst::FCMP_ONE: SwapArgs = false; BranchOpc = X86::JNE_4; break;
- case CmpInst::FCMP_ORD: SwapArgs = false; BranchOpc = X86::JNP_4; break;
- case CmpInst::FCMP_UNO: SwapArgs = false; BranchOpc = X86::JP_4; break;
- case CmpInst::FCMP_UEQ: SwapArgs = false; BranchOpc = X86::JE_4; break;
- case CmpInst::FCMP_UGT: SwapArgs = true; BranchOpc = X86::JB_4; break;
- case CmpInst::FCMP_UGE: SwapArgs = true; BranchOpc = X86::JBE_4; break;
- case CmpInst::FCMP_ULT: SwapArgs = false; BranchOpc = X86::JB_4; break;
- case CmpInst::FCMP_ULE: SwapArgs = false; BranchOpc = X86::JBE_4; break;
-
- case CmpInst::ICMP_EQ: SwapArgs = false; BranchOpc = X86::JE_4; break;
- case CmpInst::ICMP_NE: SwapArgs = false; BranchOpc = X86::JNE_4; break;
- case CmpInst::ICMP_UGT: SwapArgs = false; BranchOpc = X86::JA_4; break;
- case CmpInst::ICMP_UGE: SwapArgs = false; BranchOpc = X86::JAE_4; break;
- case CmpInst::ICMP_ULT: SwapArgs = false; BranchOpc = X86::JB_4; break;
- case CmpInst::ICMP_ULE: SwapArgs = false; BranchOpc = X86::JBE_4; break;
- case CmpInst::ICMP_SGT: SwapArgs = false; BranchOpc = X86::JG_4; break;
- case CmpInst::ICMP_SGE: SwapArgs = false; BranchOpc = X86::JGE_4; break;
- case CmpInst::ICMP_SLT: SwapArgs = false; BranchOpc = X86::JL_4; break;
- case CmpInst::ICMP_SLE: SwapArgs = false; BranchOpc = X86::JLE_4; break;
- default:
- return false;
+ std::swap(TrueMBB, FalseMBB); // fall-through
+ case CmpInst::FCMP_UNE:
+ NeedExtraBranch = true;
+ Predicate = CmpInst::FCMP_ONE;
+ break;
}
- const Value *Op0 = CI->getOperand(0), *Op1 = CI->getOperand(1);
+ bool SwapArgs;
+ unsigned BranchOpc;
+ std::tie(CC, SwapArgs) = getX86ConditionCode(Predicate);
+ assert(CC <= X86::LAST_VALID_COND && "Unexpected condition code.");
+
+ BranchOpc = X86::GetCondBranchFromCond(CC);
if (SwapArgs)
- std::swap(Op0, Op1);
+ std::swap(CmpLHS, CmpRHS);
// Emit a compare of the LHS and RHS, setting the flags.
- if (!X86FastEmitCompare(Op0, Op1, VT))
+ if (!X86FastEmitCompare(CmpLHS, CmpRHS, VT))
return false;
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(BranchOpc))
.addMBB(TrueMBB);
- if (Predicate == CmpInst::FCMP_UNE) {
- // X86 requires a second branch to handle UNE (and OEQ,
- // which is mapped to UNE above).
+ // X86 requires a second branch to handle UNE (and OEQ, which is mapped
+ // to UNE above).
+ if (NeedExtraBranch) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::JP_4))
.addMBB(TrueMBB);
}
+ // Obtain the branch weight and add the TrueBB to the successor list.
+ uint32_t BranchWeight = 0;
+ if (FuncInfo.BPI)
+ BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
+ TrueMBB->getBasicBlock());
+ FuncInfo.MBB->addSuccessor(TrueMBB, BranchWeight);
+
+ // Emits an unconditional branch to the FalseBB, obtains the branch
+ // weight, and adds it to the successor list.
FastEmitBranch(FalseMBB, DbgLoc);
- FuncInfo.MBB->addSuccessor(TrueMBB);
+
return true;
}
} else if (TruncInst *TI = dyn_cast<TruncInst>(BI->getCondition())) {
@@ -1224,10 +1449,32 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(JmpOpc))
.addMBB(TrueMBB);
FastEmitBranch(FalseMBB, DbgLoc);
- FuncInfo.MBB->addSuccessor(TrueMBB);
+ uint32_t BranchWeight = 0;
+ if (FuncInfo.BPI)
+ BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
+ TrueMBB->getBasicBlock());
+ FuncInfo.MBB->addSuccessor(TrueMBB, BranchWeight);
return true;
}
}
+ } else if (foldX86XALUIntrinsic(CC, BI, BI->getCondition())) {
+ // Fake request the condition, otherwise the intrinsic might be completely
+ // optimized away.
+ unsigned TmpReg = getRegForValue(BI->getCondition());
+ if (TmpReg == 0)
+ return false;
+
+ unsigned BranchOpc = X86::GetCondBranchFromCond(CC);
+
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(BranchOpc))
+ .addMBB(TrueMBB);
+ FastEmitBranch(FalseMBB, DbgLoc);
+ uint32_t BranchWeight = 0;
+ if (FuncInfo.BPI)
+ BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
+ TrueMBB->getBasicBlock());
+ FuncInfo.MBB->addSuccessor(TrueMBB, BranchWeight);
+ return true;
}
// Otherwise do a clumsy setcc and re-test it.
@@ -1241,7 +1488,11 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::JNE_4))
.addMBB(TrueMBB);
FastEmitBranch(FalseMBB, DbgLoc);
- FuncInfo.MBB->addSuccessor(TrueMBB);
+ uint32_t BranchWeight = 0;
+ if (FuncInfo.BPI)
+ BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
+ TrueMBB->getBasicBlock());
+ FuncInfo.MBB->addSuccessor(TrueMBB, BranchWeight);
return true;
}
@@ -1478,50 +1729,319 @@ bool X86FastISel::X86SelectDivRem(const Instruction *I) {
return true;
}
-bool X86FastISel::X86SelectSelect(const Instruction *I) {
- MVT VT;
- if (!isTypeLegal(I->getType(), VT))
+/// \brief Emit a conditional move instruction (if the subtarget supports it)
+/// to lower the select.
+bool X86FastISel::X86FastEmitCMoveSelect(MVT RetVT, const Instruction *I) {
+ // Check if the subtarget supports these instructions.
+ if (!Subtarget->hasCMov())
return false;
- // We only use cmov here, if we don't have a cmov instruction bail.
- if (!Subtarget->hasCMov()) return false;
+ // FIXME: Add support for i8.
+ if (RetVT < MVT::i16 || RetVT > MVT::i64)
+ return false;
- unsigned Opc = 0;
- const TargetRegisterClass *RC = nullptr;
- if (VT == MVT::i16) {
- Opc = X86::CMOVE16rr;
- RC = &X86::GR16RegClass;
- } else if (VT == MVT::i32) {
- Opc = X86::CMOVE32rr;
- RC = &X86::GR32RegClass;
- } else if (VT == MVT::i64) {
- Opc = X86::CMOVE64rr;
- RC = &X86::GR64RegClass;
- } else {
+ const Value *Cond = I->getOperand(0);
+ const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT);
+ bool NeedTest = true;
+ X86::CondCode CC = X86::COND_NE;
+
+ // Optimize conditions coming from a compare if both instructions are in the
+ // same basic block (values defined in other basic blocks may not have
+ // initialized registers).
+ const auto *CI = dyn_cast<CmpInst>(Cond);
+ if (CI && (CI->getParent() == I->getParent())) {
+ CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
+
+ // FCMP_OEQ and FCMP_UNE cannot be checked with a single instruction.
+ static unsigned SETFOpcTable[2][3] = {
+ { X86::SETNPr, X86::SETEr , X86::TEST8rr },
+ { X86::SETPr, X86::SETNEr, X86::OR8rr }
+ };
+ unsigned *SETFOpc = nullptr;
+ switch (Predicate) {
+ default: break;
+ case CmpInst::FCMP_OEQ:
+ SETFOpc = &SETFOpcTable[0][0];
+ Predicate = CmpInst::ICMP_NE;
+ break;
+ case CmpInst::FCMP_UNE:
+ SETFOpc = &SETFOpcTable[1][0];
+ Predicate = CmpInst::ICMP_NE;
+ break;
+ }
+
+ bool NeedSwap;
+ std::tie(CC, NeedSwap) = getX86ConditionCode(Predicate);
+ assert(CC <= X86::LAST_VALID_COND && "Unexpected condition code.");
+
+ const Value *CmpLHS = CI->getOperand(0);
+ const Value *CmpRHS = CI->getOperand(1);
+ if (NeedSwap)
+ std::swap(CmpLHS, CmpRHS);
+
+ EVT CmpVT = TLI.getValueType(CmpLHS->getType());
+ // Emit a compare of the LHS and RHS, setting the flags.
+ if (!X86FastEmitCompare(CmpLHS, CmpRHS, CmpVT))
+ return false;
+
+ if (SETFOpc) {
+ unsigned FlagReg1 = createResultReg(&X86::GR8RegClass);
+ unsigned FlagReg2 = createResultReg(&X86::GR8RegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SETFOpc[0]),
+ FlagReg1);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SETFOpc[1]),
+ FlagReg2);
+ auto const &II = TII.get(SETFOpc[2]);
+ if (II.getNumDefs()) {
+ unsigned TmpReg = createResultReg(&X86::GR8RegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, TmpReg)
+ .addReg(FlagReg2).addReg(FlagReg1);
+ } else {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
+ .addReg(FlagReg2).addReg(FlagReg1);
+ }
+ }
+ NeedTest = false;
+ } else if (foldX86XALUIntrinsic(CC, I, Cond)) {
+ // Fake request the condition, otherwise the intrinsic might be completely
+ // optimized away.
+ unsigned TmpReg = getRegForValue(Cond);
+ if (TmpReg == 0)
+ return false;
+
+ NeedTest = false;
+ }
+
+ if (NeedTest) {
+    // Selects operate on i1, but CondReg is 8 bits wide and may contain
+    // garbage: only its least significant bit is guaranteed to be accurate.
+    // If we read more than the lsb, we may see non-zero values where the lsb
+    // is zero. Therefore, truncate CondReg to i1 for the select by performing
+    // a TEST against 1.
+ unsigned CondReg = getRegForValue(Cond);
+ if (CondReg == 0)
+ return false;
+ bool CondIsKill = hasTrivialKill(Cond);
+
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri))
+ .addReg(CondReg, getKillRegState(CondIsKill)).addImm(1);
+ }
+
+ const Value *LHS = I->getOperand(1);
+ const Value *RHS = I->getOperand(2);
+
+ unsigned RHSReg = getRegForValue(RHS);
+ bool RHSIsKill = hasTrivialKill(RHS);
+
+ unsigned LHSReg = getRegForValue(LHS);
+ bool LHSIsKill = hasTrivialKill(LHS);
+
+ if (!LHSReg || !RHSReg)
+ return false;
+
+ unsigned Opc = X86::getCMovFromCond(CC, RC->getSize());
+ unsigned ResultReg = FastEmitInst_rr(Opc, RC, RHSReg, RHSIsKill,
+ LHSReg, LHSIsKill);
+ UpdateValueMap(I, ResultReg);
+ return true;
+}
+
+/// \brief Emit SSE instructions to lower the select.
+///
+/// Try to use SSE1/SSE2 instructions to simulate a select without branches.
+/// This lowers fp selects into a CMP/AND/ANDN/OR sequence when the necessary
+/// SSE instructions are available.
+bool X86FastISel::X86FastEmitSSESelect(MVT RetVT, const Instruction *I) {
+ // Optimize conditions coming from a compare if both instructions are in the
+ // same basic block (values defined in other basic blocks may not have
+ // initialized registers).
+ const auto *CI = dyn_cast<FCmpInst>(I->getOperand(0));
+ if (!CI || (CI->getParent() != I->getParent()))
return false;
+
+ if (I->getType() != CI->getOperand(0)->getType() ||
+ !((Subtarget->hasSSE1() && RetVT == MVT::f32) ||
+        (Subtarget->hasSSE2() && RetVT == MVT::f64)))
+ return false;
+
+ const Value *CmpLHS = CI->getOperand(0);
+ const Value *CmpRHS = CI->getOperand(1);
+ CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
+
+ // The optimizer might have replaced fcmp oeq %x, %x with fcmp ord %x, 0.0.
+ // We don't have to materialize a zero constant for this case and can just use
+ // %x again on the RHS.
+ if (Predicate == CmpInst::FCMP_ORD || Predicate == CmpInst::FCMP_UNO) {
+ const auto *CmpRHSC = dyn_cast<ConstantFP>(CmpRHS);
+ if (CmpRHSC && CmpRHSC->isNullValue())
+ CmpRHS = CmpLHS;
}
- unsigned Op0Reg = getRegForValue(I->getOperand(0));
- if (Op0Reg == 0) return false;
- unsigned Op1Reg = getRegForValue(I->getOperand(1));
- if (Op1Reg == 0) return false;
- unsigned Op2Reg = getRegForValue(I->getOperand(2));
- if (Op2Reg == 0) return false;
-
- // Selects operate on i1, however, Op0Reg is 8 bits width and may contain
- // garbage. Indeed, only the less significant bit is supposed to be accurate.
- // If we read more than the lsb, we may see non-zero values whereas lsb
- // is zero. Therefore, we have to truncate Op0Reg to i1 for the select.
- // This is achieved by performing TEST against 1.
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri))
- .addReg(Op0Reg).addImm(1);
- unsigned ResultReg = createResultReg(RC);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
- .addReg(Op1Reg).addReg(Op2Reg);
+ unsigned CC;
+ bool NeedSwap;
+ std::tie(CC, NeedSwap) = getX86SSEConditionCode(Predicate);
+ if (CC > 7)
+ return false;
+
+ if (NeedSwap)
+ std::swap(CmpLHS, CmpRHS);
+
+ static unsigned OpcTable[2][2][4] = {
+ { { X86::CMPSSrr, X86::FsANDPSrr, X86::FsANDNPSrr, X86::FsORPSrr },
+ { X86::VCMPSSrr, X86::VFsANDPSrr, X86::VFsANDNPSrr, X86::VFsORPSrr } },
+ { { X86::CMPSDrr, X86::FsANDPDrr, X86::FsANDNPDrr, X86::FsORPDrr },
+ { X86::VCMPSDrr, X86::VFsANDPDrr, X86::VFsANDNPDrr, X86::VFsORPDrr } }
+ };
+
+ bool HasAVX = Subtarget->hasAVX();
+ unsigned *Opc = nullptr;
+ switch (RetVT.SimpleTy) {
+ default: return false;
+ case MVT::f32: Opc = &OpcTable[0][HasAVX][0]; break;
+ case MVT::f64: Opc = &OpcTable[1][HasAVX][0]; break;
+ }
+
+ const Value *LHS = I->getOperand(1);
+ const Value *RHS = I->getOperand(2);
+
+ unsigned LHSReg = getRegForValue(LHS);
+ bool LHSIsKill = hasTrivialKill(LHS);
+
+ unsigned RHSReg = getRegForValue(RHS);
+ bool RHSIsKill = hasTrivialKill(RHS);
+
+ unsigned CmpLHSReg = getRegForValue(CmpLHS);
+ bool CmpLHSIsKill = hasTrivialKill(CmpLHS);
+
+ unsigned CmpRHSReg = getRegForValue(CmpRHS);
+ bool CmpRHSIsKill = hasTrivialKill(CmpRHS);
+
+  if (!LHSReg || !RHSReg || !CmpLHSReg || !CmpRHSReg)
+ return false;
+
+ const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT);
+ unsigned CmpReg = FastEmitInst_rri(Opc[0], RC, CmpLHSReg, CmpLHSIsKill,
+ CmpRHSReg, CmpRHSIsKill, CC);
+ unsigned AndReg = FastEmitInst_rr(Opc[1], RC, CmpReg, /*IsKill=*/false,
+ LHSReg, LHSIsKill);
+ unsigned AndNReg = FastEmitInst_rr(Opc[2], RC, CmpReg, /*IsKill=*/true,
+ RHSReg, RHSIsKill);
+ unsigned ResultReg = FastEmitInst_rr(Opc[3], RC, AndNReg, /*IsKill=*/true,
+ AndReg, /*IsKill=*/true);
+ UpdateValueMap(I, ResultReg);
+ return true;
+}
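+// For example (a sketch, f32 with plain SSE1), for
+//   select i1 (fcmp olt float %a, float %b), float %x, float %y
+// this emits roughly: cmpltss to build an all-ones/all-zeros mask from a < b,
+// andps for mask & x, andnps for ~mask & y, and orps to merge the two halves.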
+
+bool X86FastISel::X86FastEmitPseudoSelect(MVT RetVT, const Instruction *I) {
+  // These are pseudo CMOV instructions that will later be expanded into
+  // control flow.
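+  // (The expansion into explicit branches and a phi is assumed to happen
+  // later, during custom instruction insertion in the X86 backend.)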
+ unsigned Opc;
+ switch (RetVT.SimpleTy) {
+ default: return false;
+ case MVT::i8: Opc = X86::CMOV_GR8; break;
+ case MVT::i16: Opc = X86::CMOV_GR16; break;
+ case MVT::i32: Opc = X86::CMOV_GR32; break;
+ case MVT::f32: Opc = X86::CMOV_FR32; break;
+ case MVT::f64: Opc = X86::CMOV_FR64; break;
+ }
+
+ const Value *Cond = I->getOperand(0);
+ X86::CondCode CC = X86::COND_NE;
+
+ // Optimize conditions coming from a compare if both instructions are in the
+ // same basic block (values defined in other basic blocks may not have
+ // initialized registers).
+ const auto *CI = dyn_cast<CmpInst>(Cond);
+ if (CI && (CI->getParent() == I->getParent())) {
+ bool NeedSwap;
+ std::tie(CC, NeedSwap) = getX86ConditionCode(CI->getPredicate());
+ if (CC > X86::LAST_VALID_COND)
+ return false;
+
+ const Value *CmpLHS = CI->getOperand(0);
+ const Value *CmpRHS = CI->getOperand(1);
+
+ if (NeedSwap)
+ std::swap(CmpLHS, CmpRHS);
+
+ EVT CmpVT = TLI.getValueType(CmpLHS->getType());
+ if (!X86FastEmitCompare(CmpLHS, CmpRHS, CmpVT))
+ return false;
+ } else {
+ unsigned CondReg = getRegForValue(Cond);
+ if (CondReg == 0)
+ return false;
+ bool CondIsKill = hasTrivialKill(Cond);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri))
+ .addReg(CondReg, getKillRegState(CondIsKill)).addImm(1);
+ }
+
+ const Value *LHS = I->getOperand(1);
+ const Value *RHS = I->getOperand(2);
+
+ unsigned LHSReg = getRegForValue(LHS);
+ bool LHSIsKill = hasTrivialKill(LHS);
+
+ unsigned RHSReg = getRegForValue(RHS);
+ bool RHSIsKill = hasTrivialKill(RHS);
+
+ if (!LHSReg || !RHSReg)
+ return false;
+
+ const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT);
+
+ unsigned ResultReg =
+ FastEmitInst_rri(Opc, RC, RHSReg, RHSIsKill, LHSReg, LHSIsKill, CC);
UpdateValueMap(I, ResultReg);
return true;
}
+bool X86FastISel::X86SelectSelect(const Instruction *I) {
+ MVT RetVT;
+ if (!isTypeLegal(I->getType(), RetVT))
+ return false;
+
+ // Check if we can fold the select.
+ if (const auto *CI = dyn_cast<CmpInst>(I->getOperand(0))) {
+ CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
+ const Value *Opnd = nullptr;
+ switch (Predicate) {
+ default: break;
+ case CmpInst::FCMP_FALSE: Opnd = I->getOperand(2); break;
+ case CmpInst::FCMP_TRUE: Opnd = I->getOperand(1); break;
+ }
+ // No need for a select anymore - this is an unconditional move.
+ if (Opnd) {
+ unsigned OpReg = getRegForValue(Opnd);
+ if (OpReg == 0)
+ return false;
+ bool OpIsKill = hasTrivialKill(Opnd);
+ const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT);
+ unsigned ResultReg = createResultReg(RC);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), ResultReg)
+ .addReg(OpReg, getKillRegState(OpIsKill));
+ UpdateValueMap(I, ResultReg);
+ return true;
+ }
+ }
+
+ // First try to use real conditional move instructions.
+ if (X86FastEmitCMoveSelect(RetVT, I))
+ return true;
+
+ // Try to use a sequence of SSE instructions to simulate a conditional move.
+ if (X86FastEmitSSESelect(RetVT, I))
+ return true;
+
+ // Fall-back to pseudo conditional move instructions, which will be later
+ // converted to control-flow.
+ if (X86FastEmitPseudoSelect(RetVT, I))
+ return true;
+
+ return false;
+}
+
bool X86FastISel::X86SelectFPExt(const Instruction *I) {
// fpext from float to double.
if (X86ScalarSSEf64 &&
@@ -1633,8 +2153,8 @@ bool X86FastISel::TryEmitSmallMemcpy(X86AddressMode DestAM,
}
unsigned Reg;
- bool RV = X86FastEmitLoad(VT, SrcAM, Reg);
- RV &= X86FastEmitStore(VT, Reg, DestAM);
+ bool RV = X86FastEmitLoad(VT, SrcAM, nullptr, Reg);
+ RV &= X86FastEmitStore(VT, Reg, /*Kill=*/true, DestAM);
assert(RV && "Failed to emit load or store??");
unsigned Size = VT.getSizeInBits()/8;
@@ -1646,10 +2166,74 @@ bool X86FastISel::TryEmitSmallMemcpy(X86AddressMode DestAM,
return true;
}
+static bool isCommutativeIntrinsic(IntrinsicInst const &I) {
+ switch (I.getIntrinsicID()) {
+ case Intrinsic::sadd_with_overflow:
+ case Intrinsic::uadd_with_overflow:
+ case Intrinsic::smul_with_overflow:
+ case Intrinsic::umul_with_overflow:
+ return true;
+ default:
+ return false;
+ }
+}
+
bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) {
// FIXME: Handle more intrinsics.
switch (I.getIntrinsicID()) {
default: return false;
+ case Intrinsic::frameaddress: {
+ Type *RetTy = I.getCalledFunction()->getReturnType();
+
+ MVT VT;
+ if (!isTypeLegal(RetTy, VT))
+ return false;
+
+ unsigned Opc;
+ const TargetRegisterClass *RC = nullptr;
+
+ switch (VT.SimpleTy) {
+ default: llvm_unreachable("Invalid result type for frameaddress.");
+ case MVT::i32: Opc = X86::MOV32rm; RC = &X86::GR32RegClass; break;
+ case MVT::i64: Opc = X86::MOV64rm; RC = &X86::GR64RegClass; break;
+ }
+
+ // This needs to be set before we call getFrameRegister, otherwise we get
+ // the wrong frame register.
+ MachineFrameInfo *MFI = FuncInfo.MF->getFrameInfo();
+ MFI->setFrameAddressIsTaken(true);
+
+ const X86RegisterInfo *RegInfo =
+ static_cast<const X86RegisterInfo*>(TM.getRegisterInfo());
+ unsigned FrameReg = RegInfo->getFrameRegister(*(FuncInfo.MF));
+ assert(((FrameReg == X86::RBP && VT == MVT::i64) ||
+ (FrameReg == X86::EBP && VT == MVT::i32)) &&
+ "Invalid Frame Register!");
+
+    // Always make a copy of the frame register to a vreg first, so that we
+    // never directly reference the frame register (the
+    // TwoAddressInstructionPass doesn't like that).
+ unsigned SrcReg = createResultReg(RC);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), SrcReg).addReg(FrameReg);
+
+ // Now recursively load from the frame address.
+ // movq (%rbp), %rax
+ // movq (%rax), %rax
+ // movq (%rax), %rax
+ // ...
+ unsigned DestReg;
+ unsigned Depth = cast<ConstantInt>(I.getOperand(0))->getZExtValue();
+ while (Depth--) {
+ DestReg = createResultReg(RC);
+ addDirectMem(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(Opc), DestReg), SrcReg);
+ SrcReg = DestReg;
+ }
+
+ UpdateValueMap(&I, SrcReg);
+ return true;
+ }
case Intrinsic::memcpy: {
const MemCpyInst &MCI = cast<MemCpyInst>(I);
// Don't handle volatile or variable length memcpys.
@@ -1726,52 +2310,233 @@ bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TRAP));
return true;
}
- case Intrinsic::sadd_with_overflow:
- case Intrinsic::uadd_with_overflow: {
- // FIXME: Should fold immediates.
+ case Intrinsic::sqrt: {
+ if (!Subtarget->hasSSE1())
+ return false;
- // Replace "add with overflow" intrinsics with an "add" instruction followed
- // by a seto/setc instruction.
- const Function *Callee = I.getCalledFunction();
- Type *RetTy =
- cast<StructType>(Callee->getReturnType())->getTypeAtIndex(unsigned(0));
+ Type *RetTy = I.getCalledFunction()->getReturnType();
MVT VT;
if (!isTypeLegal(RetTy, VT))
return false;
- const Value *Op1 = I.getArgOperand(0);
- const Value *Op2 = I.getArgOperand(1);
- unsigned Reg1 = getRegForValue(Op1);
- unsigned Reg2 = getRegForValue(Op2);
+ // Unfortunately we can't use FastEmit_r, because the AVX version of FSQRT
+ // is not generated by FastISel yet.
+ // FIXME: Update this code once tablegen can handle it.
+ static const unsigned SqrtOpc[2][2] = {
+ {X86::SQRTSSr, X86::VSQRTSSr},
+ {X86::SQRTSDr, X86::VSQRTSDr}
+ };
+ bool HasAVX = Subtarget->hasAVX();
+ unsigned Opc;
+ const TargetRegisterClass *RC;
+ switch (VT.SimpleTy) {
+ default: return false;
+ case MVT::f32: Opc = SqrtOpc[0][HasAVX]; RC = &X86::FR32RegClass; break;
+ case MVT::f64: Opc = SqrtOpc[1][HasAVX]; RC = &X86::FR64RegClass; break;
+ }
+
+ const Value *SrcVal = I.getArgOperand(0);
+ unsigned SrcReg = getRegForValue(SrcVal);
- if (Reg1 == 0 || Reg2 == 0)
- // FIXME: Handle values *not* in registers.
+ if (SrcReg == 0)
return false;
- unsigned OpC = 0;
- if (VT == MVT::i32)
- OpC = X86::ADD32rr;
- else if (VT == MVT::i64)
- OpC = X86::ADD64rr;
- else
+ unsigned ImplicitDefReg = 0;
+ if (HasAVX) {
+ ImplicitDefReg = createResultReg(RC);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::IMPLICIT_DEF), ImplicitDefReg);
+ }
+
+ unsigned ResultReg = createResultReg(RC);
+ MachineInstrBuilder MIB;
+ MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc),
+ ResultReg);
+
+ if (ImplicitDefReg)
+ MIB.addReg(ImplicitDefReg);
+
+ MIB.addReg(SrcReg);
+
+ UpdateValueMap(&I, ResultReg);
+ return true;
+ }
+ case Intrinsic::sadd_with_overflow:
+ case Intrinsic::uadd_with_overflow:
+ case Intrinsic::ssub_with_overflow:
+ case Intrinsic::usub_with_overflow:
+ case Intrinsic::smul_with_overflow:
+ case Intrinsic::umul_with_overflow: {
+ // This implements the basic lowering of the xalu with overflow intrinsics
+ // into add/sub/mul followed by either seto or setb.
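+    // E.g. (a sketch), %r = call { i32, i1 } @llvm.uadd.with.overflow.i32(
+    //   i32 %a, i32 %b)
+    // lowers to an addl followed by setb, leaving the value and the overflow
+    // bit in consecutive result registers.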
+ const Function *Callee = I.getCalledFunction();
+ auto *Ty = cast<StructType>(Callee->getReturnType());
+ Type *RetTy = Ty->getTypeAtIndex(0U);
+ Type *CondTy = Ty->getTypeAtIndex(1);
+
+ MVT VT;
+ if (!isTypeLegal(RetTy, VT))
+ return false;
+
+ if (VT < MVT::i8 || VT > MVT::i64)
+ return false;
+
+ const Value *LHS = I.getArgOperand(0);
+ const Value *RHS = I.getArgOperand(1);
+
+ // Canonicalize immediate to the RHS.
+ if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) &&
+ isCommutativeIntrinsic(I))
+ std::swap(LHS, RHS);
+
+ unsigned BaseOpc, CondOpc;
+ switch (I.getIntrinsicID()) {
+ default: llvm_unreachable("Unexpected intrinsic!");
+ case Intrinsic::sadd_with_overflow:
+ BaseOpc = ISD::ADD; CondOpc = X86::SETOr; break;
+ case Intrinsic::uadd_with_overflow:
+ BaseOpc = ISD::ADD; CondOpc = X86::SETBr; break;
+ case Intrinsic::ssub_with_overflow:
+ BaseOpc = ISD::SUB; CondOpc = X86::SETOr; break;
+ case Intrinsic::usub_with_overflow:
+ BaseOpc = ISD::SUB; CondOpc = X86::SETBr; break;
+ case Intrinsic::smul_with_overflow:
+ BaseOpc = X86ISD::SMUL; CondOpc = X86::SETOr; break;
+ case Intrinsic::umul_with_overflow:
+ BaseOpc = X86ISD::UMUL; CondOpc = X86::SETOr; break;
+ }
+
+ unsigned LHSReg = getRegForValue(LHS);
+ if (LHSReg == 0)
return false;
+ bool LHSIsKill = hasTrivialKill(LHS);
- // The call to CreateRegs builds two sequential registers, to store the
- // both the returned values.
- unsigned ResultReg = FuncInfo.CreateRegs(I.getType());
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(OpC), ResultReg)
- .addReg(Reg1).addReg(Reg2);
+ unsigned ResultReg = 0;
+ // Check if we have an immediate version.
+ if (auto const *C = dyn_cast<ConstantInt>(RHS)) {
+ ResultReg = FastEmit_ri(VT, VT, BaseOpc, LHSReg, LHSIsKill,
+ C->getZExtValue());
+ }
- unsigned Opc = X86::SETBr;
- if (I.getIntrinsicID() == Intrinsic::sadd_with_overflow)
- Opc = X86::SETOr;
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc),
- ResultReg + 1);
+ unsigned RHSReg;
+ bool RHSIsKill;
+ if (!ResultReg) {
+ RHSReg = getRegForValue(RHS);
+ if (RHSReg == 0)
+ return false;
+ RHSIsKill = hasTrivialKill(RHS);
+ ResultReg = FastEmit_rr(VT, VT, BaseOpc, LHSReg, LHSIsKill, RHSReg,
+ RHSIsKill);
+ }
+
+ // FastISel doesn't have a pattern for all X86::MUL*r and X86::IMUL*r. Emit
+ // it manually.
+ if (BaseOpc == X86ISD::UMUL && !ResultReg) {
+ static const unsigned MULOpc[] =
+ { X86::MUL8r, X86::MUL16r, X86::MUL32r, X86::MUL64r };
+ static const unsigned Reg[] = { X86::AL, X86::AX, X86::EAX, X86::RAX };
+ // First copy the first operand into RAX, which is an implicit input to
+ // the X86::MUL*r instruction.
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), Reg[VT.SimpleTy-MVT::i8])
+ .addReg(LHSReg, getKillRegState(LHSIsKill));
+ ResultReg = FastEmitInst_r(MULOpc[VT.SimpleTy-MVT::i8],
+ TLI.getRegClassFor(VT), RHSReg, RHSIsKill);
+ } else if (BaseOpc == X86ISD::SMUL && !ResultReg) {
+ static const unsigned MULOpc[] =
+ { X86::IMUL8r, X86::IMUL16rr, X86::IMUL32rr, X86::IMUL64rr };
+ if (VT == MVT::i8) {
+ // Copy the first operand into AL, which is an implicit input to the
+ // X86::IMUL8r instruction.
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), X86::AL)
+ .addReg(LHSReg, getKillRegState(LHSIsKill));
+ ResultReg = FastEmitInst_r(MULOpc[0], TLI.getRegClassFor(VT), RHSReg,
+ RHSIsKill);
+ } else
+ ResultReg = FastEmitInst_rr(MULOpc[VT.SimpleTy-MVT::i8],
+ TLI.getRegClassFor(VT), LHSReg, LHSIsKill,
+ RHSReg, RHSIsKill);
+ }
+
+ if (!ResultReg)
+ return false;
+
+ unsigned ResultReg2 = FuncInfo.CreateRegs(CondTy);
+ assert((ResultReg+1) == ResultReg2 && "Nonconsecutive result registers.");
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CondOpc),
+ ResultReg2);
UpdateValueMap(&I, ResultReg, 2);
return true;
}
+ case Intrinsic::x86_sse_cvttss2si:
+ case Intrinsic::x86_sse_cvttss2si64:
+ case Intrinsic::x86_sse2_cvttsd2si:
+ case Intrinsic::x86_sse2_cvttsd2si64: {
+ bool IsInputDouble;
+ switch (I.getIntrinsicID()) {
+ default: llvm_unreachable("Unexpected intrinsic.");
+ case Intrinsic::x86_sse_cvttss2si:
+ case Intrinsic::x86_sse_cvttss2si64:
+ if (!Subtarget->hasSSE1())
+ return false;
+ IsInputDouble = false;
+ break;
+ case Intrinsic::x86_sse2_cvttsd2si:
+ case Intrinsic::x86_sse2_cvttsd2si64:
+ if (!Subtarget->hasSSE2())
+ return false;
+ IsInputDouble = true;
+ break;
+ }
+
+ Type *RetTy = I.getCalledFunction()->getReturnType();
+ MVT VT;
+ if (!isTypeLegal(RetTy, VT))
+ return false;
+
+ static const unsigned CvtOpc[2][2][2] = {
+ { { X86::CVTTSS2SIrr, X86::VCVTTSS2SIrr },
+ { X86::CVTTSS2SI64rr, X86::VCVTTSS2SI64rr } },
+ { { X86::CVTTSD2SIrr, X86::VCVTTSD2SIrr },
+ { X86::CVTTSD2SI64rr, X86::VCVTTSD2SI64rr } }
+ };
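+ // Indexed as CvtOpc[IsInputDouble][Is64BitResult][HasAVX].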
+ bool HasAVX = Subtarget->hasAVX();
+ unsigned Opc;
+ switch (VT.SimpleTy) {
+ default: llvm_unreachable("Unexpected result type.");
+ case MVT::i32: Opc = CvtOpc[IsInputDouble][0][HasAVX]; break;
+ case MVT::i64: Opc = CvtOpc[IsInputDouble][1][HasAVX]; break;
+ }
+
+ // Check if we can fold insertelement instructions into the convert.
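+ // The conversion only reads element 0 of the source vector, so an insert
+ // into lane 0 hands us the scalar directly, while inserts into any other
+ // lane can be looked through.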
+ const Value *Op = I.getArgOperand(0);
+ while (auto *IE = dyn_cast<InsertElementInst>(Op)) {
+ const Value *Index = IE->getOperand(2);
+ if (!isa<ConstantInt>(Index))
+ break;
+ unsigned Idx = cast<ConstantInt>(Index)->getZExtValue();
+
+ if (Idx == 0) {
+ Op = IE->getOperand(1);
+ break;
+ }
+ Op = IE->getOperand(0);
+ }
+
+ unsigned Reg = getRegForValue(Op);
+ if (Reg == 0)
+ return false;
+
+ unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
+ .addReg(Reg);
+
+ UpdateValueMap(&I, ResultReg);
+ return true;
+ }
}
}
@@ -1794,31 +2559,43 @@ bool X86FastISel::FastLowerArguments() {
return false;
// Only handle simple cases, i.e. up to 6 i32/i64 scalar GPR arguments and
// up to 8 f32/f64 XMM arguments.
- unsigned Idx = 1;
- for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
- I != E; ++I, ++Idx) {
- if (Idx > 6)
- return false;
-
+ unsigned GPRCnt = 0;
+ unsigned FPRCnt = 0;
+ unsigned Idx = 0;
+ for (auto const &Arg : F->args()) {
+ // Attribute indices are 1-based (index 0 refers to the return value), so
+ // the first argument is at index 1.
+ ++Idx;
if (F->getAttributes().hasAttribute(Idx, Attribute::ByVal) ||
F->getAttributes().hasAttribute(Idx, Attribute::InReg) ||
F->getAttributes().hasAttribute(Idx, Attribute::StructRet) ||
F->getAttributes().hasAttribute(Idx, Attribute::Nest))
return false;
- Type *ArgTy = I->getType();
+ Type *ArgTy = Arg.getType();
if (ArgTy->isStructTy() || ArgTy->isArrayTy() || ArgTy->isVectorTy())
return false;
EVT ArgVT = TLI.getValueType(ArgTy);
if (!ArgVT.isSimple()) return false;
switch (ArgVT.getSimpleVT().SimpleTy) {
+ default: return false;
case MVT::i32:
case MVT::i64:
+ ++GPRCnt;
+ break;
+ case MVT::f32:
+ case MVT::f64:
+ if (!Subtarget->hasSSE1())
+ return false;
+ ++FPRCnt;
break;
- default:
- return false;
}
+
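+ // Bail out once we exceed the six integer argument registers
+ // (RDI, RSI, RDX, RCX, R8, R9) or the eight XMM argument registers
+ // (XMM0-XMM7) used below.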
+ if (GPRCnt > 6)
+ return false;
+
+ if (FPRCnt > 8)
+ return false;
}
static const MCPhysReg GPR32ArgRegs[] = {
@@ -1827,24 +2604,33 @@ bool X86FastISel::FastLowerArguments() {
static const MCPhysReg GPR64ArgRegs[] = {
X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8 , X86::R9
};
+ static const MCPhysReg XMMArgRegs[] = {
+ X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
+ X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
+ };
- Idx = 0;
- const TargetRegisterClass *RC32 = TLI.getRegClassFor(MVT::i32);
- const TargetRegisterClass *RC64 = TLI.getRegClassFor(MVT::i64);
- for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
- I != E; ++I, ++Idx) {
- bool is32Bit = TLI.getValueType(I->getType()) == MVT::i32;
- const TargetRegisterClass *RC = is32Bit ? RC32 : RC64;
- unsigned SrcReg = is32Bit ? GPR32ArgRegs[Idx] : GPR64ArgRegs[Idx];
+ unsigned GPRIdx = 0;
+ unsigned FPRIdx = 0;
+ for (auto const &Arg : F->args()) {
+ MVT VT = TLI.getSimpleValueType(Arg.getType());
+ const TargetRegisterClass *RC = TLI.getRegClassFor(VT);
+ unsigned SrcReg;
+ switch (VT.SimpleTy) {
+ default: llvm_unreachable("Unexpected value type.");
+ case MVT::i32: SrcReg = GPR32ArgRegs[GPRIdx++]; break;
+ case MVT::i64: SrcReg = GPR64ArgRegs[GPRIdx++]; break;
+ case MVT::f32: // fall-through
+ case MVT::f64: SrcReg = XMMArgRegs[FPRIdx++]; break;
+ }
unsigned DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);
// FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
// Without this, EmitLiveInCopies may eliminate the livein if its only
// use is a bitcast (which isn't turned into an instruction).
unsigned ResultReg = createResultReg(RC);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
- TII.get(TargetOpcode::COPY),
- ResultReg).addReg(DstReg, getKillRegState(true));
- UpdateValueMap(I, ResultReg);
+ TII.get(TargetOpcode::COPY), ResultReg)
+ .addReg(DstReg, getKillRegState(true));
+ UpdateValueMap(&Arg, ResultReg);
}
return true;
}
@@ -2147,7 +2933,7 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) {
if (!X86FastEmitStore(ArgVT, ArgVal, AM))
return false;
} else {
- if (!X86FastEmitStore(ArgVT, Arg, AM))
+ if (!X86FastEmitStore(ArgVT, Arg, /*ValIsKill=*/false, AM))
return false;
}
}
@@ -2430,7 +3216,7 @@ unsigned X86FastISel::TargetMaterializeConstant(const Constant *C) {
return 0;
}
- // Materialize addresses with LEA instructions.
+ // Materialize addresses with LEA/MOV instructions.
if (isa<GlobalValue>(C)) {
X86AddressMode AM;
if (X86SelectAddress(C, AM)) {
@@ -2440,10 +3226,19 @@ unsigned X86FastISel::TargetMaterializeConstant(const Constant *C) {
AM.IndexReg == 0 && AM.Disp == 0 && AM.GV == nullptr)
return AM.Base.Reg;
- Opc = TLI.getPointerTy() == MVT::i32 ? X86::LEA32r : X86::LEA64r;
unsigned ResultReg = createResultReg(RC);
- addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ if (TM.getRelocationModel() == Reloc::Static &&
+ TLI.getPointerTy() == MVT::i64) {
+ // The displacement could be more than 32 bits away, so we need to use
+ // an instruction with a 64-bit immediate.
+ Opc = X86::MOV64ri;
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(Opc), ResultReg).addGlobalAddress(cast<GlobalValue>(C));
+ } else {
+ Opc = TLI.getPointerTy() == MVT::i32 ? X86::LEA32r : X86::LEA64r;
+ addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(Opc), ResultReg), AM);
+ }
return ResultReg;
}
return 0;
@@ -2544,8 +3339,9 @@ unsigned X86FastISel::TargetMaterializeFloatZero(const ConstantFP *CF) {
bool X86FastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
const LoadInst *LI) {
+ const Value *Ptr = LI->getPointerOperand();
X86AddressMode AM;
- if (!X86SelectAddress(LI->getOperand(0), AM))
+ if (!X86SelectAddress(Ptr, AM))
return false;
const X86InstrInfo &XII = (const X86InstrInfo&)TII;
@@ -2553,13 +3349,18 @@ bool X86FastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
unsigned Size = DL.getTypeAllocSize(LI->getType());
unsigned Alignment = LI->getAlignment();
+ if (Alignment == 0) // Ensure that codegen never sees alignment 0
+ Alignment = DL.getABITypeAlignment(LI->getType());
+
SmallVector<MachineOperand, 8> AddrOps;
AM.getFullAddress(AddrOps);
MachineInstr *Result =
XII.foldMemoryOperandImpl(*FuncInfo.MF, MI, OpNo, AddrOps, Size, Alignment);
- if (!Result) return false;
+ if (!Result)
+ return false;
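+ // Carry the load's memory operand (size, alignment, AA metadata) over to
+ // the folded instruction.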
+ Result->addMemOperand(*FuncInfo.MF, createMachineMemOperandFor(LI));
FuncInfo.MBB->insert(FuncInfo.InsertPt, Result);
MI->eraseFromParent();
return true;
diff --git a/lib/Target/X86/X86FixupLEAs.cpp b/lib/Target/X86/X86FixupLEAs.cpp
index 6c5b86f..4be766a 100644
--- a/lib/Target/X86/X86FixupLEAs.cpp
+++ b/lib/Target/X86/X86FixupLEAs.cpp
@@ -32,86 +32,89 @@ using namespace llvm;
STATISTIC(NumLEAs, "Number of LEA instructions created");
namespace {
- class FixupLEAPass : public MachineFunctionPass {
- enum RegUsageState { RU_NotUsed, RU_Write, RU_Read };
- static char ID;
- /// \brief Loop over all of the instructions in the basic block
- /// replacing applicable instructions with LEA instructions,
- /// where appropriate.
- bool processBasicBlock(MachineFunction &MF, MachineFunction::iterator MFI);
+class FixupLEAPass : public MachineFunctionPass {
+ enum RegUsageState { RU_NotUsed, RU_Write, RU_Read };
+ static char ID;
+ /// \brief Loop over all of the instructions in the basic block
+ /// replacing applicable instructions with LEA instructions,
+ /// where appropriate.
+ bool processBasicBlock(MachineFunction &MF, MachineFunction::iterator MFI);
- const char *getPassName() const override { return "X86 Atom LEA Fixup";}
+ const char *getPassName() const override { return "X86 Atom LEA Fixup"; }
- /// \brief Given a machine register, look for the instruction
- /// which writes it in the current basic block. If found,
- /// try to replace it with an equivalent LEA instruction.
- /// If replacement succeeds, then also process the newly created
- /// instruction.
- void seekLEAFixup(MachineOperand& p, MachineBasicBlock::iterator& I,
- MachineFunction::iterator MFI);
+ /// \brief Given a machine register, look for the instruction
+ /// which writes it in the current basic block. If found,
+ /// try to replace it with an equivalent LEA instruction.
+ /// If replacement succeeds, then also process the newly created
+ /// instruction.
+ void seekLEAFixup(MachineOperand &p, MachineBasicBlock::iterator &I,
+ MachineFunction::iterator MFI);
- /// \brief Given a memory access or LEA instruction
- /// whose address mode uses a base and/or index register, look for
- /// an opportunity to replace the instruction which sets the base or index
- /// register with an equivalent LEA instruction.
- void processInstruction(MachineBasicBlock::iterator& I,
- MachineFunction::iterator MFI);
+ /// \brief Given a memory access or LEA instruction
+ /// whose address mode uses a base and/or index register, look for
+ /// an opportunity to replace the instruction which sets the base or index
+ /// register with an equivalent LEA instruction.
+ void processInstruction(MachineBasicBlock::iterator &I,
+ MachineFunction::iterator MFI);
- /// \brief Given a LEA instruction which is unprofitable
- /// on Silvermont try to replace it with an equivalent ADD instruction
- void processInstructionForSLM(MachineBasicBlock::iterator& I,
- MachineFunction::iterator MFI);
+ /// \brief Given a LEA instruction which is unprofitable
+ /// on Silvermont, try to replace it with an equivalent ADD instruction.
+ void processInstructionForSLM(MachineBasicBlock::iterator &I,
+ MachineFunction::iterator MFI);
- /// \brief Determine if an instruction references a machine register
- /// and, if so, whether it reads or writes the register.
- RegUsageState usesRegister(MachineOperand& p,
- MachineBasicBlock::iterator I);
+ /// \brief Determine if an instruction references a machine register
+ /// and, if so, whether it reads or writes the register.
+ RegUsageState usesRegister(MachineOperand &p, MachineBasicBlock::iterator I);
- /// \brief Step backwards through a basic block, looking
- /// for an instruction which writes a register within
- /// a maximum of INSTR_DISTANCE_THRESHOLD instruction latency cycles.
- MachineBasicBlock::iterator searchBackwards(MachineOperand& p,
- MachineBasicBlock::iterator& I,
- MachineFunction::iterator MFI);
+ /// \brief Step backwards through a basic block, looking
+ /// for an instruction which writes a register within
+ /// a maximum of INSTR_DISTANCE_THRESHOLD instruction latency cycles.
+ MachineBasicBlock::iterator searchBackwards(MachineOperand &p,
+ MachineBasicBlock::iterator &I,
+ MachineFunction::iterator MFI);
- /// \brief if an instruction can be converted to an
- /// equivalent LEA, insert the new instruction into the basic block
- /// and return a pointer to it. Otherwise, return zero.
- MachineInstr* postRAConvertToLEA(MachineFunction::iterator &MFI,
- MachineBasicBlock::iterator &MBBI) const;
+ /// \brief If an instruction can be converted to an
+ /// equivalent LEA, insert the new instruction into the basic block
+ /// and return a pointer to it. Otherwise, return nullptr.
+ MachineInstr *postRAConvertToLEA(MachineFunction::iterator &MFI,
+ MachineBasicBlock::iterator &MBBI) const;
- public:
- FixupLEAPass() : MachineFunctionPass(ID) {}
+public:
+ FixupLEAPass() : MachineFunctionPass(ID) {}
- /// \brief Loop over all of the basic blocks,
- /// replacing instructions by equivalent LEA instructions
- /// if needed and when possible.
- bool runOnMachineFunction(MachineFunction &MF) override;
+ /// \brief Loop over all of the basic blocks,
+ /// replacing instructions by equivalent LEA instructions
+ /// if needed and when possible.
+ bool runOnMachineFunction(MachineFunction &MF) override;
- private:
- MachineFunction *MF;
- const TargetMachine *TM;
- const X86InstrInfo *TII; // Machine instruction info.
-
- };
- char FixupLEAPass::ID = 0;
+private:
+ MachineFunction *MF;
+ const TargetMachine *TM;
+ const X86InstrInfo *TII; // Machine instruction info.
+};
+char FixupLEAPass::ID = 0;
}
MachineInstr *
FixupLEAPass::postRAConvertToLEA(MachineFunction::iterator &MFI,
MachineBasicBlock::iterator &MBBI) const {
- MachineInstr* MI = MBBI;
- MachineInstr* NewMI;
+ MachineInstr *MI = MBBI;
+ MachineInstr *NewMI;
switch (MI->getOpcode()) {
case X86::MOV32rr:
case X86::MOV64rr: {
- const MachineOperand& Src = MI->getOperand(1);
- const MachineOperand& Dest = MI->getOperand(0);
+ const MachineOperand &Src = MI->getOperand(1);
+ const MachineOperand &Dest = MI->getOperand(0);
NewMI = BuildMI(*MF, MI->getDebugLoc(),
- TII->get( MI->getOpcode() == X86::MOV32rr ? X86::LEA32r : X86::LEA64r))
- .addOperand(Dest)
- .addOperand(Src).addImm(1).addReg(0).addImm(0).addReg(0);
- MFI->insert(MBBI, NewMI); // Insert the new inst
+ TII->get(MI->getOpcode() == X86::MOV32rr ? X86::LEA32r
+ : X86::LEA64r))
+ .addOperand(Dest)
+ .addOperand(Src)
+ .addImm(1)
+ .addReg(0)
+ .addImm(0)
+ .addReg(0);
+ MFI->insert(MBBI, NewMI); // Insert the new inst
return NewMI;
}
case X86::ADD64ri32:
@@ -144,17 +147,16 @@ FixupLEAPass::postRAConvertToLEA(MachineFunction::iterator &MFI,
return TII->convertToThreeAddress(MFI, MBBI, nullptr);
}
-FunctionPass *llvm::createX86FixupLEAs() {
- return new FixupLEAPass();
-}
+FunctionPass *llvm::createX86FixupLEAs() { return new FixupLEAPass(); }
bool FixupLEAPass::runOnMachineFunction(MachineFunction &Func) {
+ MF = &Func;
TM = &Func.getTarget();
const X86Subtarget &ST = TM->getSubtarget<X86Subtarget>();
if (!ST.LEAusesAG() && !ST.slowLEA())
return false;
- TII = static_cast<const X86InstrInfo*>(TM->getInstrInfo());
+ TII = static_cast<const X86InstrInfo *>(TM->getInstrInfo());
DEBUG(dbgs() << "Start X86FixupLEAs\n";);
// Process all basic blocks.
@@ -165,14 +167,14 @@ bool FixupLEAPass::runOnMachineFunction(MachineFunction &Func) {
return true;
}
-FixupLEAPass::RegUsageState FixupLEAPass::usesRegister(MachineOperand& p,
- MachineBasicBlock::iterator I) {
+FixupLEAPass::RegUsageState
+FixupLEAPass::usesRegister(MachineOperand &p, MachineBasicBlock::iterator I) {
RegUsageState RegUsage = RU_NotUsed;
- MachineInstr* MI = I;
+ MachineInstr *MI = I;
for (unsigned int i = 0; i < MI->getNumOperands(); ++i) {
- MachineOperand& opnd = MI->getOperand(i);
- if (opnd.isReg() && opnd.getReg() == p.getReg()){
+ MachineOperand &opnd = MI->getOperand(i);
+ if (opnd.isReg() && opnd.getReg() == p.getReg()) {
if (opnd.isDef())
return RU_Write;
RegUsage = RU_Read;
@@ -185,23 +187,22 @@ FixupLEAPass::RegUsageState FixupLEAPass::usesRegister(MachineOperand& p,
/// block, return a reference to the previous instruction in the block,
/// wrapping around to the last instruction of the block if the block
/// branches to itself.
-static inline bool getPreviousInstr(MachineBasicBlock::iterator& I,
+static inline bool getPreviousInstr(MachineBasicBlock::iterator &I,
MachineFunction::iterator MFI) {
if (I == MFI->begin()) {
if (MFI->isPredecessor(MFI)) {
I = --MFI->end();
return true;
- }
- else
+ } else
return false;
}
--I;
return true;
}
-MachineBasicBlock::iterator FixupLEAPass::searchBackwards(MachineOperand& p,
- MachineBasicBlock::iterator& I,
- MachineFunction::iterator MFI) {
+MachineBasicBlock::iterator
+FixupLEAPass::searchBackwards(MachineOperand &p, MachineBasicBlock::iterator &I,
+ MachineFunction::iterator MFI) {
int InstrDistance = 1;
MachineBasicBlock::iterator CurInst;
static const int INSTR_DISTANCE_THRESHOLD = 5;
@@ -209,12 +210,12 @@ MachineBasicBlock::iterator FixupLEAPass::searchBackwards(MachineOperand& p,
CurInst = I;
bool Found;
Found = getPreviousInstr(CurInst, MFI);
- while( Found && I != CurInst) {
+ while (Found && I != CurInst) {
if (CurInst->isCall() || CurInst->isInlineAsm())
break;
if (InstrDistance > INSTR_DISTANCE_THRESHOLD)
break; // too far back to make a difference
- if (usesRegister(p, CurInst) == RU_Write){
+ if (usesRegister(p, CurInst) == RU_Write) {
return CurInst;
}
InstrDistance += TII->getInstrLatency(TM->getInstrItineraryData(), CurInst);
@@ -223,32 +224,32 @@ MachineBasicBlock::iterator FixupLEAPass::searchBackwards(MachineOperand& p,
return nullptr;
}
-void FixupLEAPass::processInstruction(MachineBasicBlock::iterator& I,
+void FixupLEAPass::processInstruction(MachineBasicBlock::iterator &I,
MachineFunction::iterator MFI) {
// Process a load, store, or LEA instruction.
MachineInstr *MI = I;
int opcode = MI->getOpcode();
- const MCInstrDesc& Desc = MI->getDesc();
+ const MCInstrDesc &Desc = MI->getDesc();
int AddrOffset = X86II::getMemoryOperandNo(Desc.TSFlags, opcode);
if (AddrOffset >= 0) {
AddrOffset += X86II::getOperandBias(Desc);
- MachineOperand& p = MI->getOperand(AddrOffset + X86::AddrBaseReg);
+ MachineOperand &p = MI->getOperand(AddrOffset + X86::AddrBaseReg);
if (p.isReg() && p.getReg() != X86::ESP) {
seekLEAFixup(p, I, MFI);
}
- MachineOperand& q = MI->getOperand(AddrOffset + X86::AddrIndexReg);
+ MachineOperand &q = MI->getOperand(AddrOffset + X86::AddrIndexReg);
if (q.isReg() && q.getReg() != X86::ESP) {
seekLEAFixup(q, I, MFI);
}
}
}
-void FixupLEAPass::seekLEAFixup(MachineOperand& p,
- MachineBasicBlock::iterator& I,
+void FixupLEAPass::seekLEAFixup(MachineOperand &p,
+ MachineBasicBlock::iterator &I,
MachineFunction::iterator MFI) {
MachineBasicBlock::iterator MBI = searchBackwards(p, I, MFI);
if (MBI) {
- MachineInstr* NewMI = postRAConvertToLEA(MFI, MBI);
+ MachineInstr *NewMI = postRAConvertToLEA(MFI, MBI);
if (NewMI) {
++NumLEAs;
DEBUG(dbgs() << "FixLEA: Candidate to replace:"; MBI->dump(););
@@ -256,7 +257,7 @@ void FixupLEAPass::seekLEAFixup(MachineOperand& p,
DEBUG(dbgs() << "FixLEA: Replaced by: "; NewMI->dump(););
MFI->erase(MBI);
MachineBasicBlock::iterator J =
- static_cast<MachineBasicBlock::iterator> (NewMI);
+ static_cast<MachineBasicBlock::iterator>(NewMI);
processInstruction(J, MFI);
}
}
@@ -299,7 +300,7 @@ void FixupLEAPass::processInstructionForSLM(MachineBasicBlock::iterator &I,
}
DEBUG(dbgs() << "FixLEA: Candidate to replace:"; I->dump(););
DEBUG(dbgs() << "FixLEA: Replaced by: ";);
- MachineInstr *NewMI = 0;
+ MachineInstr *NewMI = nullptr;
const MachineOperand &Dst = MI->getOperand(0);
// Make ADD instruction for two registers writing to LEA's destination
if (SrcR1 != 0 && SrcR2 != 0) {
diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp
index 4c1374f..8c029a8 100644
--- a/lib/Target/X86/X86FrameLowering.cpp
+++ b/lib/Target/X86/X86FrameLowering.cpp
@@ -29,6 +29,7 @@
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/Debug.h"
using namespace llvm;
@@ -45,7 +46,7 @@ bool X86FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
bool X86FrameLowering::hasFP(const MachineFunction &MF) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
const MachineModuleInfo &MMI = MF.getMMI();
- const TargetRegisterInfo *RegInfo = TM.getRegisterInfo();
+ const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo();
return (MF.getTarget().Options.DisableFramePointerElim(MF) ||
RegInfo->needsStackRealignment(MF) ||
@@ -305,65 +306,25 @@ static bool isEAXLiveIn(MachineFunction &MF) {
return false;
}
-void X86FrameLowering::emitCalleeSavedFrameMoves(
- MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, DebugLoc DL,
- unsigned FramePtr) const {
+void
+X86FrameLowering::emitCalleeSavedFrameMoves(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ DebugLoc DL) const {
MachineFunction &MF = *MBB.getParent();
MachineFrameInfo *MFI = MF.getFrameInfo();
MachineModuleInfo &MMI = MF.getMMI();
const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
- const X86InstrInfo &TII = *TM.getInstrInfo();
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
// Add callee saved registers to move list.
const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
if (CSI.empty()) return;
- const X86RegisterInfo *RegInfo = TM.getRegisterInfo();
- bool HasFP = hasFP(MF);
-
- // Calculate amount of bytes used for return address storing.
- int stackGrowth = -RegInfo->getSlotSize();
-
- // FIXME: This is dirty hack. The code itself is pretty mess right now.
- // It should be rewritten from scratch and generalized sometimes.
-
- // Determine maximum offset (minimum due to stack growth).
- int64_t MaxOffset = 0;
- for (std::vector<CalleeSavedInfo>::const_iterator
- I = CSI.begin(), E = CSI.end(); I != E; ++I)
- MaxOffset = std::min(MaxOffset,
- MFI->getObjectOffset(I->getFrameIdx()));
-
// Calculate offsets.
- int64_t saveAreaOffset = (HasFP ? 3 : 2) * stackGrowth;
for (std::vector<CalleeSavedInfo>::const_iterator
I = CSI.begin(), E = CSI.end(); I != E; ++I) {
int64_t Offset = MFI->getObjectOffset(I->getFrameIdx());
unsigned Reg = I->getReg();
- Offset = MaxOffset - Offset + saveAreaOffset;
-
- // Don't output a new machine move if we're re-saving the frame
- // pointer. This happens when the PrologEpilogInserter has inserted an extra
- // "PUSH" of the frame pointer -- the "emitPrologue" method automatically
- // generates one when frame pointers are used. If we generate a "machine
- // move" for this extra "PUSH", the linker will lose track of the fact that
- // the frame pointer should have the value of the first "PUSH" when it's
- // trying to unwind.
- //
- // FIXME: This looks inelegant. It's possibly correct, but it's covering up
- // another bug. I.e., one where we generate a prolog like this:
- //
- // pushl %ebp
- // movl %esp, %ebp
- // pushl %ebp
- // pushl %esi
- // ...
- //
- // The immediate re-push of EBP is unnecessary. At the least, it's an
- // optimization bug. EBP can be used as a scratch register in certain
- // cases, but probably not when we have a frame pointer.
- if (HasFP && FramePtr == Reg)
- continue;
unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
unsigned CFIIndex =
@@ -395,23 +356,107 @@ static bool usesTheStack(const MachineFunction &MF) {
/// automatically adjust the stack pointer. Adjust the stack pointer to allocate
/// space for local variables. Also emit labels used by the exception handler to
/// generate the exception handling frames.
+
+/*
+ Here's a gist of what gets emitted:
+
+ ; Establish frame pointer, if needed
+ [if needs FP]
+ push %rbp
+ .cfi_def_cfa_offset 16
+ .cfi_offset %rbp, -16
+ .seh_pushreg %rbp
+ mov %rsp, %rbp
+ .cfi_def_cfa_register %rbp
+
+ ; Spill general-purpose registers
+ [for all callee-saved GPRs]
+ pushq %<reg>
+ [if not needs FP]
+ .cfi_def_cfa_offset (offset from RETADDR)
+ .seh_pushreg %<reg>
+
+ ; If the required stack alignment > default stack alignment
+ ; rsp needs to be re-aligned. This creates a "re-alignment gap"
+ ; of unknown size in the stack frame.
+ [if stack needs re-alignment]
+ and $MASK, %rsp
+
+ ; Allocate space for locals
+ [if target is Windows and allocated space > 4096 bytes]
+ ; Windows needs special care for allocations larger
+ ; than one page.
+ mov $NNN, %rax
+ call ___chkstk_ms/___chkstk
+ sub %rax, %rsp
+ [else]
+ sub $NNN, %rsp
+
+ [if needs FP]
+ .seh_stackalloc (size of XMM spill slots)
+ .seh_setframe %rbp, SEHFrameOffset ; = size of all spill slots
+ [else]
+ .seh_stackalloc NNN
+
+ ; Spill XMMs
+ ; Note that while only the Windows 64 ABI specifies XMMs as callee-preserved,
+ ; they may get spilled on any platform, if the current function
+ ; calls @llvm.eh.unwind.init
+ [if needs FP]
+ [for all callee-saved XMM registers]
+ movaps %<xmm reg>, -MMM(%rbp)
+ [for all callee-saved XMM registers]
+ .seh_savexmm %<xmm reg>, (-MMM + SEHFrameOffset)
+ ; i.e. the offset relative to (%rbp - SEHFrameOffset)
+ [else]
+ [for all callee-saved XMM registers]
+ movaps %<xmm reg>, KKK(%rsp)
+ [for all callee-saved XMM registers]
+ .seh_savexmm %<xmm reg>, KKK
+
+ .seh_endprologue
+
+ [if needs base pointer]
+ mov %rsp, %rbx
+
+ ; Emit CFI info
+ [if needs FP]
+ [for all callee-saved registers]
+ .cfi_offset %<reg>, (offset from %rbp)
+ [else]
+ .cfi_def_cfa_offset (offset from RETADDR)
+ [for all callee-saved registers]
+ .cfi_offset %<reg>, (offset from %rsp)
+
+ Notes:
+ - .seh directives are emitted only for Windows 64 ABI
+ - .cfi directives are emitted for all other ABIs
+ - for 32-bit code, substitute %e?? registers for %r??
+*/
+
void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
MachineBasicBlock &MBB = MF.front(); // Prologue goes in entry BB.
MachineBasicBlock::iterator MBBI = MBB.begin();
MachineFrameInfo *MFI = MF.getFrameInfo();
const Function *Fn = MF.getFunction();
- const X86RegisterInfo *RegInfo = TM.getRegisterInfo();
- const X86InstrInfo &TII = *TM.getInstrInfo();
+ const X86RegisterInfo *RegInfo =
+ static_cast<const X86RegisterInfo *>(MF.getTarget().getRegisterInfo());
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
MachineModuleInfo &MMI = MF.getMMI();
X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
- bool needsFrameMoves = MMI.hasDebugInfo() ||
- Fn->needsUnwindTableEntry();
uint64_t MaxAlign = MFI->getMaxAlignment(); // Desired stack alignment.
uint64_t StackSize = MFI->getStackSize(); // Number of bytes to allocate.
bool HasFP = hasFP(MF);
+ const X86Subtarget &STI = MF.getTarget().getSubtarget<X86Subtarget>();
bool Is64Bit = STI.is64Bit();
bool IsLP64 = STI.isTarget64BitLP64();
bool IsWin64 = STI.isTargetWin64();
+ bool IsWinEH =
+ MF.getTarget().getMCAsmInfo()->getExceptionHandlingType() ==
+ ExceptionHandling::WinEH; // Not necessarily synonymous with IsWin64.
+ bool NeedsWinEH = IsWinEH && Fn->needsUnwindTableEntry();
+ bool NeedsDwarfCFI =
+ !IsWinEH && (MMI.hasDebugInfo() || Fn->needsUnwindTableEntry());
bool UseLEA = STI.useLeaForSP();
unsigned StackAlign = getStackAlignment();
unsigned SlotSize = RegInfo->getSlotSize();
@@ -509,7 +554,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
.addReg(FramePtr, RegState::Kill)
.setMIFlag(MachineInstr::FrameSetup);
- if (needsFrameMoves) {
+ if (NeedsDwarfCFI) {
// Mark the place where EBP/RBP was saved.
// Define the current CFA rule to use the provided offset.
assert(StackSize);
@@ -527,13 +572,19 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
.addCFIIndex(CFIIndex);
}
+ if (NeedsWinEH) {
+ BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg))
+ .addImm(FramePtr)
+ .setMIFlag(MachineInstr::FrameSetup);
+ }
+
// Update EBP with the new base value.
BuildMI(MBB, MBBI, DL,
TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr), FramePtr)
.addReg(StackPtr)
.setMIFlag(MachineInstr::FrameSetup);
- if (needsFrameMoves) {
+ if (NeedsDwarfCFI) {
// Mark effective beginning of when frame pointer becomes valid.
// Define the current CFA to use the EBP/RBP register.
unsigned DwarfFramePtr = RegInfo->getDwarfRegNum(FramePtr, true);
@@ -543,9 +594,8 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
.addCFIIndex(CFIIndex);
}
- // Mark the FramePtr as live-in in every block except the entry.
- for (MachineFunction::iterator I = std::next(MF.begin()), E = MF.end();
- I != E; ++I)
+ // Mark the FramePtr as live-in in every block.
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I)
I->addLiveIn(FramePtr);
} else {
NumBytes = StackSize - X86FI->getCalleeSavedFrameSize();
@@ -559,10 +609,10 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
(MBBI->getOpcode() == X86::PUSH32r ||
MBBI->getOpcode() == X86::PUSH64r)) {
PushedRegs = true;
- MBBI->setFlag(MachineInstr::FrameSetup);
+ unsigned Reg = MBBI->getOperand(0).getReg();
++MBBI;
- if (!HasFP && needsFrameMoves) {
+ if (!HasFP && NeedsDwarfCFI) {
// Mark callee-saved push instruction.
// Define the current CFA rule to use the provided offset.
assert(StackSize);
@@ -572,16 +622,15 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
.addCFIIndex(CFIIndex);
StackOffset += stackGrowth;
}
+
+ if (NeedsWinEH) {
+ BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg)).addImm(Reg).setMIFlag(
+ MachineInstr::FrameSetup);
+ }
}
// Realign stack after we pushed callee-saved registers (so that we'll be
// able to calculate their offsets from the frame pointer).
-
- // NOTE: We push the registers before realigning the stack, so
- // vector callee-saved (xmm) registers may be saved w/o proper
- // alignment in this way. However, currently these regs are saved in
- // stack slots (see X86FrameLowering::spillCalleeSavedRegisters()), so
- // this shouldn't be a problem.
if (RegInfo->needsStackRealignment(MF)) {
assert(HasFP && "There should be a frame pointer if stack is realigned.");
MachineInstr *MI =
@@ -680,23 +729,88 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
MI->setFlag(MachineInstr::FrameSetup);
MBB.insert(MBBI, MI);
}
- } else if (NumBytes)
+ } else if (NumBytes) {
emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit, IsLP64,
UseLEA, TII, *RegInfo);
+ }
+
+ int SEHFrameOffset = 0;
+ if (NeedsWinEH) {
+ if (HasFP) {
+ // We need to set frame base offset low enough such that all saved
+ // register offsets would be positive relative to it, but we can't
+ // just use NumBytes, because .seh_setframe offset must be <=240.
+ // So we pretend to have only allocated enough space to spill the
+ // non-volatile registers.
+ // We don't care about the rest of the stack allocation, because the
+ // unwinder will restore SP to (BP - SEHFrameOffset).
+ for (const CalleeSavedInfo &Info : MFI->getCalleeSavedInfo()) {
+ int offset = MFI->getObjectOffset(Info.getFrameIdx());
+ SEHFrameOffset = std::max(SEHFrameOffset, abs(offset));
+ }
+ SEHFrameOffset += SEHFrameOffset % 16; // ensure alignment
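+ // (Spill-slot offsets are multiples of the 8-byte slot size, so adding
+ // the remainder mod 16 rounds SEHFrameOffset up to a 16-byte boundary.)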
+
+ // This only needs to account for XMM spill slots; GPR slots are
+ // covered by the .seh_pushreg's emitted above.
+ unsigned Size = SEHFrameOffset - X86FI->getCalleeSavedFrameSize();
+ if (Size) {
+ BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_StackAlloc))
+ .addImm(Size)
+ .setMIFlag(MachineInstr::FrameSetup);
+ }
+
+ BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SetFrame))
+ .addImm(FramePtr)
+ .addImm(SEHFrameOffset)
+ .setMIFlag(MachineInstr::FrameSetup);
+ } else {
+ // SP will be the base register for restoring XMMs
+ if (NumBytes) {
+ BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_StackAlloc))
+ .addImm(NumBytes)
+ .setMIFlag(MachineInstr::FrameSetup);
+ }
+ }
+ }
+
+ // Skip over the rest of the register-spilling code (all marked
+ // FrameSetup) so the SEH directives below are emitted after it.
+ while (MBBI != MBB.end() && MBBI->getFlag(MachineInstr::FrameSetup))
+ ++MBBI;
+
+ // Emit SEH info for non-GPRs
+ if (NeedsWinEH) {
+ for (const CalleeSavedInfo &Info : MFI->getCalleeSavedInfo()) {
+ unsigned Reg = Info.getReg();
+ if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg))
+ continue;
+ assert(X86::FR64RegClass.contains(Reg) && "Unexpected register class");
+
+ int Offset = getFrameIndexOffset(MF, Info.getFrameIdx());
+ Offset += SEHFrameOffset;
+
+ BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SaveXMM))
+ .addImm(Reg)
+ .addImm(Offset)
+ .setMIFlag(MachineInstr::FrameSetup);
+ }
+
+ BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_EndPrologue))
+ .setMIFlag(MachineInstr::FrameSetup);
+ }
// If we need a base pointer, set it up here. It's whatever the value
// of the stack pointer is at this point. Any variable size objects
// will be allocated after this, so we can still use the base pointer
// to reference locals.
if (RegInfo->hasBasePointer(MF)) {
- // Update the frame pointer with the current stack pointer.
+ // Update the base pointer with the current stack pointer.
unsigned Opc = Is64Bit ? X86::MOV64rr : X86::MOV32rr;
BuildMI(MBB, MBBI, DL, TII.get(Opc), BasePtr)
.addReg(StackPtr)
.setMIFlag(MachineInstr::FrameSetup);
}
- if (( (!HasFP && NumBytes) || PushedRegs) && needsFrameMoves) {
+ if (((!HasFP && NumBytes) || PushedRegs) && NeedsDwarfCFI) {
// Mark end of stack pointer adjustment.
if (!HasFP && NumBytes) {
// Define the current CFA rule to use the provided offset.
@@ -711,7 +825,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
// Emit DWARF info specifying the offsets of the callee-saved registers.
if (PushedRegs)
- emitCalleeSavedFrameMoves(MBB, MBBI, DL, HasFP ? FramePtr : StackPtr);
+ emitCalleeSavedFrameMoves(MBB, MBBI, DL);
}
}
@@ -719,12 +833,14 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
- const X86RegisterInfo *RegInfo = TM.getRegisterInfo();
- const X86InstrInfo &TII = *TM.getInstrInfo();
+ const X86RegisterInfo *RegInfo =
+ static_cast<const X86RegisterInfo *>(MF.getTarget().getRegisterInfo());
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
assert(MBBI != MBB.end() && "Returning block has no instructions");
unsigned RetOpcode = MBBI->getOpcode();
DebugLoc DL = MBBI->getDebugLoc();
+ const X86Subtarget &STI = MF.getTarget().getSubtarget<X86Subtarget>();
bool Is64Bit = STI.is64Bit();
bool IsLP64 = STI.isTarget64BitLP64();
bool UseLEA = STI.useLeaForSP();
@@ -969,46 +1085,97 @@ int X86FrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
return getFrameIndexOffset(MF, FI);
}
-bool X86FrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI,
- const TargetRegisterInfo *TRI) const {
- if (CSI.empty())
- return false;
+bool X86FrameLowering::assignCalleeSavedSpillSlots(
+ MachineFunction &MF, const TargetRegisterInfo *TRI,
+ std::vector<CalleeSavedInfo> &CSI) const {
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const X86RegisterInfo *RegInfo =
+ static_cast<const X86RegisterInfo *>(MF.getTarget().getRegisterInfo());
+ unsigned SlotSize = RegInfo->getSlotSize();
+ X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
- DebugLoc DL = MBB.findDebugLoc(MI);
+ unsigned CalleeSavedFrameSize = 0;
+ int SpillSlotOffset = getOffsetOfLocalArea() + X86FI->getTCReturnAddrDelta();
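+ // The stack grows down, so each fixed spill slot is created below the
+ // previous one and SpillSlotOffset only ever decreases.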
- MachineFunction &MF = *MBB.getParent();
+ if (hasFP(MF)) {
+ // emitPrologue always spills the frame register first.
+ SpillSlotOffset -= SlotSize;
+ MFI->CreateFixedSpillStackObject(SlotSize, SpillSlotOffset);
+
+ // Since emitPrologue and emitEpilogue will handle spilling and restoring
+ // of the frame register, we can delete it from the CSI list and not have
+ // to worry about avoiding it later.
+ unsigned FPReg = RegInfo->getFrameRegister(MF);
+ for (unsigned i = 0; i < CSI.size(); ++i) {
+ if (CSI[i].getReg() == FPReg) {
+ CSI.erase(CSI.begin() + i);
+ break;
+ }
+ }
+ }
+
+ // Assign slots for GPRs. It increases frame size.
+ for (unsigned i = CSI.size(); i != 0; --i) {
+ unsigned Reg = CSI[i - 1].getReg();
+
+ if (!X86::GR64RegClass.contains(Reg) && !X86::GR32RegClass.contains(Reg))
+ continue;
- unsigned SlotSize = STI.is64Bit() ? 8 : 4;
- unsigned FPReg = TRI->getFrameRegister(MF);
- unsigned CalleeFrameSize = 0;
+ SpillSlotOffset -= SlotSize;
+ CalleeSavedFrameSize += SlotSize;
+
+ int SlotIndex = MFI->CreateFixedSpillStackObject(SlotSize, SpillSlotOffset);
+ CSI[i - 1].setFrameIdx(SlotIndex);
+ }
+
+ X86FI->setCalleeSavedFrameSize(CalleeSavedFrameSize);
+
+ // Assign slots for XMMs.
+ for (unsigned i = CSI.size(); i != 0; --i) {
+ unsigned Reg = CSI[i - 1].getReg();
+ if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg))
+ continue;
+
+ const TargetRegisterClass *RC = RegInfo->getMinimalPhysRegClass(Reg);
+ // Align the slot downward to the register class's required alignment
+ // (offsets are negative here, hence the abs()).
+ SpillSlotOffset -= abs(SpillSlotOffset) % RC->getAlignment();
+ // spill into slot
+ SpillSlotOffset -= RC->getSize();
+ int SlotIndex =
+ MFI->CreateFixedSpillStackObject(RC->getSize(), SpillSlotOffset);
+ CSI[i - 1].setFrameIdx(SlotIndex);
+ MFI->ensureMaxAlignment(RC->getAlignment());
+ }
+
+ return true;
+}
+bool X86FrameLowering::spillCalleeSavedRegisters(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const {
+ DebugLoc DL = MBB.findDebugLoc(MI);
+
+ MachineFunction &MF = *MBB.getParent();
const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
- X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
+ const X86Subtarget &STI = MF.getTarget().getSubtarget<X86Subtarget>();
// Push GPRs. It increases frame size.
unsigned Opc = STI.is64Bit() ? X86::PUSH64r : X86::PUSH32r;
for (unsigned i = CSI.size(); i != 0; --i) {
- unsigned Reg = CSI[i-1].getReg();
- if (!X86::GR64RegClass.contains(Reg) &&
- !X86::GR32RegClass.contains(Reg))
+ unsigned Reg = CSI[i - 1].getReg();
+
+ if (!X86::GR64RegClass.contains(Reg) && !X86::GR32RegClass.contains(Reg))
continue;
// Add the callee-saved register as live-in. It's killed at the spill.
MBB.addLiveIn(Reg);
- if (Reg == FPReg)
- // X86RegisterInfo::emitPrologue will handle spilling of frame register.
- continue;
- CalleeFrameSize += SlotSize;
+
BuildMI(MBB, MI, DL, TII.get(Opc)).addReg(Reg, RegState::Kill)
.setMIFlag(MachineInstr::FrameSetup);
}
- X86FI->setCalleeSavedFrameSize(CalleeFrameSize);
-
// Spill the XMM regs. X86 has no push/pop instructions for XMM registers,
// so they are stored into stack-frame slots instead.
- // Note that only Win64 ABI might spill XMMs.
for (unsigned i = CSI.size(); i != 0; --i) {
unsigned Reg = CSI[i-1].getReg();
if (X86::GR64RegClass.contains(Reg) ||
@@ -1017,8 +1184,12 @@ bool X86FrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
// Add the callee-saved register as live-in. It's killed at the spill.
MBB.addLiveIn(Reg);
const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
- TII.storeRegToStackSlot(MBB, MI, Reg, true, CSI[i-1].getFrameIdx(),
- RC, TRI);
+
+ TII.storeRegToStackSlot(MBB, MI, Reg, true, CSI[i - 1].getFrameIdx(), RC,
+ TRI);
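+ // storeRegToStackSlot inserted the spill immediately before MI; step
+ // back to tag it as FrameSetup, then restore the iterator.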
+ --MI;
+ MI->setFlag(MachineInstr::FrameSetup);
+ ++MI;
}
return true;
@@ -1035,6 +1206,7 @@ bool X86FrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineFunction &MF = *MBB.getParent();
const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ const X86Subtarget &STI = MF.getTarget().getSubtarget<X86Subtarget>();
// Reload XMMs from stack frame.
for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
@@ -1042,22 +1214,19 @@ bool X86FrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
if (X86::GR64RegClass.contains(Reg) ||
X86::GR32RegClass.contains(Reg))
continue;
+
const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
- TII.loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(),
- RC, TRI);
+ TII.loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(), RC, TRI);
}
// POP GPRs.
- unsigned FPReg = TRI->getFrameRegister(MF);
unsigned Opc = STI.is64Bit() ? X86::POP64r : X86::POP32r;
for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
unsigned Reg = CSI[i].getReg();
if (!X86::GR64RegClass.contains(Reg) &&
!X86::GR32RegClass.contains(Reg))
continue;
- if (Reg == FPReg)
- // X86RegisterInfo::emitEpilogue will handle restoring of frame register.
- continue;
+
BuildMI(MBB, MI, DL, TII.get(Opc), Reg);
}
return true;
@@ -1065,9 +1234,10 @@ bool X86FrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
void
X86FrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
- RegScavenger *RS) const {
+ RegScavenger *RS) const {
MachineFrameInfo *MFI = MF.getFrameInfo();
- const X86RegisterInfo *RegInfo = TM.getRegisterInfo();
+ const X86RegisterInfo *RegInfo =
+ static_cast<const X86RegisterInfo *>(MF.getTarget().getRegisterInfo());
unsigned SlotSize = RegInfo->getSlotSize();
X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
@@ -1087,22 +1257,6 @@ X86FrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
TailCallReturnAddrDelta - SlotSize, true);
}
- if (hasFP(MF)) {
- assert((TailCallReturnAddrDelta <= 0) &&
- "The Delta should always be zero or negative");
- const TargetFrameLowering &TFI = *MF.getTarget().getFrameLowering();
-
- // Create a frame entry for the EBP register that must be saved.
- int FrameIdx = MFI->CreateFixedObject(SlotSize,
- -(int)SlotSize +
- TFI.getOffsetOfLocalArea() +
- TailCallReturnAddrDelta,
- true);
- assert(FrameIdx == MFI->getObjectIndexBegin() &&
- "Slot for EBP register must be last in order to be found!");
- (void)FrameIdx;
- }
-
// Spill the BasePtr if it's used.
if (RegInfo->hasBasePointer(MF))
MF.getRegInfo().setPhysRegUsed(RegInfo->getBaseRegister());
@@ -1160,8 +1314,9 @@ void
X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const {
MachineBasicBlock &prologueMBB = MF.front();
MachineFrameInfo *MFI = MF.getFrameInfo();
- const X86InstrInfo &TII = *TM.getInstrInfo();
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
uint64_t StackSize;
+ const X86Subtarget &STI = MF.getTarget().getSubtarget<X86Subtarget>();
bool Is64Bit = STI.is64Bit();
unsigned TlsReg, TlsOffset;
DebugLoc DL;
@@ -1368,9 +1523,12 @@ X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const {
/// temp0 = sp - MaxStack
/// if( temp0 < SP_LIMIT(P) ) goto IncStack else goto OldStart
void X86FrameLowering::adjustForHiPEPrologue(MachineFunction &MF) const {
- const X86InstrInfo &TII = *TM.getInstrInfo();
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
MachineFrameInfo *MFI = MF.getFrameInfo();
- const unsigned SlotSize = TM.getRegisterInfo()->getSlotSize();
+ const unsigned SlotSize =
+ static_cast<const X86RegisterInfo *>(MF.getTarget().getRegisterInfo())
+ ->getSlotSize();
+ const X86Subtarget &STI = MF.getTarget().getSubtarget<X86Subtarget>();
const bool Is64Bit = STI.is64Bit();
DebugLoc DL;
// HiPE-specific values
@@ -1499,12 +1657,14 @@ void X86FrameLowering::adjustForHiPEPrologue(MachineFunction &MF) const {
void X86FrameLowering::
eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const {
- const X86InstrInfo &TII = *TM.getInstrInfo();
- const X86RegisterInfo &RegInfo = *TM.getRegisterInfo();
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ const X86RegisterInfo &RegInfo =
+ *static_cast<const X86RegisterInfo *>(MF.getTarget().getRegisterInfo());
unsigned StackPtr = RegInfo.getStackRegister();
bool reseveCallFrame = hasReservedCallFrame(MF);
int Opcode = I->getOpcode();
bool isDestroy = Opcode == TII.getCallFrameDestroyOpcode();
+ const X86Subtarget &STI = MF.getTarget().getSubtarget<X86Subtarget>();
bool IsLP64 = STI.isTarget64BitLP64();
DebugLoc DL = I->getDebugLoc();
uint64_t Amount = !reseveCallFrame ? I->getOperand(0).getImm() : 0;
@@ -1522,7 +1682,8 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
// We need to keep the stack aligned properly. To do this, we round the
// amount of space needed for the outgoing arguments up to the next
// alignment boundary.
- unsigned StackAlign = TM.getFrameLowering()->getStackAlignment();
+ unsigned StackAlign =
+ MF.getTarget().getFrameLowering()->getStackAlignment();
Amount = (Amount + StackAlign - 1) / StackAlign * StackAlign;
MachineInstr *New = nullptr;
diff --git a/lib/Target/X86/X86FrameLowering.h b/lib/Target/X86/X86FrameLowering.h
index 208bb8b..5ad3d4d 100644
--- a/lib/Target/X86/X86FrameLowering.h
+++ b/lib/Target/X86/X86FrameLowering.h
@@ -14,7 +14,6 @@
#ifndef X86_FRAMELOWERING_H
#define X86_FRAMELOWERING_H
-#include "X86Subtarget.h"
#include "llvm/Target/TargetFrameLowering.h"
namespace llvm {
@@ -23,19 +22,13 @@ class MCSymbol;
class X86TargetMachine;
class X86FrameLowering : public TargetFrameLowering {
- const X86TargetMachine &TM;
- const X86Subtarget &STI;
public:
- explicit X86FrameLowering(const X86TargetMachine &tm, const X86Subtarget &sti)
- : TargetFrameLowering(StackGrowsDown,
- sti.getStackAlignment(),
- (sti.is64Bit() ? -8 : -4)),
- TM(tm), STI(sti) {
- }
+ explicit X86FrameLowering(StackDirection D, unsigned StackAl, int LAO)
+ : TargetFrameLowering(StackGrowsDown, StackAl, LAO) {}
void emitCalleeSavedFrameMoves(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI, DebugLoc DL,
- unsigned FramePtr) const;
+ MachineBasicBlock::iterator MBBI,
+ DebugLoc DL) const;
/// emitProlog/emitEpilog - These methods insert prolog and epilog code into
/// the function.
@@ -49,6 +42,11 @@ public:
void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
RegScavenger *RS = nullptr) const override;
+ bool
+ assignCalleeSavedSpillSlots(MachineFunction &MF,
+ const TargetRegisterInfo *TRI,
+ std::vector<CalleeSavedInfo> &CSI) const override;
+
bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
const std::vector<CalleeSavedInfo> &CSI,
diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp
index 74386d3..ba2f5f6 100644
--- a/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -2126,38 +2126,6 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
return getGlobalBaseReg();
- case X86ISD::ATOMOR64_DAG:
- case X86ISD::ATOMXOR64_DAG:
- case X86ISD::ATOMADD64_DAG:
- case X86ISD::ATOMSUB64_DAG:
- case X86ISD::ATOMNAND64_DAG:
- case X86ISD::ATOMAND64_DAG:
- case X86ISD::ATOMMAX64_DAG:
- case X86ISD::ATOMMIN64_DAG:
- case X86ISD::ATOMUMAX64_DAG:
- case X86ISD::ATOMUMIN64_DAG:
- case X86ISD::ATOMSWAP64_DAG: {
- unsigned Opc;
- switch (Opcode) {
- default: llvm_unreachable("Impossible opcode");
- case X86ISD::ATOMOR64_DAG: Opc = X86::ATOMOR6432; break;
- case X86ISD::ATOMXOR64_DAG: Opc = X86::ATOMXOR6432; break;
- case X86ISD::ATOMADD64_DAG: Opc = X86::ATOMADD6432; break;
- case X86ISD::ATOMSUB64_DAG: Opc = X86::ATOMSUB6432; break;
- case X86ISD::ATOMNAND64_DAG: Opc = X86::ATOMNAND6432; break;
- case X86ISD::ATOMAND64_DAG: Opc = X86::ATOMAND6432; break;
- case X86ISD::ATOMMAX64_DAG: Opc = X86::ATOMMAX6432; break;
- case X86ISD::ATOMMIN64_DAG: Opc = X86::ATOMMIN6432; break;
- case X86ISD::ATOMUMAX64_DAG: Opc = X86::ATOMUMAX6432; break;
- case X86ISD::ATOMUMIN64_DAG: Opc = X86::ATOMUMIN6432; break;
- case X86ISD::ATOMSWAP64_DAG: Opc = X86::ATOMSWAP6432; break;
- }
- SDNode *RetVal = SelectAtomic64(Node, Opc);
- if (RetVal)
- return RetVal;
- break;
- }
-
case ISD::ATOMIC_LOAD_XOR:
case ISD::ATOMIC_LOAD_AND:
case ISD::ATOMIC_LOAD_OR:
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index cbaf44e..5ccff20 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -44,11 +44,13 @@
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetOptions.h"
#include <bitset>
+#include <numeric>
#include <cctype>
using namespace llvm;
@@ -56,6 +58,17 @@ using namespace llvm;
STATISTIC(NumTailCalls, "Number of tail calls");
+static cl::opt<bool> ExperimentalVectorWideningLegalization(
+ "x86-experimental-vector-widening-legalization", cl::init(false),
+ cl::desc("Enable an experimental vector type legalization through widening "
+ "rather than promotion."),
+ cl::Hidden);
+
+static cl::opt<bool> ExperimentalVectorShuffleLowering(
+ "x86-experimental-vector-shuffle-lowering", cl::init(false),
+ cl::desc("Enable an experimental vector shuffle lowering code path."),
+ cl::Hidden);
+
// Forward declarations.
static SDValue getMOVL(SelectionDAG &DAG, SDLoc dl, EVT VT, SDValue V1,
SDValue V2);
@@ -178,29 +191,28 @@ static SDValue Concat256BitVectors(SDValue V1, SDValue V2, EVT VT,
return Insert256BitVector(V, V2, NumElems/2, DAG, dl);
}
-static TargetLoweringObjectFile *createTLOF(X86TargetMachine &TM) {
- const X86Subtarget *Subtarget = &TM.getSubtarget<X86Subtarget>();
- bool is64Bit = Subtarget->is64Bit();
-
- if (Subtarget->isTargetMacho()) {
- if (is64Bit)
+static TargetLoweringObjectFile *createTLOF(const Triple &TT) {
+ if (TT.isOSBinFormatMachO()) {
+ if (TT.getArch() == Triple::x86_64)
return new X86_64MachoTargetObjectFile();
return new TargetLoweringObjectFileMachO();
}
- if (Subtarget->isTargetLinux())
+ if (TT.isOSLinux())
return new X86LinuxTargetObjectFile();
- if (Subtarget->isTargetELF())
+ if (TT.isOSBinFormatELF())
return new TargetLoweringObjectFileELF();
- if (Subtarget->isTargetKnownWindowsMSVC())
+ if (TT.isKnownWindowsMSVCEnvironment())
return new X86WindowsTargetObjectFile();
- if (Subtarget->isTargetCOFF())
+ if (TT.isOSBinFormatCOFF())
return new TargetLoweringObjectFileCOFF();
llvm_unreachable("unknown subtarget type");
}
+// FIXME: This should stop caching the target machine as soon as
+// we can remove resetOperationActions et al.
X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
- : TargetLowering(TM, createTLOF(TM)) {
+ : TargetLowering(TM, createTLOF(Triple(TM.getTargetTriple()))) {
Subtarget = &TM.getSubtarget<X86Subtarget>();
X86ScalarSSEf64 = Subtarget->hasSSE2();
X86ScalarSSEf32 = Subtarget->hasSSE1();
@@ -443,7 +455,13 @@ void X86TargetLowering::resetOperationActions() {
setOperationAction(ISD::BR_CC , MVT::i16, Expand);
setOperationAction(ISD::BR_CC , MVT::i32, Expand);
setOperationAction(ISD::BR_CC , MVT::i64, Expand);
- setOperationAction(ISD::SELECT_CC , MVT::Other, Expand);
+ setOperationAction(ISD::SELECT_CC , MVT::f32, Expand);
+ setOperationAction(ISD::SELECT_CC , MVT::f64, Expand);
+ setOperationAction(ISD::SELECT_CC , MVT::f80, Expand);
+ setOperationAction(ISD::SELECT_CC , MVT::i8, Expand);
+ setOperationAction(ISD::SELECT_CC , MVT::i16, Expand);
+ setOperationAction(ISD::SELECT_CC , MVT::i32, Expand);
+ setOperationAction(ISD::SELECT_CC , MVT::i64, Expand);
if (Subtarget->is64Bit())
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16 , Legal);
@@ -497,6 +515,14 @@ void X86TargetLowering::resetOperationActions() {
}
}
+ // Special handling for half-precision floating point conversions.
+ // If we don't have F16C support, then lower half float conversions
+ // into library calls.
+ if (TM.Options.UseSoftFloat || !Subtarget->hasF16C()) {
+ setOperationAction(ISD::FP16_TO_FP32, MVT::f32, Expand);
+ setOperationAction(ISD::FP32_TO_FP16, MVT::i16, Expand);
+ }
+
if (Subtarget->hasPOPCNT()) {
setOperationAction(ISD::CTPOP , MVT::i8 , Promote);
} else {
@@ -575,34 +601,18 @@ void X86TargetLowering::resetOperationActions() {
// Expand certain atomics
for (unsigned i = 0; i != array_lengthof(IntVTs); ++i) {
MVT VT = IntVTs[i];
- setOperationAction(ISD::ATOMIC_CMP_SWAP, VT, Custom);
+ setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, VT, Custom);
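+ // CMP_SWAP_WITH_SUCCESS also produces the i1 success result of the
+ // cmpxchg, which X86 reads from ZF after the LOCK CMPXCHG.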
setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Custom);
setOperationAction(ISD::ATOMIC_STORE, VT, Custom);
}
- if (!Subtarget->is64Bit()) {
- setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Custom);
- setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i64, Custom);
- setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i64, Custom);
- setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i64, Custom);
- setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i64, Custom);
- setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i64, Custom);
- setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i64, Custom);
- setOperationAction(ISD::ATOMIC_SWAP, MVT::i64, Custom);
- setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i64, Custom);
- setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i64, Custom);
- setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i64, Custom);
- setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i64, Custom);
- }
-
if (Subtarget->hasCmpxchg16b()) {
- setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i128, Custom);
+ setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i128, Custom);
}
// FIXME - use subtarget debug flags
- if (!Subtarget->isTargetDarwin() &&
- !Subtarget->isTargetELF() &&
- !Subtarget->isTargetCygMing()) {
+ if (!Subtarget->isTargetDarwin() && !Subtarget->isTargetELF() &&
+ !Subtarget->isTargetCygMing() && !Subtarget->isTargetWin64()) {
setOperationAction(ISD::EH_LABEL, MVT::Other, Expand);
}
@@ -861,6 +871,7 @@ void X86TargetLowering::resetOperationActions() {
setOperationAction(ISD::ZERO_EXTEND, VT, Expand);
setOperationAction(ISD::ANY_EXTEND, VT, Expand);
setOperationAction(ISD::VSELECT, VT, Expand);
+ setOperationAction(ISD::SELECT_CC, VT, Expand);
for (int InnerVT = MVT::FIRST_VECTOR_VALUETYPE;
InnerVT <= MVT::LAST_VECTOR_VALUETYPE; ++InnerVT)
setTruncStoreAction(VT,
@@ -1433,6 +1444,11 @@ void X86TargetLowering::resetOperationActions() {
setOperationAction(ISD::OR, MVT::v16i32, Legal);
setOperationAction(ISD::XOR, MVT::v16i32, Legal);
+ if (Subtarget->hasCDI()) {
+ setOperationAction(ISD::CTLZ, MVT::v8i64, Legal);
+ setOperationAction(ISD::CTLZ, MVT::v16i32, Legal);
+ }
+
// Custom lower several nodes.
for (int i = MVT::FIRST_VECTOR_VALUETYPE;
i <= MVT::LAST_VECTOR_VALUETYPE; ++i) {
@@ -1563,6 +1579,7 @@ void X86TargetLowering::resetOperationActions() {
setTargetDAGCombine(ISD::SINT_TO_FP);
setTargetDAGCombine(ISD::SETCC);
setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
+ setTargetDAGCombine(ISD::BUILD_VECTOR);
if (Subtarget->is64Bit())
setTargetDAGCombine(ISD::MUL);
setTargetDAGCombine(ISD::XOR);
@@ -1585,6 +1602,16 @@ void X86TargetLowering::resetOperationActions() {
setPrefFunctionAlignment(4); // 2^4 bytes.
}
+TargetLoweringBase::LegalizeTypeAction
+X86TargetLowering::getPreferredVectorAction(EVT VT) const {
+ if (ExperimentalVectorWideningLegalization &&
+ VT.getVectorNumElements() != 1 &&
+ VT.getVectorElementType().getSimpleVT() != MVT::i1)
+ return TypeWidenVector;
+
+ return TargetLoweringBase::getPreferredVectorAction(VT);
+}
+
EVT X86TargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
if (!VT.isVector())
return Subtarget->hasAVX512() ? MVT::i1: MVT::i8;
@@ -1725,7 +1752,7 @@ const MCExpr *
X86TargetLowering::LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
const MachineBasicBlock *MBB,
unsigned uid,MCContext &Ctx) const{
- assert(getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
+ assert(MBB->getParent()->getTarget().getRelocationModel() == Reloc::PIC_ &&
Subtarget->isPICStyleGOT());
// In 32-bit ELF systems, our jump table entries are formed with @GOTOFF
// entries.
@@ -1824,7 +1851,7 @@ X86TargetLowering::CanLowerReturn(CallingConv::ID CallConv,
const SmallVectorImpl<ISD::OutputArg> &Outs,
LLVMContext &Context) const {
SmallVector<CCValAssign, 16> RVLocs;
- CCState CCInfo(CallConv, isVarArg, MF, getTargetMachine(),
+ CCState CCInfo(CallConv, isVarArg, MF, MF.getTarget(),
RVLocs, Context);
return CCInfo.CheckReturn(Outs, RetCC_X86);
}
@@ -1844,7 +1871,7 @@ X86TargetLowering::LowerReturn(SDValue Chain,
X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
SmallVector<CCValAssign, 16> RVLocs;
- CCState CCInfo(CallConv, isVarArg, MF, getTargetMachine(),
+ CCState CCInfo(CallConv, isVarArg, MF, DAG.getTarget(),
RVLocs, *DAG.getContext());
CCInfo.AnalyzeReturn(Outs, RetCC_X86);
@@ -2016,7 +2043,7 @@ X86TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
SmallVector<CCValAssign, 16> RVLocs;
bool Is64Bit = Subtarget->is64Bit();
CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), RVLocs, *DAG.getContext());
+ DAG.getTarget(), RVLocs, *DAG.getContext());
CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
// Copy all of the result registers out of their specified physreg.
@@ -2166,8 +2193,8 @@ X86TargetLowering::LowerMemArgument(SDValue Chain,
unsigned i) const {
// Create the nodes corresponding to a load from this parameter slot.
ISD::ArgFlagsTy Flags = Ins[i].Flags;
- bool AlwaysUseMutable = FuncIsMadeTailCallSafe(CallConv,
- getTargetMachine().Options.GuaranteedTailCallOpt);
+ bool AlwaysUseMutable = FuncIsMadeTailCallSafe(
+ CallConv, DAG.getTarget().Options.GuaranteedTailCallOpt);
bool isImmutable = !AlwaysUseMutable && !Flags.isByVal();
EVT ValVT;
@@ -2224,7 +2251,7 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
// Assign locations to all of the incoming arguments.
SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CallConv, isVarArg, MF, getTargetMachine(),
+ CCState CCInfo(CallConv, isVarArg, MF, DAG.getTarget(),
ArgLocs, *DAG.getContext());
// Allocate shadow area for Win64
@@ -2388,7 +2415,7 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
TotalNumXMMRegs = 0;
if (IsWin64) {
- const TargetFrameLowering &TFI = *getTargetMachine().getFrameLowering();
+ const TargetFrameLowering &TFI = *MF.getTarget().getFrameLowering();
// Get to the caller-allocated home save location. Add 8 to account
// for the return address.
int HomeOffset = TFI.getOffsetOfLocalArea() + 8;
@@ -2587,7 +2614,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CallConv, isVarArg, MF, getTargetMachine(),
+ CCState CCInfo(CallConv, isVarArg, MF, MF.getTarget(),
ArgLocs, *DAG.getContext());
// Allocate shadow area for Win64
@@ -2602,7 +2629,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// This is a sibcall. The memory operands are available in the caller's
// own caller's stack.
NumBytes = 0;
- else if (getTargetMachine().Options.GuaranteedTailCallOpt &&
+ else if (MF.getTarget().Options.GuaranteedTailCallOpt &&
IsTailCallConvention(CallConv))
NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG);
@@ -2649,7 +2676,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// Walk the register/memloc assignments, inserting copies/loads. In the case
// of tail call optimization, arguments are handled later.
const X86RegisterInfo *RegInfo =
- static_cast<const X86RegisterInfo*>(getTargetMachine().getRegisterInfo());
+ static_cast<const X86RegisterInfo*>(DAG.getTarget().getRegisterInfo());
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
// Skip inalloca arguments, they have already been written.
ISD::ArgFlagsTy Flags = Outs[i].Flags;
@@ -2840,7 +2867,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
InFlag = Chain.getValue(1);
}
- if (getTargetMachine().getCodeModel() == CodeModel::Large) {
+ if (DAG.getTarget().getCodeModel() == CodeModel::Large) {
assert(Is64Bit && "Large code model is only legal in 64-bit mode.");
// In the 64-bit large code model, we have to make all calls
// through a register, since the call instruction's 32-bit
@@ -2864,7 +2891,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// has hidden or protected visibility, or if it is static or local, then
// we don't need to use the PLT - we can directly call it.
if (Subtarget->isTargetELF() &&
- getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
+ DAG.getTarget().getRelocationModel() == Reloc::PIC_ &&
GV->hasDefaultVisibility() && !GV->hasLocalLinkage()) {
OpFlags = X86II::MO_PLT;
} else if (Subtarget->isPICStyleStubAny() &&
@@ -2906,7 +2933,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// On ELF targets, in either X86-64 or X86-32 mode, direct calls to
// external symbols should go through the PLT.
if (Subtarget->isTargetELF() &&
- getTargetMachine().getRelocationModel() == Reloc::PIC_) {
+ DAG.getTarget().getRelocationModel() == Reloc::PIC_) {
OpFlags = X86II::MO_PLT;
} else if (Subtarget->isPICStyleStubAny() &&
(!Subtarget->getTargetTriple().isMacOSX() ||
@@ -2945,7 +2972,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
RegsToPass[i].second.getValueType()));
// Add a register mask operand representing the call-preserved registers.
- const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
+ const TargetRegisterInfo *TRI = DAG.getTarget().getRegisterInfo();
const uint32_t *Mask = TRI->getCallPreservedMask(CallConv);
assert(Mask && "Missing call preserved mask for calling convention");
Ops.push_back(DAG.getRegisterMask(Mask));
@@ -2969,7 +2996,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// Create the CALLSEQ_END node.
unsigned NumBytesForCalleeToPop;
if (X86::isCalleePop(CallConv, Is64Bit, isVarArg,
- getTargetMachine().Options.GuaranteedTailCallOpt))
+ DAG.getTarget().Options.GuaranteedTailCallOpt))
NumBytesForCalleeToPop = NumBytes; // Callee pops everything
else if (!Is64Bit && !IsTailCallConvention(CallConv) &&
!Subtarget->getTargetTriple().isOSMSVCRT() &&
@@ -3140,7 +3167,7 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
bool IsCalleeWin64 = Subtarget->isCallingConvWin64(CalleeCC);
bool IsCallerWin64 = Subtarget->isCallingConvWin64(CallerCC);
- if (getTargetMachine().Options.GuaranteedTailCallOpt) {
+ if (DAG.getTarget().Options.GuaranteedTailCallOpt) {
if (IsTailCallConvention(CalleeCC) && CCMatch)
return true;
return false;
@@ -3152,7 +3179,7 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
// Can't do sibcall if stack needs to be dynamically re-aligned. PEI needs to
// emit a special epilogue.
const X86RegisterInfo *RegInfo =
- static_cast<const X86RegisterInfo*>(getTargetMachine().getRegisterInfo());
+ static_cast<const X86RegisterInfo*>(DAG.getTarget().getRegisterInfo());
if (RegInfo->needsStackRealignment(MF))
return false;
@@ -3181,7 +3208,7 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CalleeCC, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), ArgLocs, *DAG.getContext());
+ DAG.getTarget(), ArgLocs, *DAG.getContext());
CCInfo.AnalyzeCallOperands(Outs, CC_X86);
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i)
@@ -3202,7 +3229,7 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
if (Unused) {
SmallVector<CCValAssign, 16> RVLocs;
CCState CCInfo(CalleeCC, false, DAG.getMachineFunction(),
- getTargetMachine(), RVLocs, *DAG.getContext());
+ DAG.getTarget(), RVLocs, *DAG.getContext());
CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
CCValAssign &VA = RVLocs[i];
@@ -3216,12 +3243,12 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
if (!CCMatch) {
SmallVector<CCValAssign, 16> RVLocs1;
CCState CCInfo1(CalleeCC, false, DAG.getMachineFunction(),
- getTargetMachine(), RVLocs1, *DAG.getContext());
+ DAG.getTarget(), RVLocs1, *DAG.getContext());
CCInfo1.AnalyzeCallResult(Ins, RetCC_X86);
SmallVector<CCValAssign, 16> RVLocs2;
CCState CCInfo2(CallerCC, false, DAG.getMachineFunction(),
- getTargetMachine(), RVLocs2, *DAG.getContext());
+ DAG.getTarget(), RVLocs2, *DAG.getContext());
CCInfo2.AnalyzeCallResult(Ins, RetCC_X86);
if (RVLocs1.size() != RVLocs2.size())
@@ -3248,7 +3275,7 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
// argument is passed on the stack.
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CalleeCC, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), ArgLocs, *DAG.getContext());
+ DAG.getTarget(), ArgLocs, *DAG.getContext());
// Allocate shadow area for Win64
if (IsCalleeWin64)
@@ -3265,7 +3292,7 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
MachineFrameInfo *MFI = MF.getFrameInfo();
const MachineRegisterInfo *MRI = &MF.getRegInfo();
const X86InstrInfo *TII =
- ((const X86TargetMachine&)getTargetMachine()).getInstrInfo();
+ static_cast<const X86InstrInfo *>(DAG.getTarget().getInstrInfo());
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
SDValue Arg = OutVals[i];
@@ -3288,12 +3315,12 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
if (!Subtarget->is64Bit() &&
((!isa<GlobalAddressSDNode>(Callee) &&
!isa<ExternalSymbolSDNode>(Callee)) ||
- getTargetMachine().getRelocationModel() == Reloc::PIC_)) {
+ DAG.getTarget().getRelocationModel() == Reloc::PIC_)) {
unsigned NumInRegs = 0;
// In PIC we need an extra register to formulate the address computation
// for the callee.
unsigned MaxInRegs =
- (getTargetMachine().getRelocationModel() == Reloc::PIC_) ? 2 : 3;
+ (DAG.getTarget().getRelocationModel() == Reloc::PIC_) ? 2 : 3;
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
@@ -3417,7 +3444,7 @@ static SDValue getTargetShuffleNode(unsigned Opc, SDLoc dl, EVT VT,
SDValue X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
const X86RegisterInfo *RegInfo =
- static_cast<const X86RegisterInfo*>(getTargetMachine().getRegisterInfo());
+ static_cast<const X86RegisterInfo*>(DAG.getTarget().getRegisterInfo());
X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
int ReturnAddrIndex = FuncInfo->getRAIndex();
@@ -3967,14 +3994,22 @@ static bool isINSERTPSMask(ArrayRef<int> Mask, MVT VT) {
unsigned CorrectPosV1 = 0;
unsigned CorrectPosV2 = 0;
- for (int i = 0, e = (int)VT.getVectorNumElements(); i != e; ++i)
+ for (int i = 0, e = (int)VT.getVectorNumElements(); i != e; ++i) {
+ if (Mask[i] == -1) {
+ ++CorrectPosV1;
+ ++CorrectPosV2;
+ continue;
+ }
+
if (Mask[i] == i)
++CorrectPosV1;
else if (Mask[i] == i + 4)
++CorrectPosV2;
+ }
if (CorrectPosV1 == 3 || CorrectPosV2 == 3)
- // We have 3 elements from one vector, and one from another.
+ // We have 3 elements (undefs count as elements from any vector) from one
+ // vector, and one from another.
return true;
return false;
@@ -4823,19 +4858,6 @@ static bool ShouldXformToMOVLP(SDNode *V1, SDNode *V2,
return true;
}
-/// isSplatVector - Returns true if N is a BUILD_VECTOR node whose elements are
-/// all the same.
-static bool isSplatVector(SDNode *N) {
- if (N->getOpcode() != ISD::BUILD_VECTOR)
- return false;
-
- SDValue SplatValue = N->getOperand(0);
- for (unsigned i = 1, e = N->getNumOperands(); i != e; ++i)
- if (N->getOperand(i) != SplatValue)
- return false;
- return true;
-}
-
/// isZeroShuffle - Returns true if N is a VECTOR_SHUFFLE that can be resolved
/// to a zero vector.
/// FIXME: move to dag combiner / method on ShuffleVectorSDNode
@@ -5744,18 +5766,22 @@ static SDValue LowerVectorBroadcast(SDValue Op, const X86Subtarget* Subtarget,
return SDValue();
case ISD::BUILD_VECTOR: {
- // The BUILD_VECTOR node must be a splat.
- if (!isSplatVector(Op.getNode()))
+ auto *BVOp = cast<BuildVectorSDNode>(Op.getNode());
+ BitVector UndefElements;
+ SDValue Splat = BVOp->getSplatValue(&UndefElements);
+
+ // We need a splat of a single value to use broadcast, and it doesn't
+ // make any sense if the value is only in one element of the vector.
+ if (!Splat || (VT.getVectorNumElements() - UndefElements.count()) <= 1)
return SDValue();
- Ld = Op.getOperand(0);
+ Ld = Splat;
ConstSplatVal = (Ld.getOpcode() == ISD::Constant ||
- Ld.getOpcode() == ISD::ConstantFP);
+ Ld.getOpcode() == ISD::ConstantFP);
- // The suspected load node has several users. Make sure that all
- // of its users are from the BUILD_VECTOR node.
- // Constants may have multiple users.
- if (!ConstSplatVal && !Ld->hasNUsesOfValue(VT.getVectorNumElements(), 0))
+ // Make sure that all of the users of a non-constant load are from the
+ // BUILD_VECTOR node.
+ if (!ConstSplatVal && !BVOp->isOnlyUserOf(Ld.getNode()))
return SDValue();
break;
}
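// --- Editorial sketch; not part of this patch ----------------------------
// The old isSplatVector() rejected any build_vector containing undef lanes;
// BuildVectorSDNode::getSplatValue() instead succeeds whenever all *defined*
// lanes agree, reporting the undef lanes separately. A standalone model of
// that predicate (getSplatValueModel is a made-up name; -1 marks an undef
// lane, which a real BuildVectorSDNode tracks out-of-band):
#include <cstdio>
#include <vector>

static int getSplatValueModel(const std::vector<int> &Elts, int &NumUndefs) {
  int Splat = -1;
  NumUndefs = 0;
  for (int E : Elts) {
    if (E == -1) { ++NumUndefs; continue; } // undef lane: doesn't disqualify
    if (Splat == -1) Splat = E;             // first defined lane
    else if (E != Splat) return -1;         // defined lanes disagree
  }
  return Splat;
}

int main() {
  std::vector<int> BV = {7, -1, 7, 7};      // <x, undef, x, x>
  int Undefs;
  int Splat = getSplatValueModel(BV, Undefs);
  // Mirrors the new guard: broadcast needs a splat covering >1 defined lane.
  bool UseBroadcast = Splat != -1 && BV.size() - Undefs > 1;
  std::printf("splat=%d broadcast=%d\n", Splat, UseBroadcast); // splat=7 broadcast=1
}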
@@ -6042,6 +6068,433 @@ X86TargetLowering::LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG) const {
return DAG.getNode(ISD::BITCAST, dl, VT, Select);
}
+/// \brief Return true if \p N implements a horizontal binop and return the
+/// operands of the horizontal binop in V0 and V1.
+///
+/// This is a helper function of PerformBUILD_VECTORCombine.
+/// This function checks whether the input build_vector \p N implements a
+/// horizontal operation. Parameter \p Opcode defines the kind of horizontal
+/// operation to match.
+/// For example, if \p Opcode is equal to ISD::ADD, then this function
+/// checks if \p N implements a horizontal arithmetic add; if instead \p Opcode
+/// is equal to ISD::SUB, then this function checks if this is a horizontal
+/// arithmetic sub.
+///
+/// This function only analyzes elements of \p N whose indices are
+/// in range [BaseIdx, LastIdx).
+static bool isHorizontalBinOp(const BuildVectorSDNode *N, unsigned Opcode,
+ SelectionDAG &DAG,
+ unsigned BaseIdx, unsigned LastIdx,
+ SDValue &V0, SDValue &V1) {
+ EVT VT = N->getValueType(0);
+
+ assert(BaseIdx * 2 <= LastIdx && "Invalid Indices in input!");
+ assert(VT.isVector() && VT.getVectorNumElements() >= LastIdx &&
+ "Invalid Vector in input!");
+
+ bool IsCommutable = (Opcode == ISD::ADD || Opcode == ISD::FADD);
+ bool CanFold = true;
+ unsigned ExpectedVExtractIdx = BaseIdx;
+ unsigned NumElts = LastIdx - BaseIdx;
+ V0 = DAG.getUNDEF(VT);
+ V1 = DAG.getUNDEF(VT);
+
+ // Check if N implements a horizontal binop.
+ for (unsigned i = 0, e = NumElts; i != e && CanFold; ++i) {
+ SDValue Op = N->getOperand(i + BaseIdx);
+
+ // Skip UNDEFs.
+ if (Op->getOpcode() == ISD::UNDEF) {
+ // Update the expected vector extract index.
+ if (i * 2 == NumElts)
+ ExpectedVExtractIdx = BaseIdx;
+ ExpectedVExtractIdx += 2;
+ continue;
+ }
+
+ CanFold = Op->getOpcode() == Opcode && Op->hasOneUse();
+
+ if (!CanFold)
+ break;
+
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+
+ // Try to match the following pattern:
+ // (BINOP (extract_vector_elt A, I), (extract_vector_elt A, I+1))
+ CanFold = (Op0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+ Op1.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+ Op0.getOperand(0) == Op1.getOperand(0) &&
+ isa<ConstantSDNode>(Op0.getOperand(1)) &&
+ isa<ConstantSDNode>(Op1.getOperand(1)));
+ if (!CanFold)
+ break;
+
+ unsigned I0 = cast<ConstantSDNode>(Op0.getOperand(1))->getZExtValue();
+ unsigned I1 = cast<ConstantSDNode>(Op1.getOperand(1))->getZExtValue();
+
+ if (i * 2 < NumElts) {
+ if (V0.getOpcode() == ISD::UNDEF)
+ V0 = Op0.getOperand(0);
+ } else {
+ if (V1.getOpcode() == ISD::UNDEF)
+ V1 = Op0.getOperand(0);
+ if (i * 2 == NumElts)
+ ExpectedVExtractIdx = BaseIdx;
+ }
+
+ SDValue Expected = (i * 2 < NumElts) ? V0 : V1;
+ if (I0 == ExpectedVExtractIdx)
+ CanFold = I1 == I0 + 1 && Op0.getOperand(0) == Expected;
+ else if (IsCommutable && I1 == ExpectedVExtractIdx) {
+ // Try to match the following dag sequence:
+ // (BINOP (extract_vector_elt A, I+1), (extract_vector_elt A, I))
+ CanFold = I0 == I1 + 1 && Op1.getOperand(0) == Expected;
+ } else
+ CanFold = false;
+
+ ExpectedVExtractIdx += 2;
+ }
+
+ return CanFold;
+}
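// --- Editorial sketch; not part of this patch ----------------------------
// What the matcher above recognizes, in concrete terms: for v4f32 with
// Opcode == ISD::FADD it accepts a build_vector whose lane i computes
// A[2i] + A[2i+1] in the first half and B[2i'] + B[2i'+1] in the second,
// which is exactly the SSE3 HADDPS semantics. A scalar model:
#include <cstdio>

static void haddps(const float a[4], const float b[4], float dst[4]) {
  dst[0] = a[0] + a[1];  // pairwise sums of the first operand...
  dst[1] = a[2] + a[3];
  dst[2] = b[0] + b[1];  // ...then pairwise sums of the second operand
  dst[3] = b[2] + b[3];
}

int main() {
  float A[4] = {1, 2, 3, 4}, B[4] = {10, 20, 30, 40}, D[4];
  haddps(A, B, D);
  std::printf("%g %g %g %g\n", D[0], D[1], D[2], D[3]); // 3 7 30 70
}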
+
+/// \brief Emit a sequence of two 128-bit horizontal add/sub followed by
+/// a concat_vector.
+///
+/// This is a helper function of PerformBUILD_VECTORCombine.
+/// This function expects two 256-bit vectors called V0 and V1.
+/// At first, each vector is split into two separate 128-bit vectors.
+/// Then, the resulting 128-bit vectors are used to implement two
+/// horizontal binary operations.
+///
+/// The kind of horizontal binary operation is defined by \p X86Opcode.
+///
+/// \p Mode specifies how the 128-bit parts of V0 and V1 are passed to the two
+/// new horizontal binops.
+/// When Mode is set, the first horizontal binop dag node takes as input the
+/// lower 128 bits of V0 and the upper 128 bits of V0. The second horizontal
+/// binop dag node takes as input the lower 128 bits of V1 and the upper
+/// 128 bits of V1.
+/// Example:
+/// HADD V0_LO, V0_HI
+/// HADD V1_LO, V1_HI
+///
+/// Otherwise, the first horizontal binop dag node takes as input the lower
+/// 128 bits of V0 and the lower 128 bits of V1, and the second horizontal
+/// binop dag node takes the upper 128 bits of V0 and the upper 128 bits of V1.
+/// Example:
+/// HADD V0_LO, V1_LO
+/// HADD V0_HI, V1_HI
+///
+/// If \p isUndefLO is set, then the algorithm propagates UNDEF to the lower
+/// 128 bits of the result. If \p isUndefHI is set, then UNDEF is propagated
+/// to the upper 128 bits of the result.
+static SDValue ExpandHorizontalBinOp(const SDValue &V0, const SDValue &V1,
+ SDLoc DL, SelectionDAG &DAG,
+ unsigned X86Opcode, bool Mode,
+ bool isUndefLO, bool isUndefHI) {
+ EVT VT = V0.getValueType();
+ assert(VT.is256BitVector() && VT == V1.getValueType() &&
+ "Invalid nodes in input!");
+
+ unsigned NumElts = VT.getVectorNumElements();
+ SDValue V0_LO = Extract128BitVector(V0, 0, DAG, DL);
+ SDValue V0_HI = Extract128BitVector(V0, NumElts/2, DAG, DL);
+ SDValue V1_LO = Extract128BitVector(V1, 0, DAG, DL);
+ SDValue V1_HI = Extract128BitVector(V1, NumElts/2, DAG, DL);
+ EVT NewVT = V0_LO.getValueType();
+
+ SDValue LO = DAG.getUNDEF(NewVT);
+ SDValue HI = DAG.getUNDEF(NewVT);
+
+ if (Mode) {
+ // Don't emit a horizontal binop if the result is expected to be UNDEF.
+ if (!isUndefLO && V0->getOpcode() != ISD::UNDEF)
+ LO = DAG.getNode(X86Opcode, DL, NewVT, V0_LO, V0_HI);
+ if (!isUndefHI && V1->getOpcode() != ISD::UNDEF)
+ HI = DAG.getNode(X86Opcode, DL, NewVT, V1_LO, V1_HI);
+ } else {
+ // Don't emit a horizontal binop if the result is expected to be UNDEF.
+ if (!isUndefLO && (V0_LO->getOpcode() != ISD::UNDEF ||
+ V1_LO->getOpcode() != ISD::UNDEF))
+ LO = DAG.getNode(X86Opcode, DL, NewVT, V0_LO, V1_LO);
+
+ if (!isUndefHI && (V0_HI->getOpcode() != ISD::UNDEF ||
+ V1_HI->getOpcode() != ISD::UNDEF))
+ HI = DAG.getNode(X86Opcode, DL, NewVT, V0_HI, V1_HI);
+ }
+
+ return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, LO, HI);
+}
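// --- Editorial sketch; not part of this patch ----------------------------
// A scalar model of the Mode flag, with the made-up helper hadd4() standing
// in for one 128-bit X86ISD::FHADD (the undef-propagation logic is omitted):
#include <cstdio>

static void hadd4(const float a[4], const float b[4], float d[4]) {
  d[0] = a[0] + a[1]; d[1] = a[2] + a[3];
  d[2] = b[0] + b[1]; d[3] = b[2] + b[3];
}

static void expandHAdd(const float V0[8], const float V1[8], bool Mode,
                       float Out[8]) {
  if (Mode) {  // LO = HADD(V0_LO, V0_HI), HI = HADD(V1_LO, V1_HI)
    hadd4(V0, V0 + 4, Out);
    hadd4(V1, V1 + 4, Out + 4);
  } else {     // LO = HADD(V0_LO, V1_LO), HI = HADD(V0_HI, V1_HI)
    hadd4(V0, V1, Out);
    hadd4(V0 + 4, V1 + 4, Out + 4);
  }
}

int main() {
  float V0[8] = {1, 2, 3, 4, 5, 6, 7, 8}, V1[8] = {8, 7, 6, 5, 4, 3, 2, 1}, O[8];
  expandHAdd(V0, V1, /*Mode=*/true, O);
  for (float F : O) std::printf("%g ", F); // 3 7 11 15 15 11 7 3
  std::printf("\n");
}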
+
+/// \brief Try to fold a build_vector that performs an 'addsub' into the
+/// sequence of 'vadd + vsub + blendi'.
+static SDValue matchAddSub(const BuildVectorSDNode *BV, SelectionDAG &DAG,
+ const X86Subtarget *Subtarget) {
+ SDLoc DL(BV);
+ EVT VT = BV->getValueType(0);
+ unsigned NumElts = VT.getVectorNumElements();
+ SDValue InVec0 = DAG.getUNDEF(VT);
+ SDValue InVec1 = DAG.getUNDEF(VT);
+
+ assert((VT == MVT::v8f32 || VT == MVT::v4f64 || VT == MVT::v4f32 ||
+ VT == MVT::v2f64) && "build_vector with an invalid type found!");
+
+ // Don't try to emit a VSELECT that cannot be lowered into a blend.
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (!TLI.isOperationLegalOrCustom(ISD::VSELECT, VT))
+ return SDValue();
+
+ // Odd-numbered elements in the input build vector are obtained from
+ // adding two integer/float elements.
+ // Even-numbered elements in the input build vector are obtained from
+ // subtracting two integer/float elements.
+ unsigned ExpectedOpcode = ISD::FSUB;
+ unsigned NextExpectedOpcode = ISD::FADD;
+ bool AddFound = false;
+ bool SubFound = false;
+
+ for (unsigned i = 0, e = NumElts; i != e; i++) {
+ SDValue Op = BV->getOperand(i);
+
+ // Skip 'undef' values.
+ unsigned Opcode = Op.getOpcode();
+ if (Opcode == ISD::UNDEF) {
+ std::swap(ExpectedOpcode, NextExpectedOpcode);
+ continue;
+ }
+
+ // Early exit if we found an unexpected opcode.
+ if (Opcode != ExpectedOpcode)
+ return SDValue();
+
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+
+ // Try to match the following pattern:
+ // (BINOP (extract_vector_elt A, i), (extract_vector_elt B, i))
+ // Early exit if we cannot match that sequence.
+ if (Op0.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
+ Op1.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
+ !isa<ConstantSDNode>(Op0.getOperand(1)) ||
+ !isa<ConstantSDNode>(Op1.getOperand(1)) ||
+ Op0.getOperand(1) != Op1.getOperand(1))
+ return SDValue();
+
+ unsigned I0 = cast<ConstantSDNode>(Op0.getOperand(1))->getZExtValue();
+ if (I0 != i)
+ return SDValue();
+
+ // We found a valid add/sub node. Update the information accordingly.
+ if (i & 1)
+ AddFound = true;
+ else
+ SubFound = true;
+
+ // Update InVec0 and InVec1.
+ if (InVec0.getOpcode() == ISD::UNDEF)
+ InVec0 = Op0.getOperand(0);
+ if (InVec1.getOpcode() == ISD::UNDEF)
+ InVec1 = Op1.getOperand(0);
+
+ // Make sure that the operands of each add/sub node always come
+ // from the same pair of vectors.
+ if (InVec0 != Op0.getOperand(0)) {
+ if (ExpectedOpcode == ISD::FSUB)
+ return SDValue();
+
+ // FADD is commutable. Try to commute the operands
+ // and then test again.
+ std::swap(Op0, Op1);
+ if (InVec0 != Op0.getOperand(0))
+ return SDValue();
+ }
+
+ if (InVec1 != Op1.getOperand(0))
+ return SDValue();
+
+ // Update the pair of expected opcodes.
+ std::swap(ExpectedOpcode, NextExpectedOpcode);
+ }
+
+ // Don't try to fold this build_vector into a VSELECT if it has
+ // too many UNDEF operands.
+ if (AddFound && SubFound && InVec0.getOpcode() != ISD::UNDEF &&
+ InVec1.getOpcode() != ISD::UNDEF) {
+ // Emit a sequence of vector add and sub followed by a VSELECT.
+ // The new VSELECT will be lowered into a BLENDI.
+ // At ISel stage, we pattern-match the sequence 'add + sub + BLENDI'
+ // and emit a single ADDSUB instruction.
+ SDValue Sub = DAG.getNode(ExpectedOpcode, DL, VT, InVec0, InVec1);
+ SDValue Add = DAG.getNode(NextExpectedOpcode, DL, VT, InVec0, InVec1);
+
+ // Construct the VSELECT mask.
+ EVT MaskVT = VT.changeVectorElementTypeToInteger();
+ EVT SVT = MaskVT.getVectorElementType();
+ unsigned SVTBits = SVT.getSizeInBits();
+ SmallVector<SDValue, 8> Ops;
+
+ for (unsigned i = 0, e = NumElts; i != e; ++i) {
+ APInt Value = i & 1 ? APInt::getNullValue(SVTBits) :
+ APInt::getAllOnesValue(SVTBits);
+ SDValue Constant = DAG.getConstant(Value, SVT);
+ Ops.push_back(Constant);
+ }
+
+ SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, DL, MaskVT, Ops);
+ return DAG.getSelect(DL, VT, Mask, Sub, Add);
+ }
+
+ return SDValue();
+}
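// --- Editorial sketch; not part of this patch ----------------------------
// The 'vsub + vadd + blend' sequence built above computes the ADDSUB
// semantics: even lanes subtract, odd lanes add. The all-ones mask lanes
// (the even ones) make the VSELECT pick the Sub result. A scalar model:
#include <cstdio>

static void addsubps(const float a[4], const float b[4], float dst[4]) {
  for (int i = 0; i < 4; ++i)
    dst[i] = (i & 1) ? a[i] + b[i]   // odd lane:  add
                     : a[i] - b[i];  // even lane: subtract
}

int main() {
  float A[4] = {1, 2, 3, 4}, B[4] = {10, 10, 10, 10}, D[4];
  addsubps(A, B, D);
  std::printf("%g %g %g %g\n", D[0], D[1], D[2], D[3]); // -9 12 -7 14
}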
+
+static SDValue PerformBUILD_VECTORCombine(SDNode *N, SelectionDAG &DAG,
+ const X86Subtarget *Subtarget) {
+ SDLoc DL(N);
+ EVT VT = N->getValueType(0);
+ unsigned NumElts = VT.getVectorNumElements();
+ BuildVectorSDNode *BV = cast<BuildVectorSDNode>(N);
+ SDValue InVec0, InVec1;
+
+ // Try to match an ADDSUB.
+ if ((Subtarget->hasSSE3() && (VT == MVT::v4f32 || VT == MVT::v2f64)) ||
+ (Subtarget->hasAVX() && (VT == MVT::v8f32 || VT == MVT::v4f64))) {
+ SDValue Value = matchAddSub(BV, DAG, Subtarget);
+ if (Value.getNode())
+ return Value;
+ }
+
+ // Try to match horizontal ADD/SUB.
+ unsigned NumUndefsLO = 0;
+ unsigned NumUndefsHI = 0;
+ unsigned Half = NumElts/2;
+
+ // Count the number of UNDEF operands in the input build_vector.
+ for (unsigned i = 0, e = Half; i != e; ++i)
+ if (BV->getOperand(i)->getOpcode() == ISD::UNDEF)
+ NumUndefsLO++;
+
+ for (unsigned i = Half, e = NumElts; i != e; ++i)
+ if (BV->getOperand(i)->getOpcode() == ISD::UNDEF)
+ NumUndefsHI++;
+
+ // Early exit if this is either a build_vector of all UNDEFs, or if all the
+ // operands but one are UNDEF.
+ if (NumUndefsLO + NumUndefsHI + 1 >= NumElts)
+ return SDValue();
+
+ if ((VT == MVT::v4f32 || VT == MVT::v2f64) && Subtarget->hasSSE3()) {
+ // Try to match an SSE3 float HADD/HSUB.
+ if (isHorizontalBinOp(BV, ISD::FADD, DAG, 0, NumElts, InVec0, InVec1))
+ return DAG.getNode(X86ISD::FHADD, DL, VT, InVec0, InVec1);
+
+ if (isHorizontalBinOp(BV, ISD::FSUB, DAG, 0, NumElts, InVec0, InVec1))
+ return DAG.getNode(X86ISD::FHSUB, DL, VT, InVec0, InVec1);
+ } else if ((VT == MVT::v4i32 || VT == MVT::v8i16) && Subtarget->hasSSSE3()) {
+ // Try to match an SSSE3 integer HADD/HSUB.
+ if (isHorizontalBinOp(BV, ISD::ADD, DAG, 0, NumElts, InVec0, InVec1))
+ return DAG.getNode(X86ISD::HADD, DL, VT, InVec0, InVec1);
+
+ if (isHorizontalBinOp(BV, ISD::SUB, DAG, 0, NumElts, InVec0, InVec1))
+ return DAG.getNode(X86ISD::HSUB, DL, VT, InVec0, InVec1);
+ }
+
+ if (!Subtarget->hasAVX())
+ return SDValue();
+
+ if ((VT == MVT::v8f32 || VT == MVT::v4f64)) {
+ // Try to match an AVX horizontal add/sub of packed single/double
+ // precision floating point values from 256-bit vectors.
+ SDValue InVec2, InVec3;
+ if (isHorizontalBinOp(BV, ISD::FADD, DAG, 0, Half, InVec0, InVec1) &&
+ isHorizontalBinOp(BV, ISD::FADD, DAG, Half, NumElts, InVec2, InVec3) &&
+ ((InVec0.getOpcode() == ISD::UNDEF ||
+ InVec2.getOpcode() == ISD::UNDEF) || InVec0 == InVec2) &&
+ ((InVec1.getOpcode() == ISD::UNDEF ||
+ InVec3.getOpcode() == ISD::UNDEF) || InVec1 == InVec3))
+ return DAG.getNode(X86ISD::FHADD, DL, VT, InVec0, InVec1);
+
+ if (isHorizontalBinOp(BV, ISD::FSUB, DAG, 0, Half, InVec0, InVec1) &&
+ isHorizontalBinOp(BV, ISD::FSUB, DAG, Half, NumElts, InVec2, InVec3) &&
+ ((InVec0.getOpcode() == ISD::UNDEF ||
+ InVec2.getOpcode() == ISD::UNDEF) || InVec0 == InVec2) &&
+ ((InVec1.getOpcode() == ISD::UNDEF ||
+ InVec3.getOpcode() == ISD::UNDEF) || InVec1 == InVec3))
+ return DAG.getNode(X86ISD::FHSUB, DL, VT, InVec0, InVec1);
+ } else if (VT == MVT::v8i32 || VT == MVT::v16i16) {
+ // Try to match an AVX2 horizontal add/sub of signed integers.
+ SDValue InVec2, InVec3;
+ unsigned X86Opcode;
+ bool CanFold = true;
+
+ if (isHorizontalBinOp(BV, ISD::ADD, DAG, 0, Half, InVec0, InVec1) &&
+ isHorizontalBinOp(BV, ISD::ADD, DAG, Half, NumElts, InVec2, InVec3) &&
+ ((InVec0.getOpcode() == ISD::UNDEF ||
+ InVec2.getOpcode() == ISD::UNDEF) || InVec0 == InVec2) &&
+ ((InVec1.getOpcode() == ISD::UNDEF ||
+ InVec3.getOpcode() == ISD::UNDEF) || InVec1 == InVec3))
+ X86Opcode = X86ISD::HADD;
+ else if (isHorizontalBinOp(BV, ISD::SUB, DAG, 0, Half, InVec0, InVec1) &&
+ isHorizontalBinOp(BV, ISD::SUB, DAG, Half, NumElts, InVec2, InVec3) &&
+ ((InVec0.getOpcode() == ISD::UNDEF ||
+ InVec2.getOpcode() == ISD::UNDEF) || InVec0 == InVec2) &&
+ ((InVec1.getOpcode() == ISD::UNDEF ||
+ InVec3.getOpcode() == ISD::UNDEF) || InVec1 == InVec3))
+ X86Opcode = X86ISD::HSUB;
+ else
+ CanFold = false;
+
+ if (CanFold) {
+ // Fold this build_vector into a single horizontal add/sub.
+ // Do this only if the target has AVX2.
+ if (Subtarget->hasAVX2())
+ return DAG.getNode(X86Opcode, DL, VT, InVec0, InVec1);
+
+ // Do not try to expand this build_vector into a pair of horizontal
+ // add/sub if we can emit a pair of scalar add/sub.
+ if (NumUndefsLO + 1 == Half || NumUndefsHI + 1 == Half)
+ return SDValue();
+
+ // Convert this build_vector into a pair of horizontal binop followed by
+ // a concat vector.
+ bool isUndefLO = NumUndefsLO == Half;
+ bool isUndefHI = NumUndefsHI == Half;
+ return ExpandHorizontalBinOp(InVec0, InVec1, DL, DAG, X86Opcode, false,
+ isUndefLO, isUndefHI);
+ }
+ }
+
+ if ((VT == MVT::v8f32 || VT == MVT::v4f64 || VT == MVT::v8i32 ||
+ VT == MVT::v16i16) && Subtarget->hasAVX()) {
+ unsigned X86Opcode;
+ if (isHorizontalBinOp(BV, ISD::ADD, DAG, 0, NumElts, InVec0, InVec1))
+ X86Opcode = X86ISD::HADD;
+ else if (isHorizontalBinOp(BV, ISD::SUB, DAG, 0, NumElts, InVec0, InVec1))
+ X86Opcode = X86ISD::HSUB;
+ else if (isHorizontalBinOp(BV, ISD::FADD, DAG, 0, NumElts, InVec0, InVec1))
+ X86Opcode = X86ISD::FHADD;
+ else if (isHorizontalBinOp(BV, ISD::FSUB, DAG, 0, NumElts, InVec0, InVec1))
+ X86Opcode = X86ISD::FHSUB;
+ else
+ return SDValue();
+
+ // Don't try to expand this build_vector into a pair of horizontal add/sub
+ // if we can simply emit a pair of scalar add/sub.
+ if (NumUndefsLO + 1 == Half || NumUndefsHI + 1 == Half)
+ return SDValue();
+
+ // Convert this build_vector into two horizontal add/sub followed by
+ // a concat vector.
+ bool isUndefLO = NumUndefsLO == Half;
+ bool isUndefHI = NumUndefsHI == Half;
+ return ExpandHorizontalBinOp(InVec0, InVec1, DL, DAG, X86Opcode, true,
+ isUndefLO, isUndefHI);
+ }
+
+ return SDValue();
+}
+
SDValue
X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
SDLoc dl(Op);
@@ -6429,38 +6882,1160 @@ static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) {
return LowerAVXCONCAT_VECTORS(Op, DAG);
}
-// Try to lower a shuffle node into a simple blend instruction.
-static SDValue
-LowerVECTOR_SHUFFLEtoBlend(ShuffleVectorSDNode *SVOp,
- const X86Subtarget *Subtarget, SelectionDAG &DAG) {
- SDValue V1 = SVOp->getOperand(0);
- SDValue V2 = SVOp->getOperand(1);
- SDLoc dl(SVOp);
- MVT VT = SVOp->getSimpleValueType(0);
+
+//===----------------------------------------------------------------------===//
+// Vector shuffle lowering
+//
+// This is an experimental code path for lowering vector shuffles on x86. It is
+// designed to handle arbitrary vector shuffles and blends, gracefully
+// degrading performance as necessary. It works hard to recognize idiomatic
+// shuffles and lower them to optimal instruction patterns without leaving
+// a framework that allows reasonably efficient handling of all vector shuffle
+// patterns.
+//===----------------------------------------------------------------------===//
+
+/// \brief Tiny helper function to identify a no-op mask.
+///
+/// This is a somewhat boring predicate function. It checks whether the mask
+/// array input, which is assumed to be a single-input shuffle mask of the kind
+/// used by the X86 shuffle instructions (not a fully general
+/// ShuffleVectorSDNode mask) requires any shuffles to occur. Both undef and an
+/// in-place shuffle are 'no-op's.
+static bool isNoopShuffleMask(ArrayRef<int> Mask) {
+ for (int i = 0, Size = Mask.size(); i < Size; ++i)
+ if (Mask[i] != -1 && Mask[i] != i)
+ return false;
+ return true;
+}
+
+/// \brief Helper function to classify a mask as a single-input mask.
+///
+/// This isn't a generic single-input test because in the vector shuffle
+/// lowering we canonicalize single inputs to be the first input operand. This
+/// means we can more quickly test for a single input by only checking whether
+/// an input from the second operand exists. We also assume that the size of
+/// the mask corresponds to the size of the input vectors, which isn't true
+/// in the fully general case.
+static bool isSingleInputShuffleMask(ArrayRef<int> Mask) {
+ for (int M : Mask)
+ if (M >= (int)Mask.size())
+ return false;
+ return true;
+}
+
+/// \brief Get a 4-lane 8-bit shuffle immediate for a mask.
+///
+/// This helper function produces an 8-bit shuffle immediate corresponding to
+/// the ubiquitous shuffle encoding scheme used in x86 instructions for
+/// shuffling 4 lanes. It can be used with most of the PSHUF instructions for
+/// example.
+///
+/// NB: We rely heavily on "undef" masks preserving the input lane.
+static SDValue getV4X86ShuffleImm8ForMask(ArrayRef<int> Mask,
+ SelectionDAG &DAG) {
+ assert(Mask.size() == 4 && "Only 4-lane shuffle masks");
+ assert(Mask[0] >= -1 && Mask[0] < 4 && "Out of bound mask element!");
+ assert(Mask[1] >= -1 && Mask[1] < 4 && "Out of bound mask element!");
+ assert(Mask[2] >= -1 && Mask[2] < 4 && "Out of bound mask element!");
+ assert(Mask[3] >= -1 && Mask[3] < 4 && "Out of bound mask element!");
+
+ unsigned Imm = 0;
+ Imm |= (Mask[0] == -1 ? 0 : Mask[0]) << 0;
+ Imm |= (Mask[1] == -1 ? 1 : Mask[1]) << 2;
+ Imm |= (Mask[2] == -1 ? 2 : Mask[2]) << 4;
+ Imm |= (Mask[3] == -1 ? 3 : Mask[3]) << 6;
+ return DAG.getConstant(Imm, MVT::i8);
+}
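// --- Editorial sketch; not part of this patch ----------------------------
// A standalone equivalent of the immediate computation: four 2-bit fields,
// with an undef lane (-1) defaulting to its own index so the input lane is
// preserved, as the NB above relies on:
#include <cassert>
#include <cstdio>

static unsigned shuffleImm8(const int Mask[4]) {
  unsigned Imm = 0;
  for (int i = 0; i < 4; ++i) {
    assert(Mask[i] >= -1 && Mask[i] < 4 && "Out of bound mask element!");
    Imm |= unsigned(Mask[i] == -1 ? i : Mask[i]) << (2 * i);
  }
  return Imm;
}

int main() {
  int Mask[4] = {3, 1, -1, 0};                // lane 2 undef -> keeps lane 2
  std::printf("0x%02x\n", shuffleImm8(Mask)); // 0x27
}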
+
+/// \brief Handle lowering of 2-lane 64-bit floating point shuffles.
+///
+/// This is the basis function for the 2-lane 64-bit shuffles as we have full
+/// support for floating point shuffles but not integer shuffles. These
+/// instructions will incur a domain crossing penalty on some chips though so
+/// it is better to avoid lowering through this for integer vectors where
+/// possible.
+static SDValue lowerV2F64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
+ const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
+ SDLoc DL(Op);
+ assert(Op.getSimpleValueType() == MVT::v2f64 && "Bad shuffle type!");
+ assert(V1.getSimpleValueType() == MVT::v2f64 && "Bad operand type!");
+ assert(V2.getSimpleValueType() == MVT::v2f64 && "Bad operand type!");
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
+ ArrayRef<int> Mask = SVOp->getMask();
+ assert(Mask.size() == 2 && "Unexpected mask size for v2 shuffle!");
+
+ if (isSingleInputShuffleMask(Mask)) {
+ // Straight shuffle of a single input vector. Simulate this by using the
+ // single input as both of the "inputs" to this instruction.
+ unsigned SHUFPDMask = (Mask[0] == 1) | ((Mask[1] == 1) << 1);
+ return DAG.getNode(X86ISD::SHUFP, SDLoc(Op), MVT::v2f64, V1, V1,
+ DAG.getConstant(SHUFPDMask, MVT::i8));
+ }
+ assert(Mask[0] >= 0 && Mask[0] < 2 && "Non-canonicalized blend!");
+ assert(Mask[1] >= 2 && "Non-canonicalized blend!");
+
+ unsigned SHUFPDMask = (Mask[0] == 1) | (((Mask[1] - 2) == 1) << 1);
+ return DAG.getNode(X86ISD::SHUFP, SDLoc(Op), MVT::v2f64, V1, V2,
+ DAG.getConstant(SHUFPDMask, MVT::i8));
+}
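// --- Editorial sketch; not part of this patch ----------------------------
// SHUFPD takes one selector bit per result lane: bit 0 picks dst[0] from the
// first operand, bit 1 picks dst[1] from the second. That is why the
// single-input case above passes V1 twice and the blend case uses (V1, V2):
#include <cstdio>

static void shufpd(const double a[2], const double b[2], unsigned Imm,
                   double dst[2]) {
  dst[0] = a[Imm & 1];        // bit 0: element of the first operand
  dst[1] = b[(Imm >> 1) & 1]; // bit 1: element of the second operand
}

int main() {
  double V1[2] = {1.0, 2.0}, V2[2] = {3.0, 4.0}, D[2];
  unsigned Imm = 2;           // mask <0, 3>: bit0 = 0, bit1 = (3 - 2) = 1
  shufpd(V1, V2, Imm, D);
  std::printf("%g %g\n", D[0], D[1]); // 1 4
}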
+
+/// \brief Handle lowering of 2-lane 64-bit integer shuffles.
+///
+/// Tries to lower a 2-lane 64-bit shuffle using shuffle operations provided by
+/// the integer unit to minimize domain crossing penalties. However, for blends
+/// it falls back to the floating point shuffle operation with appropriate bit
+/// casting.
+static SDValue lowerV2I64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
+ const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
+ SDLoc DL(Op);
+ assert(Op.getSimpleValueType() == MVT::v2i64 && "Bad shuffle type!");
+ assert(V1.getSimpleValueType() == MVT::v2i64 && "Bad operand type!");
+ assert(V2.getSimpleValueType() == MVT::v2i64 && "Bad operand type!");
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
+ ArrayRef<int> Mask = SVOp->getMask();
+ assert(Mask.size() == 2 && "Unexpected mask size for v2 shuffle!");
+
+ if (isSingleInputShuffleMask(Mask)) {
+ // Straight shuffle of a single input vector. For everything from SSE2
+ // onward this has a single fast instruction with no scary immediates.
+ // We have to map the mask as it is actually a v4i32 shuffle instruction.
+ V1 = DAG.getNode(ISD::BITCAST, DL, MVT::v4i32, V1);
+ int WidenedMask[4] = {
+ std::max(Mask[0], 0) * 2, std::max(Mask[0], 0) * 2 + 1,
+ std::max(Mask[1], 0) * 2, std::max(Mask[1], 0) * 2 + 1};
+ return DAG.getNode(
+ ISD::BITCAST, DL, MVT::v2i64,
+ DAG.getNode(X86ISD::PSHUFD, SDLoc(Op), MVT::v4i32, V1,
+ getV4X86ShuffleImm8ForMask(WidenedMask, DAG)));
+ }
+
+ // We implement this with SHUFPD which is pretty lame because it will likely
+ // incur 2 cycles of stall for integer vectors on Nehalem and older chips.
+ // However, all the alternatives are still more cycles and newer chips don't
+ // have this problem. It would be really nice if x86 had better shuffles here.
+ V1 = DAG.getNode(ISD::BITCAST, DL, MVT::v2f64, V1);
+ V2 = DAG.getNode(ISD::BITCAST, DL, MVT::v2f64, V2);
+ return DAG.getNode(ISD::BITCAST, DL, MVT::v2i64,
+ DAG.getVectorShuffle(MVT::v2f64, DL, V1, V2, Mask));
+}
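// --- Editorial sketch; not part of this patch ----------------------------
// The widened-mask trick: each 64-bit lane m of a v2i64 mask becomes the
// 32-bit lane pair {2m, 2m+1} for the v4i32 PSHUFD, with undef (-1) clamped
// to 0 since any source lane works there:
#include <cstdio>

static void widenV2MaskToV4(const int Mask[2], int Widened[4]) {
  for (int i = 0; i < 2; ++i) {
    int M = Mask[i] < 0 ? 0 : Mask[i];
    Widened[2 * i] = 2 * M;
    Widened[2 * i + 1] = 2 * M + 1;
  }
}

int main() {
  int Mask[2] = {1, -1}, W[4];
  widenV2MaskToV4(Mask, W);
  std::printf("%d %d %d %d\n", W[0], W[1], W[2], W[3]); // 2 3 0 1
}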
+
+/// \brief Lower 4-lane 32-bit floating point shuffles.
+///
+/// Uses instructions exclusively from the floating point unit to minimize
+/// domain crossing penalties, as these are sufficient to implement all v4f32
+/// shuffles.
+static SDValue lowerV4F32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
+ const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
+ SDLoc DL(Op);
+ assert(Op.getSimpleValueType() == MVT::v4f32 && "Bad shuffle type!");
+ assert(V1.getSimpleValueType() == MVT::v4f32 && "Bad operand type!");
+ assert(V2.getSimpleValueType() == MVT::v4f32 && "Bad operand type!");
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
+ ArrayRef<int> Mask = SVOp->getMask();
+ assert(Mask.size() == 4 && "Unexpected mask size for v4 shuffle!");
+
+ SDValue LowV = V1, HighV = V2;
+ int NewMask[4] = {Mask[0], Mask[1], Mask[2], Mask[3]};
+
+ int NumV2Elements =
+ std::count_if(Mask.begin(), Mask.end(), [](int M) { return M >= 4; });
+
+ if (NumV2Elements == 0)
+ // Straight shuffle of a single input vector. We pass the input vector to
+ // both operands to simulate this with a SHUFPS.
+ return DAG.getNode(X86ISD::SHUFP, DL, MVT::v4f32, V1, V1,
+ getV4X86ShuffleImm8ForMask(Mask, DAG));
+
+ if (NumV2Elements == 1) {
+ int V2Index =
+ std::find_if(Mask.begin(), Mask.end(), [](int M) { return M >= 4; }) -
+ Mask.begin();
+ // Compute the index adjacent to V2Index and in the same half by toggling
+ // the low bit.
+ int V2AdjIndex = V2Index ^ 1;
+
+ if (Mask[V2AdjIndex] == -1) {
+ // Handles all the cases where we have a single V2 element and an undef.
+ // This will only ever happen in the high lanes because we commute the
+ // vector otherwise.
+ if (V2Index < 2)
+ std::swap(LowV, HighV);
+ NewMask[V2Index] -= 4;
+ } else {
+ // Handle the case where the V2 element ends up adjacent to a V1 element.
+ // To make this work, blend them together as the first step.
+ int V1Index = V2AdjIndex;
+ int BlendMask[4] = {Mask[V2Index] - 4, 0, Mask[V1Index], 0};
+ V2 = DAG.getNode(X86ISD::SHUFP, DL, MVT::v4f32, V2, V1,
+ getV4X86ShuffleImm8ForMask(BlendMask, DAG));
+
+ // Now proceed to reconstruct the final blend as we have the necessary
+ // high or low half formed.
+ if (V2Index < 2) {
+ LowV = V2;
+ HighV = V1;
+ } else {
+ HighV = V2;
+ }
+ NewMask[V1Index] = 2; // We put the V1 element in V2[2].
+ NewMask[V2Index] = 0; // We shifted the V2 element into V2[0].
+ }
+ } else if (NumV2Elements == 2) {
+ if (Mask[0] < 4 && Mask[1] < 4) {
+ // Handle the easy case where we have V1 in the low lanes and V2 in the
+ // high lanes. We never see this reversed because we sort the shuffle.
+ NewMask[2] -= 4;
+ NewMask[3] -= 4;
+ } else {
+ // We have a mixture of V1 and V2 in both low and high lanes. Rather than
+ // trying to place elements directly, just blend them and set up the final
+ // shuffle to place them.
+
+ // The first two blend mask elements are for V1, the second two are for
+ // V2.
+ int BlendMask[4] = {Mask[0] < 4 ? Mask[0] : Mask[1],
+ Mask[2] < 4 ? Mask[2] : Mask[3],
+ (Mask[0] >= 4 ? Mask[0] : Mask[1]) - 4,
+ (Mask[2] >= 4 ? Mask[2] : Mask[3]) - 4};
+ V1 = DAG.getNode(X86ISD::SHUFP, DL, MVT::v4f32, V1, V2,
+ getV4X86ShuffleImm8ForMask(BlendMask, DAG));
+
+ // Now we do a normal shuffle of V1 by giving V1 as both operands to
+ // a blend.
+ LowV = HighV = V1;
+ NewMask[0] = Mask[0] < 4 ? 0 : 2;
+ NewMask[1] = Mask[0] < 4 ? 2 : 0;
+ NewMask[2] = Mask[2] < 4 ? 1 : 3;
+ NewMask[3] = Mask[2] < 4 ? 3 : 1;
+ }
+ }
+ return DAG.getNode(X86ISD::SHUFP, DL, MVT::v4f32, LowV, HighV,
+ getV4X86ShuffleImm8ForMask(NewMask, DAG));
+}
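// --- Editorial sketch; not part of this patch ----------------------------
// SHUFPS fills its low two result lanes from the first operand and the high
// two from the second, which is why the code above threads separate
// LowV/HighV operands through NewMask. A scalar model:
#include <cstdio>

static void shufps(const float a[4], const float b[4], const int Mask[4],
                   float dst[4]) {
  dst[0] = a[Mask[0]];  // lanes 0-1 select from the first operand
  dst[1] = a[Mask[1]];
  dst[2] = b[Mask[2]];  // lanes 2-3 select from the second operand
  dst[3] = b[Mask[3]];
}

int main() {
  float Lo[4] = {0, 1, 2, 3}, Hi[4] = {4, 5, 6, 7}, D[4];
  int NewMask[4] = {2, 0, 3, 1};  // made-up final mask for illustration
  shufps(Lo, Hi, NewMask, D);
  std::printf("%g %g %g %g\n", D[0], D[1], D[2], D[3]); // 2 0 7 5
}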
+
+/// \brief Lower 4-lane i32 vector shuffles.
+///
+/// We try to handle these with integer-domain shuffles where we can, but for
+/// blends we use the floating point domain blend instructions.
+static SDValue lowerV4I32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
+ const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
+ SDLoc DL(Op);
+ assert(Op.getSimpleValueType() == MVT::v4i32 && "Bad shuffle type!");
+ assert(V1.getSimpleValueType() == MVT::v4i32 && "Bad operand type!");
+ assert(V2.getSimpleValueType() == MVT::v4i32 && "Bad operand type!");
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
+ ArrayRef<int> Mask = SVOp->getMask();
+ assert(Mask.size() == 4 && "Unexpected mask size for v4 shuffle!");
+
+ if (isSingleInputShuffleMask(Mask))
+ // Straight shuffle of a single input vector. For everything from SSE2
+ // onward this has a single fast instruction with no scary immediates.
+ return DAG.getNode(X86ISD::PSHUFD, DL, MVT::v4i32, V1,
+ getV4X86ShuffleImm8ForMask(Mask, DAG));
+
+ // We implement this with SHUFPS because it can blend from two vectors.
+ // Because we're going to eventually use SHUFPS, we use SHUFPS even to build
+ // up the inputs, bypassing domain shift penalties that we would incur if we
+ // directly used PSHUFD on Nehalem and older. For newer chips, this isn't
+ // relevant.
+ return DAG.getNode(ISD::BITCAST, DL, MVT::v4i32,
+ DAG.getVectorShuffle(
+ MVT::v4f32, DL,
+ DAG.getNode(ISD::BITCAST, DL, MVT::v4f32, V1),
+ DAG.getNode(ISD::BITCAST, DL, MVT::v4f32, V2), Mask));
+}
+
+/// \brief Lowering of single-input v8i16 shuffles is the cornerstone of SSE2
+/// shuffle lowering, and the most complex part.
+///
+/// The lowering strategy is to try to form pairs of input lanes which are
+/// targeted at the same half of the final vector, and then use a dword shuffle
+/// to place them onto the right half, and finally unpack the paired lanes into
+/// their final position.
+///
+/// The exact breakdown of how to form these dword pairs and align them on the
+/// correct sides is really tricky. See the comments within the function for
+/// more of the details.
+static SDValue lowerV8I16SingleInputVectorShuffle(
+ SDLoc DL, SDValue V, MutableArrayRef<int> Mask,
+ const X86Subtarget *Subtarget, SelectionDAG &DAG) {
+ assert(V.getSimpleValueType() == MVT::v8i16 && "Bad input type!");
+ MutableArrayRef<int> LoMask = Mask.slice(0, 4);
+ MutableArrayRef<int> HiMask = Mask.slice(4, 4);
+
+ SmallVector<int, 4> LoInputs;
+ std::copy_if(LoMask.begin(), LoMask.end(), std::back_inserter(LoInputs),
+ [](int M) { return M >= 0; });
+ std::sort(LoInputs.begin(), LoInputs.end());
+ LoInputs.erase(std::unique(LoInputs.begin(), LoInputs.end()), LoInputs.end());
+ SmallVector<int, 4> HiInputs;
+ std::copy_if(HiMask.begin(), HiMask.end(), std::back_inserter(HiInputs),
+ [](int M) { return M >= 0; });
+ std::sort(HiInputs.begin(), HiInputs.end());
+ HiInputs.erase(std::unique(HiInputs.begin(), HiInputs.end()), HiInputs.end());
+ int NumLToL =
+ std::lower_bound(LoInputs.begin(), LoInputs.end(), 4) - LoInputs.begin();
+ int NumHToL = LoInputs.size() - NumLToL;
+ int NumLToH =
+ std::lower_bound(HiInputs.begin(), HiInputs.end(), 4) - HiInputs.begin();
+ int NumHToH = HiInputs.size() - NumLToH;
+ MutableArrayRef<int> LToLInputs(LoInputs.data(), NumLToL);
+ MutableArrayRef<int> LToHInputs(HiInputs.data(), NumLToH);
+ MutableArrayRef<int> HToLInputs(LoInputs.data() + NumLToL, NumHToL);
+ MutableArrayRef<int> HToHInputs(HiInputs.data() + NumLToH, NumHToH);
+
+ // Simplify the 1-into-3 and 3-into-1 cases with a single pshufd. For all
+ // such inputs we can swap two of the dwords across the half mark and end up
+ // with <=2 inputs to each half in each half. Once there, we can fall through
+ // to the generic code below. For example:
+ //
+ // Input: [a, b, c, d, e, f, g, h] -PSHUFD[0,2,1,3]-> [a, b, e, f, c, d, g, h]
+ // Mask: [0, 1, 2, 7, 4, 5, 6, 3] -----------------> [0, 1, 4, 7, 2, 3, 6, 5]
+ //
+ // Before we had 3-1 in the low half and 3-1 in the high half. Afterward, 2-2
+ // and 2-2.
+ auto balanceSides = [&](ArrayRef<int> ThreeInputs, int OneInput,
+ int ThreeInputHalfSum, int OneInputHalfOffset) {
+ // Compute the index of dword with only one word among the three inputs in
+ // a half by taking the sum of the half with three inputs and subtracting
+ // the sum of the actual three inputs. The difference is the remaining
+ // slot.
+ int DWordA = (ThreeInputHalfSum -
+ std::accumulate(ThreeInputs.begin(), ThreeInputs.end(), 0)) /
+ 2;
+ int DWordB = OneInputHalfOffset / 2 + (OneInput / 2 + 1) % 2;
+
+ int PSHUFDMask[] = {0, 1, 2, 3};
+ PSHUFDMask[DWordA] = DWordB;
+ PSHUFDMask[DWordB] = DWordA;
+ V = DAG.getNode(ISD::BITCAST, DL, MVT::v8i16,
+ DAG.getNode(X86ISD::PSHUFD, DL, MVT::v4i32,
+ DAG.getNode(ISD::BITCAST, DL, MVT::v4i32, V),
+ getV4X86ShuffleImm8ForMask(PSHUFDMask, DAG)));
+
+ // Adjust the mask to match the new locations of A and B.
+ for (int &M : Mask)
+ if (M != -1 && M/2 == DWordA)
+ M = 2 * DWordB + M % 2;
+ else if (M != -1 && M/2 == DWordB)
+ M = 2 * DWordA + M % 2;
+
+ // Recurse back into this routine to re-compute state now that this isn't
+ // a 3 and 1 problem.
+ return DAG.getVectorShuffle(MVT::v8i16, DL, V, DAG.getUNDEF(MVT::v8i16),
+ Mask);
+ };
+ if (NumLToL == 3 && NumHToL == 1)
+ return balanceSides(LToLInputs, HToLInputs[0], 0 + 1 + 2 + 3, 4);
+ else if (NumLToL == 1 && NumHToL == 3)
+ return balanceSides(HToLInputs, LToLInputs[0], 4 + 5 + 6 + 7, 0);
+ else if (NumLToH == 1 && NumHToH == 3)
+ return balanceSides(HToHInputs, LToHInputs[0], 4 + 5 + 6 + 7, 0);
+ else if (NumLToH == 3 && NumHToH == 1)
+ return balanceSides(LToHInputs, HToHInputs[0], 0 + 1 + 2 + 3, 4);
+
+ // At this point there are at most two inputs to the low and high halves from
+ // each half. That means the inputs can always be grouped into dwords and
+ // those dwords can then be moved to the correct half with a dword shuffle.
+ // We use at most one low and one high word shuffle to collect these paired
+ // inputs into dwords, and finally a dword shuffle to place them.
+ int PSHUFLMask[4] = {-1, -1, -1, -1};
+ int PSHUFHMask[4] = {-1, -1, -1, -1};
+ int PSHUFDMask[4] = {-1, -1, -1, -1};
+
+ // First fix the masks for all the inputs that are staying in their
+ // original halves. This will then dictate the targets of the cross-half
+ // shuffles.
+ auto fixInPlaceInputs = [&PSHUFDMask](
+ ArrayRef<int> InPlaceInputs, MutableArrayRef<int> SourceHalfMask,
+ MutableArrayRef<int> HalfMask, int HalfOffset) {
+ if (InPlaceInputs.empty())
+ return;
+ if (InPlaceInputs.size() == 1) {
+ SourceHalfMask[InPlaceInputs[0] - HalfOffset] =
+ InPlaceInputs[0] - HalfOffset;
+ PSHUFDMask[InPlaceInputs[0] / 2] = InPlaceInputs[0] / 2;
+ return;
+ }
+
+ assert(InPlaceInputs.size() == 2 && "Cannot handle 3 or 4 inputs!");
+ SourceHalfMask[InPlaceInputs[0] - HalfOffset] =
+ InPlaceInputs[0] - HalfOffset;
+ // Put the second input next to the first so that they are packed into
+ // a dword. We find the adjacent index by toggling the low bit.
+ int AdjIndex = InPlaceInputs[0] ^ 1;
+ SourceHalfMask[AdjIndex - HalfOffset] = InPlaceInputs[1] - HalfOffset;
+ std::replace(HalfMask.begin(), HalfMask.end(), InPlaceInputs[1], AdjIndex);
+ PSHUFDMask[AdjIndex / 2] = AdjIndex / 2;
+ };
+ if (!HToLInputs.empty())
+ fixInPlaceInputs(LToLInputs, PSHUFLMask, LoMask, 0);
+ if (!LToHInputs.empty())
+ fixInPlaceInputs(HToHInputs, PSHUFHMask, HiMask, 4);
+
+ // Now gather the cross-half inputs and place them into a free dword of
+ // their target half.
+ // FIXME: This operation could almost certainly be simplified dramatically to
+ // look more like the 3-1 fixing operation.
+ auto moveInputsToRightHalf = [&PSHUFDMask](
+ MutableArrayRef<int> IncomingInputs, ArrayRef<int> ExistingInputs,
+ MutableArrayRef<int> SourceHalfMask, MutableArrayRef<int> HalfMask,
+ int SourceOffset, int DestOffset) {
+ auto isWordClobbered = [](ArrayRef<int> SourceHalfMask, int Word) {
+ return SourceHalfMask[Word] != -1 && SourceHalfMask[Word] != Word;
+ };
+ auto isDWordClobbered = [&isWordClobbered](ArrayRef<int> SourceHalfMask,
+ int Word) {
+ int LowWord = Word & ~1;
+ int HighWord = Word | 1;
+ return isWordClobbered(SourceHalfMask, LowWord) ||
+ isWordClobbered(SourceHalfMask, HighWord);
+ };
+
+ if (IncomingInputs.empty())
+ return;
+
+ if (ExistingInputs.empty()) {
+ // Map any dwords with inputs from them into the right half.
+ for (int Input : IncomingInputs) {
+ // If the source half mask maps over the inputs, turn those into
+ // swaps and use the swapped lane.
+ if (isWordClobbered(SourceHalfMask, Input - SourceOffset)) {
+ if (SourceHalfMask[SourceHalfMask[Input - SourceOffset]] == -1) {
+ SourceHalfMask[SourceHalfMask[Input - SourceOffset]] =
+ Input - SourceOffset;
+ // We have to swap the uses in our half mask in one sweep.
+ for (int &M : HalfMask)
+ if (M == SourceHalfMask[Input - SourceOffset])
+ M = Input;
+ else if (M == Input)
+ M = SourceHalfMask[Input - SourceOffset] + SourceOffset;
+ } else {
+ assert(SourceHalfMask[SourceHalfMask[Input - SourceOffset]] ==
+ Input - SourceOffset &&
+ "Previous placement doesn't match!");
+ }
+ // Note that this correctly re-maps both when we do a swap and when
+ // we observe the other side of the swap above. We rely on that to
+ // avoid swapping the members of the input list directly.
+ Input = SourceHalfMask[Input - SourceOffset] + SourceOffset;
+ }
+
+ // Map the input's dword into the correct half.
+ if (PSHUFDMask[(Input - SourceOffset + DestOffset) / 2] == -1)
+ PSHUFDMask[(Input - SourceOffset + DestOffset) / 2] = Input / 2;
+ else
+ assert(PSHUFDMask[(Input - SourceOffset + DestOffset) / 2] ==
+ Input / 2 &&
+ "Previous placement doesn't match!");
+ }
+
+ // And just directly shift any other-half mask elements to be same-half
+ // as we will have mirrored the dword containing the element into the
+ // same position within that half.
+ for (int &M : HalfMask)
+ if (M >= SourceOffset && M < SourceOffset + 4) {
+ M = M - SourceOffset + DestOffset;
+ assert(M >= 0 && "This should never wrap below zero!");
+ }
+ return;
+ }
+
+ // Ensure we have the input in a viable dword of its current half. This
+ // is particularly tricky because the original position may be clobbered
+ // by inputs being moved and *staying* in that half.
+ if (IncomingInputs.size() == 1) {
+ if (isWordClobbered(SourceHalfMask, IncomingInputs[0] - SourceOffset)) {
+ int InputFixed = std::find(std::begin(SourceHalfMask),
+ std::end(SourceHalfMask), -1) -
+ std::begin(SourceHalfMask) + SourceOffset;
+ SourceHalfMask[InputFixed - SourceOffset] =
+ IncomingInputs[0] - SourceOffset;
+ std::replace(HalfMask.begin(), HalfMask.end(), IncomingInputs[0],
+ InputFixed);
+ IncomingInputs[0] = InputFixed;
+ }
+ } else if (IncomingInputs.size() == 2) {
+ if (IncomingInputs[0] / 2 != IncomingInputs[1] / 2 ||
+ isDWordClobbered(SourceHalfMask, IncomingInputs[0] - SourceOffset)) {
+ int SourceDWordBase = !isDWordClobbered(SourceHalfMask, 0) ? 0 : 2;
+ assert(!isDWordClobbered(SourceHalfMask, SourceDWordBase) &&
+ "Not all dwords can be clobbered!");
+ SourceHalfMask[SourceDWordBase] = IncomingInputs[0] - SourceOffset;
+ SourceHalfMask[SourceDWordBase + 1] = IncomingInputs[1] - SourceOffset;
+ for (int &M : HalfMask)
+ if (M == IncomingInputs[0])
+ M = SourceDWordBase + SourceOffset;
+ else if (M == IncomingInputs[1])
+ M = SourceDWordBase + 1 + SourceOffset;
+ IncomingInputs[0] = SourceDWordBase + SourceOffset;
+ IncomingInputs[1] = SourceDWordBase + 1 + SourceOffset;
+ }
+ } else {
+ llvm_unreachable("Unhandled input size!");
+ }
+
+ // Now hoist the DWord down to the right half.
+ int FreeDWord = (PSHUFDMask[DestOffset / 2] == -1 ? 0 : 1) + DestOffset / 2;
+ assert(PSHUFDMask[FreeDWord] == -1 && "DWord not free");
+ PSHUFDMask[FreeDWord] = IncomingInputs[0] / 2;
+ for (int Input : IncomingInputs)
+ std::replace(HalfMask.begin(), HalfMask.end(), Input,
+ FreeDWord * 2 + Input % 2);
+ };
+ moveInputsToRightHalf(HToLInputs, LToLInputs, PSHUFHMask, LoMask,
+ /*SourceOffset*/ 4, /*DestOffset*/ 0);
+ moveInputsToRightHalf(LToHInputs, HToHInputs, PSHUFLMask, HiMask,
+ /*SourceOffset*/ 0, /*DestOffset*/ 4);
+
+ // Now enact all the shuffles we've computed to move the inputs into their
+ // target half.
+ if (!isNoopShuffleMask(PSHUFLMask))
+ V = DAG.getNode(X86ISD::PSHUFLW, DL, MVT::v8i16, V,
+ getV4X86ShuffleImm8ForMask(PSHUFLMask, DAG));
+ if (!isNoopShuffleMask(PSHUFHMask))
+ V = DAG.getNode(X86ISD::PSHUFHW, DL, MVT::v8i16, V,
+ getV4X86ShuffleImm8ForMask(PSHUFHMask, DAG));
+ if (!isNoopShuffleMask(PSHUFDMask))
+ V = DAG.getNode(ISD::BITCAST, DL, MVT::v8i16,
+ DAG.getNode(X86ISD::PSHUFD, DL, MVT::v4i32,
+ DAG.getNode(ISD::BITCAST, DL, MVT::v4i32, V),
+ getV4X86ShuffleImm8ForMask(PSHUFDMask, DAG)));
+
+ // At this point, each half should contain all its inputs, and we can then
+ // just shuffle them into their final position.
+ assert(std::count_if(LoMask.begin(), LoMask.end(),
+ [](int M) { return M >= 4; }) == 0 &&
+ "Failed to lift all the high half inputs to the low mask!");
+ assert(std::count_if(HiMask.begin(), HiMask.end(),
+ [](int M) { return M >= 0 && M < 4; }) == 0 &&
+ "Failed to lift all the low half inputs to the high mask!");
+
+ // Do a half shuffle for the low mask.
+ if (!isNoopShuffleMask(LoMask))
+ V = DAG.getNode(X86ISD::PSHUFLW, DL, MVT::v8i16, V,
+ getV4X86ShuffleImm8ForMask(LoMask, DAG));
+
+ // Do a half shuffle with the high mask after shifting its values down.
+ for (int &M : HiMask)
+ if (M >= 0)
+ M -= 4;
+ if (!isNoopShuffleMask(HiMask))
+ V = DAG.getNode(X86ISD::PSHUFHW, DL, MVT::v8i16, V,
+ getV4X86ShuffleImm8ForMask(HiMask, DAG));
+
+ return V;
+}
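// --- Editorial sketch; not part of this patch ----------------------------
// The dword-level primitive the routine leans on, modeled on a plain array
// of 8 words: PSHUFD moves 2-word pairs, so it can carry a packed pair of
// inputs across the half boundary. Reproducing the worked example from the
// comments above ([a..h] -PSHUFD[0,2,1,3]-> [a,b,e,f,c,d,g,h]):
#include <cstdio>

static void pshufdWords(const int In[8], const int DMask[4], int Out[8]) {
  for (int i = 0; i < 4; ++i) {  // each DMask entry moves a whole dword
    Out[2 * i] = In[2 * DMask[i]];
    Out[2 * i + 1] = In[2 * DMask[i] + 1];
  }
}

int main() {
  int In[8] = {'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h'}, Out[8];
  int DMask[4] = {0, 2, 1, 3};
  pshufdWords(In, DMask, Out);
  for (int W : Out) std::printf("%c ", W); // a b e f c d g h
  std::printf("\n");
}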
+
+/// \brief Detect whether the mask pattern should be lowered through
+/// interleaving.
+///
+/// This essentially tests whether viewing the mask as an interleaving of two
+/// sub-sequences reduces the cross-input traffic of a blend operation. If so,
+/// lowering it through interleaving is a significantly better strategy.
+static bool shouldLowerAsInterleaving(ArrayRef<int> Mask) {
+ int NumEvenInputs[2] = {0, 0};
+ int NumOddInputs[2] = {0, 0};
+ int NumLoInputs[2] = {0, 0};
+ int NumHiInputs[2] = {0, 0};
+ for (int i = 0, Size = Mask.size(); i < Size; ++i) {
+ if (Mask[i] < 0)
+ continue;
+
+ int InputIdx = Mask[i] >= Size;
+
+ if (i < Size / 2)
+ ++NumLoInputs[InputIdx];
+ else
+ ++NumHiInputs[InputIdx];
+
+ if ((i % 2) == 0)
+ ++NumEvenInputs[InputIdx];
+ else
+ ++NumOddInputs[InputIdx];
+ }
+
+ // The minimum number of cross-input results for both the interleaved and
+ // split cases. If interleaving results in fewer cross-input results, return
+ // true.
+ int InterleavedCrosses = std::min(NumEvenInputs[1] + NumOddInputs[0],
+ NumEvenInputs[0] + NumOddInputs[1]);
+ int SplitCrosses = std::min(NumLoInputs[1] + NumHiInputs[0],
+ NumLoInputs[0] + NumHiInputs[1]);
+ return InterleavedCrosses < SplitCrosses;
+}
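// --- Editorial sketch; not part of this patch ----------------------------
// The heuristic in numbers, as a standalone re-statement of the function
// above. For the classic interleave mask <0,8,1,9,2,10,3,11> every even lane
// reads the first input and every odd lane the second, so the interleaved
// view has 0 crosses while a lo/hi split would have 4:
#include <algorithm>
#include <cstdio>
#include <vector>

static bool shouldInterleave(const std::vector<int> &Mask) {
  int Even[2] = {0, 0}, Odd[2] = {0, 0}, Lo[2] = {0, 0}, Hi[2] = {0, 0};
  int Size = (int)Mask.size();
  for (int i = 0; i < Size; ++i) {
    if (Mask[i] < 0) continue;
    int Idx = Mask[i] >= Size;        // 0 = first input, 1 = second
    ++(i < Size / 2 ? Lo : Hi)[Idx];
    ++(i % 2 == 0 ? Even : Odd)[Idx];
  }
  int Interleaved = std::min(Even[1] + Odd[0], Even[0] + Odd[1]);
  int Split = std::min(Lo[1] + Hi[0], Lo[0] + Hi[1]);
  return Interleaved < Split;
}

int main() {
  std::vector<int> Mask = {0, 8, 1, 9, 2, 10, 3, 11};
  std::printf("%d\n", shouldInterleave(Mask)); // 1
}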
+
+/// \brief Blend two v8i16 vectors using a naive unpack strategy.
+///
+/// This strategy only works when the inputs from each vector fit into a single
+/// half of that vector, and generally there are not so many inputs as to leave
+/// the in-place shuffles required highly constrained (and thus expensive). It
+/// shifts all the inputs into a single side of both input vectors and then
+/// uses an unpack to interleave these inputs in a single vector. At that
+/// point, we will fall back on the generic single input shuffle lowering.
+static SDValue lowerV8I16BasicBlendVectorShuffle(SDLoc DL, SDValue V1,
+ SDValue V2,
+ MutableArrayRef<int> Mask,
+ const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
+ assert(V1.getSimpleValueType() == MVT::v8i16 && "Bad input type!");
+ assert(V2.getSimpleValueType() == MVT::v8i16 && "Bad input type!");
+ SmallVector<int, 3> LoV1Inputs, HiV1Inputs, LoV2Inputs, HiV2Inputs;
+ for (int i = 0; i < 8; ++i)
+ if (Mask[i] >= 0 && Mask[i] < 4)
+ LoV1Inputs.push_back(i);
+ else if (Mask[i] >= 4 && Mask[i] < 8)
+ HiV1Inputs.push_back(i);
+ else if (Mask[i] >= 8 && Mask[i] < 12)
+ LoV2Inputs.push_back(i);
+ else if (Mask[i] >= 12)
+ HiV2Inputs.push_back(i);
+
+ int NumV1Inputs = LoV1Inputs.size() + HiV1Inputs.size();
+ int NumV2Inputs = LoV2Inputs.size() + HiV2Inputs.size();
+ (void)NumV1Inputs;
+ (void)NumV2Inputs;
+ assert(NumV1Inputs > 0 && NumV1Inputs <= 3 && "At most 3 inputs supported");
+ assert(NumV2Inputs > 0 && NumV2Inputs <= 3 && "At most 3 inputs supported");
+ assert(NumV1Inputs + NumV2Inputs <= 4 && "At most 4 combined inputs");
+
+ bool MergeFromLo = LoV1Inputs.size() + LoV2Inputs.size() >=
+ HiV1Inputs.size() + HiV2Inputs.size();
+
+ auto moveInputsToHalf = [&](SDValue V, ArrayRef<int> LoInputs,
+ ArrayRef<int> HiInputs, bool MoveToLo,
+ int MaskOffset) {
+ ArrayRef<int> GoodInputs = MoveToLo ? LoInputs : HiInputs;
+ ArrayRef<int> BadInputs = MoveToLo ? HiInputs : LoInputs;
+ if (BadInputs.empty())
+ return V;
+
+ int MoveMask[] = {-1, -1, -1, -1, -1, -1, -1, -1};
+ int MoveOffset = MoveToLo ? 0 : 4;
+
+ if (GoodInputs.empty()) {
+ for (int BadInput : BadInputs) {
+ MoveMask[Mask[BadInput] % 4 + MoveOffset] = Mask[BadInput] - MaskOffset;
+ Mask[BadInput] = Mask[BadInput] % 4 + MoveOffset + MaskOffset;
+ }
+ } else {
+ if (GoodInputs.size() == 2) {
+ // If the low inputs are spread across two dwords, pack them into
+ // a single dword.
+ MoveMask[Mask[GoodInputs[0]] % 2 + MoveOffset] =
+ Mask[GoodInputs[0]] - MaskOffset;
+ MoveMask[Mask[GoodInputs[1]] % 2 + MoveOffset] =
+ Mask[GoodInputs[1]] - MaskOffset;
+ Mask[GoodInputs[0]] = Mask[GoodInputs[0]] % 2 + MoveOffset + MaskOffset;
+ // Note: read GoodInputs[1]'s own mask entry here; the GoodInputs[0] entry
+ // was just rewritten on the previous line.
+ Mask[GoodInputs[1]] = Mask[GoodInputs[1]] % 2 + MoveOffset + MaskOffset;
+ } else {
+ // Otherwise pin the low inputs.
+ for (int GoodInput : GoodInputs)
+ MoveMask[Mask[GoodInput] - MaskOffset] = Mask[GoodInput] - MaskOffset;
+ }
+
+ int MoveMaskIdx =
+ std::find(std::begin(MoveMask) + MoveOffset, std::end(MoveMask), -1) -
+ std::begin(MoveMask);
+ assert(MoveMaskIdx >= MoveOffset && "Established above");
+
+ if (BadInputs.size() == 2) {
+ assert(MoveMask[MoveMaskIdx] == -1 && "Expected empty slot");
+ assert(MoveMask[MoveMaskIdx + 1] == -1 && "Expected empty slot");
+ MoveMask[MoveMaskIdx + Mask[BadInputs[0]] % 2] =
+ Mask[BadInputs[0]] - MaskOffset;
+ MoveMask[MoveMaskIdx + Mask[BadInputs[1]] % 2] =
+ Mask[BadInputs[1]] - MaskOffset;
+ Mask[BadInputs[0]] = MoveMaskIdx + Mask[BadInputs[0]] % 2 + MaskOffset;
+ Mask[BadInputs[1]] = MoveMaskIdx + Mask[BadInputs[1]] % 2 + MaskOffset;
+ } else {
+ assert(BadInputs.size() == 1 && "All sizes handled");
+ MoveMask[MoveMaskIdx] = Mask[BadInputs[0]] - MaskOffset;
+ Mask[BadInputs[0]] = MoveMaskIdx + MaskOffset;
+ }
+ }
+
+ return DAG.getVectorShuffle(MVT::v8i16, DL, V, DAG.getUNDEF(MVT::v8i16),
+ MoveMask);
+ };
+ V1 = moveInputsToHalf(V1, LoV1Inputs, HiV1Inputs, MergeFromLo,
+ /*MaskOffset*/ 0);
+ V2 = moveInputsToHalf(V2, LoV2Inputs, HiV2Inputs, MergeFromLo,
+ /*MaskOffset*/ 8);
+
+ // FIXME: Select an interleaving of the merge of V1 and V2 that minimizes
+ // cross-half traffic in the final shuffle.
+
+ // Munge the mask to be a single-input mask after the unpack merges the
+ // results.
+ for (int &M : Mask)
+ if (M != -1)
+ M = 2 * (M % 4) + (M / 8);
+
+ return DAG.getVectorShuffle(
+ MVT::v8i16, DL, DAG.getNode(MergeFromLo ? X86ISD::UNPCKL : X86ISD::UNPCKH,
+ DL, MVT::v8i16, V1, V2),
+ DAG.getUNDEF(MVT::v8i16), Mask);
+}
+
+/// \brief Generic lowering of 8-lane i16 shuffles.
+///
+/// This handles both single-input shuffles and combined shuffle/blends with
+/// two inputs. The single input shuffles are immediately delegated to
+/// a dedicated lowering routine.
+///
+/// The blends are lowered in one of three fundamental ways. If there are few
+/// enough inputs, we delegate to a basic UNPCK-based strategy. If the shuffle
+/// is significantly cheaper when lowered as an interleaving of the two
+/// inputs, we interleave them. Otherwise, we blend the low and high halves of
+/// the inputs separately (leaving each with relatively few inputs) and then
+/// concatenate them.
+static SDValue lowerV8I16VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
+ const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
+ SDLoc DL(Op);
+ assert(Op.getSimpleValueType() == MVT::v8i16 && "Bad shuffle type!");
+ assert(V1.getSimpleValueType() == MVT::v8i16 && "Bad operand type!");
+ assert(V2.getSimpleValueType() == MVT::v8i16 && "Bad operand type!");
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
+ ArrayRef<int> OrigMask = SVOp->getMask();
+ int MaskStorage[8] = {OrigMask[0], OrigMask[1], OrigMask[2], OrigMask[3],
+ OrigMask[4], OrigMask[5], OrigMask[6], OrigMask[7]};
+ MutableArrayRef<int> Mask(MaskStorage);
+
+ assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!");
+
+ auto isV1 = [](int M) { return M >= 0 && M < 8; };
+ auto isV2 = [](int M) { return M >= 8; };
+
+ int NumV1Inputs = std::count_if(Mask.begin(), Mask.end(), isV1);
+ int NumV2Inputs = std::count_if(Mask.begin(), Mask.end(), isV2);
+
+ if (NumV2Inputs == 0)
+ return lowerV8I16SingleInputVectorShuffle(DL, V1, Mask, Subtarget, DAG);
+
+ assert(NumV1Inputs > 0 && "All single-input shuffles should be canonicalized "
+ "to be V1-input shuffles.");
+
+ if (NumV1Inputs + NumV2Inputs <= 4)
+ return lowerV8I16BasicBlendVectorShuffle(DL, V1, V2, Mask, Subtarget, DAG);
+
+ // Check whether an interleaving lowering is likely to be more efficient.
+ // This isn't perfect but it is a strong heuristic that tends to work well on
+ // the kinds of shuffles that show up in practice.
+ //
+ // FIXME: Handle 1x, 2x, and 4x interleaving.
+ if (shouldLowerAsInterleaving(Mask)) {
+ // FIXME: Figure out whether we should pack these into the low or high
+ // halves.
+
+ int EMask[8], OMask[8];
+ for (int i = 0; i < 4; ++i) {
+ EMask[i] = Mask[2*i];
+ OMask[i] = Mask[2*i + 1];
+ EMask[i + 4] = -1;
+ OMask[i + 4] = -1;
+ }
+
+ SDValue Evens = DAG.getVectorShuffle(MVT::v8i16, DL, V1, V2, EMask);
+ SDValue Odds = DAG.getVectorShuffle(MVT::v8i16, DL, V1, V2, OMask);
+
+ return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v8i16, Evens, Odds);
+ }
+
+ int LoBlendMask[8] = {-1, -1, -1, -1, -1, -1, -1, -1};
+ int HiBlendMask[8] = {-1, -1, -1, -1, -1, -1, -1, -1};
+
+ for (int i = 0; i < 4; ++i) {
+ LoBlendMask[i] = Mask[i];
+ HiBlendMask[i] = Mask[i + 4];
+ }
+
+ SDValue LoV = DAG.getVectorShuffle(MVT::v8i16, DL, V1, V2, LoBlendMask);
+ SDValue HiV = DAG.getVectorShuffle(MVT::v8i16, DL, V1, V2, HiBlendMask);
+ LoV = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, LoV);
+ HiV = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, HiV);
+
+ return DAG.getNode(ISD::BITCAST, DL, MVT::v8i16,
+ DAG.getNode(X86ISD::UNPCKL, DL, MVT::v2i64, LoV, HiV));
+}
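To make the interleaving branch concrete, a minimal sketch with an illustrative mask (not taken from the patch): a perfect two-input interleave decomposes into two single-half shuffles that UNPCKL then re-interleaves.

    #include <cassert>

    int main() {
      // Perfect interleave of V1 (indices 0-7) with V2 (indices 8-15).
      int Mask[8] = {0, 8, 1, 9, 2, 10, 3, 11};
      int EMask[8], OMask[8];
      for (int i = 0; i < 4; ++i) {
        EMask[i] = Mask[2 * i];     // even positions -> {0, 1, 2, 3}, all V1
        OMask[i] = Mask[2 * i + 1]; // odd positions  -> {8, 9, 10, 11}, all V2
        EMask[i + 4] = OMask[i + 4] = -1;
      }
      // Evens is now a cheap shuffle of V1 alone and Odds of V2 alone;
      // UNPCKL puts word k of Evens at lane 2k and word k of Odds at lane
      // 2k+1, which reproduces the original mask.
      assert(EMask[3] == 3 && OMask[3] == 11);
      return 0;
    }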
+
+/// \brief Generic lowering of v16i8 shuffles.
+///
+/// This is a hybrid strategy to lower v16i8 vectors. It first attempts to
+/// detect any complexity reducing interleaving. If that doesn't help, it uses
+/// UNPCK to spread the i8 elements across two i16-element vectors, and uses
+/// the existing lowering for v8i16 blends on each half, finally PACK-ing them
+/// back together.
+static SDValue lowerV16I8VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
+ const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
+ SDLoc DL(Op);
+ assert(Op.getSimpleValueType() == MVT::v16i8 && "Bad shuffle type!");
+ assert(V1.getSimpleValueType() == MVT::v16i8 && "Bad operand type!");
+ assert(V2.getSimpleValueType() == MVT::v16i8 && "Bad operand type!");
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
+ ArrayRef<int> OrigMask = SVOp->getMask();
+ assert(OrigMask.size() == 16 && "Unexpected mask size for v16 shuffle!");
+ int MaskStorage[16] = {
+ OrigMask[0], OrigMask[1], OrigMask[2], OrigMask[3],
+ OrigMask[4], OrigMask[5], OrigMask[6], OrigMask[7],
+ OrigMask[8], OrigMask[9], OrigMask[10], OrigMask[11],
+ OrigMask[12], OrigMask[13], OrigMask[14], OrigMask[15]};
+ MutableArrayRef<int> Mask(MaskStorage);
+ MutableArrayRef<int> LoMask = Mask.slice(0, 8);
+ MutableArrayRef<int> HiMask = Mask.slice(8, 8);
+
+ // For single-input shuffles, there are some nicer lowering tricks we can use.
+ if (isSingleInputShuffleMask(Mask)) {
+ // Check whether we can widen this to an i16 shuffle by duplicating bytes.
+ // Notably, this handles splat and partial-splat shuffles more efficiently.
+ // However, it only makes sense if the pre-duplication shuffle simplifies
+ // things significantly. Currently, this means we need to be able to
+ // express the pre-duplication shuffle as an i16 shuffle.
+ //
+ // FIXME: We should check for other patterns which can be widened into an
+ // i16 shuffle as well.
+ auto canWidenViaDuplication = [](ArrayRef<int> Mask) {
+ for (int i = 0; i < 16; i += 2) {
+ if (Mask[i] != Mask[i + 1])
+ return false;
+ }
+ return true;
+ };
+ auto tryToWidenViaDuplication = [&]() -> SDValue {
+ if (!canWidenViaDuplication(Mask))
+ return SDValue();
+ SmallVector<int, 4> LoInputs;
+ std::copy_if(Mask.begin(), Mask.end(), std::back_inserter(LoInputs),
+ [](int M) { return M >= 0 && M < 8; });
+ std::sort(LoInputs.begin(), LoInputs.end());
+ LoInputs.erase(std::unique(LoInputs.begin(), LoInputs.end()),
+ LoInputs.end());
+ SmallVector<int, 4> HiInputs;
+ std::copy_if(Mask.begin(), Mask.end(), std::back_inserter(HiInputs),
+ [](int M) { return M >= 8; });
+ std::sort(HiInputs.begin(), HiInputs.end());
+ HiInputs.erase(std::unique(HiInputs.begin(), HiInputs.end()),
+ HiInputs.end());
+
+ bool TargetLo = LoInputs.size() >= HiInputs.size();
+ ArrayRef<int> InPlaceInputs = TargetLo ? LoInputs : HiInputs;
+ ArrayRef<int> MovingInputs = TargetLo ? HiInputs : LoInputs;
+
+ int PreDupI16Shuffle[] = {-1, -1, -1, -1, -1, -1, -1, -1};
+ SmallDenseMap<int, int, 8> LaneMap;
+ for (int I : InPlaceInputs) {
+ PreDupI16Shuffle[I/2] = I/2;
+ LaneMap[I] = I;
+ }
+ int j = TargetLo ? 0 : 4, je = j + 4;
+ for (int i = 0, ie = MovingInputs.size(); i < ie; ++i) {
+ // Check if j is already a shuffle of this input. This happens when
+ // there are two adjacent bytes after we move the low one.
+ if (PreDupI16Shuffle[j] != MovingInputs[i] / 2) {
+ // If we haven't yet mapped the input, search for a slot into which
+ // we can map it.
+ while (j < je && PreDupI16Shuffle[j] != -1)
+ ++j;
+
+ if (j == je)
+ // We can't place the inputs into a single half with a simple i16
+ // shuffle, so bail.
+ return SDValue();
+
+ // Map this input with the i16 shuffle.
+ PreDupI16Shuffle[j] = MovingInputs[i] / 2;
+ }
+
+ // Update the lane map based on the mapping we ended up with.
+ LaneMap[MovingInputs[i]] = 2 * j + MovingInputs[i] % 2;
+ }
+ V1 = DAG.getNode(
+ ISD::BITCAST, DL, MVT::v16i8,
+ DAG.getVectorShuffle(MVT::v8i16, DL,
+ DAG.getNode(ISD::BITCAST, DL, MVT::v8i16, V1),
+ DAG.getUNDEF(MVT::v8i16), PreDupI16Shuffle));
+
+ // Unpack the bytes to form the i16s that will be shuffled into place.
+ V1 = DAG.getNode(TargetLo ? X86ISD::UNPCKL : X86ISD::UNPCKH, DL,
+ MVT::v16i8, V1, V1);
+
+ int PostDupI16Shuffle[8] = {-1, -1, -1, -1, -1, -1, -1, -1};
+ for (int i = 0; i < 16; i += 2) {
+ if (Mask[i] != -1)
+ PostDupI16Shuffle[i / 2] = LaneMap[Mask[i]] - (TargetLo ? 0 : 8);
+ assert(PostDupI16Shuffle[i / 2] < 8 && "Invalid v8 shuffle mask!");
+ }
+ return DAG.getNode(
+ ISD::BITCAST, DL, MVT::v16i8,
+ DAG.getVectorShuffle(MVT::v8i16, DL,
+ DAG.getNode(ISD::BITCAST, DL, MVT::v8i16, V1),
+ DAG.getUNDEF(MVT::v8i16), PostDupI16Shuffle));
+ };
+ if (SDValue V = tryToWidenViaDuplication())
+ return V;
+ }
+
+ // Check whether an interleaving lowering is likely to be more efficient.
+ // This isn't perfect but it is a strong heuristic that tends to work well on
+ // the kinds of shuffles that show up in practice.
+ //
+ // FIXME: We need to handle other interleaving widths (i16, i32, ...).
+ if (shouldLowerAsInterleaving(Mask)) {
+ // FIXME: Figure out whether we should pack these into the low or high
+ // halves.
+
+ int EMask[16], OMask[16];
+ for (int i = 0; i < 8; ++i) {
+ EMask[i] = Mask[2*i];
+ OMask[i] = Mask[2*i + 1];
+ EMask[i + 8] = -1;
+ OMask[i + 8] = -1;
+ }
+
+ SDValue Evens = DAG.getVectorShuffle(MVT::v16i8, DL, V1, V2, EMask);
+ SDValue Odds = DAG.getVectorShuffle(MVT::v16i8, DL, V1, V2, OMask);
+
+ return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v16i8, Evens, Odds);
+ }
+
+ int V1LoBlendMask[8] = {-1, -1, -1, -1, -1, -1, -1, -1};
+ int V1HiBlendMask[8] = {-1, -1, -1, -1, -1, -1, -1, -1};
+ int V2LoBlendMask[8] = {-1, -1, -1, -1, -1, -1, -1, -1};
+ int V2HiBlendMask[8] = {-1, -1, -1, -1, -1, -1, -1, -1};
+
+ auto buildBlendMasks = [](MutableArrayRef<int> HalfMask,
+ MutableArrayRef<int> V1HalfBlendMask,
+ MutableArrayRef<int> V2HalfBlendMask) {
+ for (int i = 0; i < 8; ++i)
+ if (HalfMask[i] >= 0 && HalfMask[i] < 16) {
+ V1HalfBlendMask[i] = HalfMask[i];
+ HalfMask[i] = i;
+ } else if (HalfMask[i] >= 16) {
+ V2HalfBlendMask[i] = HalfMask[i] - 16;
+ HalfMask[i] = i + 8;
+ }
+ };
+ buildBlendMasks(LoMask, V1LoBlendMask, V2LoBlendMask);
+ buildBlendMasks(HiMask, V1HiBlendMask, V2HiBlendMask);
+
+ SDValue Zero = getZeroVector(MVT::v8i16, Subtarget, DAG, DL);
+
+ auto buildLoAndHiV8s = [&](SDValue V, MutableArrayRef<int> LoBlendMask,
+ MutableArrayRef<int> HiBlendMask) {
+ SDValue V1, V2;
+ // Check if any of the odd lanes in the v16i8 are used. If not, we can mask
+ // them out and avoid using UNPCK{L,H} to extract the elements of V as
+ // i16s.
+ if (std::none_of(LoBlendMask.begin(), LoBlendMask.end(),
+ [](int M) { return M >= 0 && M % 2 == 1; }) &&
+ std::none_of(HiBlendMask.begin(), HiBlendMask.end(),
+ [](int M) { return M >= 0 && M % 2 == 1; })) {
+ // Use a mask to drop the high bytes.
+ V1 = DAG.getNode(ISD::BITCAST, DL, MVT::v8i16, V);
+ V1 = DAG.getNode(ISD::AND, DL, MVT::v8i16, V1,
+ DAG.getConstant(0x00FF, MVT::v8i16));
+
+ // This will be a single vector shuffle instead of a blend so nuke V2.
+ V2 = DAG.getUNDEF(MVT::v8i16);
+
+ // Squash the masks to point directly into V1.
+ for (int &M : LoBlendMask)
+ if (M >= 0)
+ M /= 2;
+ for (int &M : HiBlendMask)
+ if (M >= 0)
+ M /= 2;
+ } else {
+ // Otherwise just unpack the low half of V into V1 and the high half into
+ // V2 so that we can blend them as i16s.
+ V1 = DAG.getNode(ISD::BITCAST, DL, MVT::v8i16,
+ DAG.getNode(X86ISD::UNPCKL, DL, MVT::v16i8, V, Zero));
+ V2 = DAG.getNode(ISD::BITCAST, DL, MVT::v8i16,
+ DAG.getNode(X86ISD::UNPCKH, DL, MVT::v16i8, V, Zero));
+ }
+
+ SDValue BlendedLo = DAG.getVectorShuffle(MVT::v8i16, DL, V1, V2, LoBlendMask);
+ SDValue BlendedHi = DAG.getVectorShuffle(MVT::v8i16, DL, V1, V2, HiBlendMask);
+ return std::make_pair(BlendedLo, BlendedHi);
+ };
+ SDValue V1Lo, V1Hi, V2Lo, V2Hi;
+ std::tie(V1Lo, V1Hi) = buildLoAndHiV8s(V1, V1LoBlendMask, V1HiBlendMask);
+ std::tie(V2Lo, V2Hi) = buildLoAndHiV8s(V2, V2LoBlendMask, V2HiBlendMask);
+
+ SDValue LoV = DAG.getVectorShuffle(MVT::v8i16, DL, V1Lo, V2Lo, LoMask);
+ SDValue HiV = DAG.getVectorShuffle(MVT::v8i16, DL, V1Hi, V2Hi, HiMask);
+
+ return DAG.getNode(X86ISD::PACKUS, DL, MVT::v16i8, LoV, HiV);
+}
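A small sketch of the even-byte masking trick in buildLoAndHiV8s, assuming little-endian lane layout: when no odd byte is referenced, ANDing each i16 lane with 0x00FF zeroes the odd bytes, so the vector can be reinterpreted as v8i16 and every byte index simply halves.

    #include <cassert>
    #include <cstdint>

    int main() {
      // An i16 lane holds the byte pair (even, odd); little-endian puts the
      // even byte in the low half, so masking with 0x00FF keeps exactly it.
      uint16_t Lane = 0xBEEF;            // odd byte 0xBE, even byte 0xEF
      assert((Lane & 0x00FF) == 0x00EF); // odd byte dropped, even byte kept
      // Hence an (even) byte index M becomes word index M / 2, matching the
      // "squash the masks" loop above: byte 6 is word 3 of the v8i16 view.
      int M = 6;
      assert(M / 2 == 3);
      return 0;
    }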
+
+/// \brief Dispatching routine to lower various 128-bit x86 vector shuffles.
+///
+/// This routine breaks down the specific type of 128-bit shuffle and
+/// dispatches to the lowering routines accordingly.
+static SDValue lower128BitVectorShuffle(SDValue Op, SDValue V1, SDValue V2,
+ MVT VT, const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
+ switch (VT.SimpleTy) {
+ case MVT::v2i64:
+ return lowerV2I64VectorShuffle(Op, V1, V2, Subtarget, DAG);
+ case MVT::v2f64:
+ return lowerV2F64VectorShuffle(Op, V1, V2, Subtarget, DAG);
+ case MVT::v4i32:
+ return lowerV4I32VectorShuffle(Op, V1, V2, Subtarget, DAG);
+ case MVT::v4f32:
+ return lowerV4F32VectorShuffle(Op, V1, V2, Subtarget, DAG);
+ case MVT::v8i16:
+ return lowerV8I16VectorShuffle(Op, V1, V2, Subtarget, DAG);
+ case MVT::v16i8:
+ return lowerV16I8VectorShuffle(Op, V1, V2, Subtarget, DAG);
+
+ default:
+ llvm_unreachable("Unimplemented!");
+ }
+}
+
+/// \brief Tiny helper function to test whether adjacent mask elements form
+/// pair-aligned sequential runs that map onto single wider elements.
+static bool areAdjacentMasksSequential(ArrayRef<int> Mask) {
+ for (int i = 0, Size = Mask.size(); i < Size; i += 2)
+ if (Mask[i] % 2 != 0 || Mask[i] + 1 != Mask[i+1])
+ return false;
+
+ return true;
+}
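A hedged mirror of the helper for a quick check (the function and test values are illustrative): pairs must be sequential and start on an even index, so that each pair maps onto one wider element and the widened mask is simply Mask[i] / 2.

    #include <cassert>
    #include <vector>

    static bool adjacentPairsWiden(const std::vector<int> &Mask) {
      for (size_t i = 0; i + 1 < Mask.size(); i += 2)
        if (Mask[i] % 2 != 0 || Mask[i] + 1 != Mask[i + 1])
          return false;
      return true;
    }

    int main() {
      assert(adjacentPairsWiden({0, 1, 6, 7}));  // widens to the v2 mask {0, 3}
      assert(!adjacentPairsWiden({1, 2, 6, 7})); // {1, 2} straddles wide elements
      return 0;
    }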
+
+/// \brief Top-level lowering for x86 vector shuffles.
+///
+/// This handles decomposition, canonicalization, and lowering of all x86
+/// vector shuffles. Most of the specific lowering strategies are encapsulated
+/// above in helper routines. The canonicalization attempts to widen shuffles
+/// to involve fewer lanes of wider elements, consolidate symmetric patterns
+/// such that only one of the two inputs needs to be tested, etc.
+static SDValue lowerVectorShuffle(SDValue Op, const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
+ ArrayRef<int> Mask = SVOp->getMask();
+ SDValue V1 = Op.getOperand(0);
+ SDValue V2 = Op.getOperand(1);
+ MVT VT = Op.getSimpleValueType();
+ int NumElements = VT.getVectorNumElements();
+ SDLoc dl(Op);
+
+ assert(VT.getSizeInBits() != 64 && "Can't lower MMX shuffles");
+
+ bool V1IsUndef = V1.getOpcode() == ISD::UNDEF;
+ bool V2IsUndef = V2.getOpcode() == ISD::UNDEF;
+ if (V1IsUndef && V2IsUndef)
+ return DAG.getUNDEF(VT);
+
+ // When we create a shuffle node we put the UNDEF node as the second
+ // operand, but in some cases the first operand may be transformed to
+ // UNDEF. In that case we should just commute the node.
+ if (V1IsUndef)
+ return CommuteVectorShuffle(SVOp, DAG);
+
+ // Check for non-undef masks pointing at an undef vector and make the masks
+ // undef as well. This makes it easier to match the shuffle based solely on
+ // the mask.
+ if (V2IsUndef)
+ for (int M : Mask)
+ if (M >= NumElements) {
+ SmallVector<int, 8> NewMask(Mask.begin(), Mask.end());
+ for (int &M : NewMask)
+ if (M >= NumElements)
+ M = -1;
+ return DAG.getVectorShuffle(VT, dl, V1, V2, NewMask);
+ }
+
+ // For integer vector shuffles, try to collapse them into a shuffle of fewer
+ // lanes but wider integers. We cap this to not form integers larger than i64
+ // but it might be interesting to form i128 integers to handle flipping the
+ // low and high halves of AVX 256-bit vectors.
+ if (VT.isInteger() && VT.getScalarSizeInBits() < 64 &&
+ areAdjacentMasksSequential(Mask)) {
+ SmallVector<int, 8> NewMask;
+ for (int i = 0, Size = Mask.size(); i < Size; i += 2)
+ NewMask.push_back(Mask[i] / 2);
+ MVT NewVT =
+ MVT::getVectorVT(MVT::getIntegerVT(VT.getScalarSizeInBits() * 2),
+ VT.getVectorNumElements() / 2);
+ V1 = DAG.getNode(ISD::BITCAST, dl, NewVT, V1);
+ V2 = DAG.getNode(ISD::BITCAST, dl, NewVT, V2);
+ return DAG.getNode(ISD::BITCAST, dl, VT,
+ DAG.getVectorShuffle(NewVT, dl, V1, V2, NewMask));
+ }
+
+ int NumV1Elements = 0, NumUndefElements = 0, NumV2Elements = 0;
+ for (int M : SVOp->getMask())
+ if (M < 0)
+ ++NumUndefElements;
+ else if (M < NumElements)
+ ++NumV1Elements;
+ else
+ ++NumV2Elements;
+
+ // Commute the shuffle as needed such that more elements come from V1 than
+ // V2. This allows us to match the shuffle pattern strictly on how many
+ // elements come from V1 without handling the symmetric cases.
+ if (NumV2Elements > NumV1Elements)
+ return CommuteVectorShuffle(SVOp, DAG);
+
+ // When the number of V1 and V2 elements are the same, try to minimize the
+ // number of uses of V2 in the low half of the vector.
+ if (NumV1Elements == NumV2Elements) {
+ int LowV1Elements = 0, LowV2Elements = 0;
+ for (int M : SVOp->getMask().slice(0, NumElements / 2))
+ if (M >= NumElements)
+ ++LowV2Elements;
+ else if (M >= 0)
+ ++LowV1Elements;
+ if (LowV2Elements > LowV1Elements)
+ return CommuteVectorShuffle(SVOp, DAG);
+ }
+
+ // For each vector width, delegate to a specialized lowering routine.
+ if (VT.getSizeInBits() == 128)
+ return lower128BitVectorShuffle(Op, V1, V2, VT, Subtarget, DAG);
+
+ llvm_unreachable("Unimplemented!");
+}
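A hedged illustration of the commute canonicalization with a sample v4i32 mask (not from the patch): when more elements come from V2, the operands are conceptually swapped and every defined index is flipped across the NumElements boundary, so the later matching only ever sees V1-heavy masks.

    #include <cassert>

    int main() {
      const int NumElements = 4;
      int Mask[4] = {4, 5, 6, 0}; // three elements from V2, one from V1
      for (int i = 0; i < NumElements; ++i)
        if (Mask[i] >= 0)
          Mask[i] = Mask[i] < NumElements ? Mask[i] + NumElements
                                          : Mask[i] - NumElements;
      // With (V1, V2) swapped, the equivalent mask is V1-heavy: {0, 1, 2, 4}.
      assert(Mask[0] == 0 && Mask[1] == 1 && Mask[2] == 2 && Mask[3] == 4);
      return 0;
    }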
+
+
+//===----------------------------------------------------------------------===//
+// Legacy vector shuffle lowering
+//
+// This is the legacy code path for handling vector shuffles; it remains
+// until the code above matches its functionality and performance.
+//===----------------------------------------------------------------------===//
+
+static bool isBlendMask(ArrayRef<int> MaskVals, MVT VT, bool hasSSE41,
+ bool hasInt256, unsigned *MaskOut = nullptr) {
MVT EltVT = VT.getVectorElementType();
- unsigned NumElems = VT.getVectorNumElements();
// There is no blend with immediate in AVX-512.
if (VT.is512BitVector())
- return SDValue();
+ return false;
- if (!Subtarget->hasSSE41() || EltVT == MVT::i8)
- return SDValue();
- if (!Subtarget->hasInt256() && VT == MVT::v16i16)
- return SDValue();
+ if (!hasSSE41 || EltVT == MVT::i8)
+ return false;
+ if (!hasInt256 && VT == MVT::v16i16)
+ return false;
- // Check the mask for BLEND and build the value.
unsigned MaskValue = 0;
+ unsigned NumElems = VT.getVectorNumElements();
// There are 2 lanes if (NumElems > 8), and 1 lane otherwise.
- unsigned NumLanes = (NumElems-1)/8 + 1;
+ unsigned NumLanes = (NumElems - 1) / 8 + 1;
unsigned NumElemsInLane = NumElems / NumLanes;
// Blend for v16i16 should be symmetric for both lanes.
for (unsigned i = 0; i < NumElemsInLane; ++i) {
- int SndLaneEltIdx = (NumLanes == 2) ?
- SVOp->getMaskElt(i + NumElemsInLane) : -1;
- int EltIdx = SVOp->getMaskElt(i);
+ int SndLaneEltIdx = (NumLanes == 2) ? MaskVals[i + NumElemsInLane] : -1;
+ int EltIdx = MaskVals[i];
if ((EltIdx < 0 || EltIdx == (int)i) &&
(SndLaneEltIdx < 0 || SndLaneEltIdx == (int)(i + NumElemsInLane)))
@@ -6469,11 +8044,34 @@ LowerVECTOR_SHUFFLEtoBlend(ShuffleVectorSDNode *SVOp,
if (((unsigned)EltIdx == (i + NumElems)) &&
(SndLaneEltIdx < 0 ||
(unsigned)SndLaneEltIdx == i + NumElems + NumElemsInLane))
- MaskValue |= (1<<i);
+ MaskValue |= (1 << i);
else
- return SDValue();
+ return false;
}
+ if (MaskOut)
+ *MaskOut = MaskValue;
+ return true;
+}
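A minimal check of the immediate isBlendMask builds for a single-lane type (mask values are illustrative): element i drawn from V2, i.e. mask index i + NumElems, sets bit i.

    #include <cassert>

    int main() {
      const int NumElems = 4;     // v4i32: one lane, four elements
      int Mask[4] = {0, 5, 2, 7}; // V1[0], V2[1], V1[2], V2[3]
      unsigned MaskValue = 0;
      for (int i = 0; i < NumElems; ++i)
        if (Mask[i] == i + NumElems) // element taken from V2
          MaskValue |= 1u << i;
      assert(MaskValue == 0xA); // BLENDPS-style immediate 0b1010
      return 0;
    }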
+
+// Try to lower a shuffle node into a simple blend instruction.
+// This function assumes isBlendMask returns true for this
+// ShuffleVectorSDNode.
+static SDValue LowerVECTOR_SHUFFLEtoBlend(ShuffleVectorSDNode *SVOp,
+ unsigned MaskValue,
+ const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
+ MVT VT = SVOp->getSimpleValueType(0);
+ MVT EltVT = VT.getVectorElementType();
+ assert(isBlendMask(SVOp->getMask(), VT, Subtarget->hasSSE41(),
+ Subtarget->hasInt256()) &&
+ "Trying to lower a VECTOR_SHUFFLE to a Blend but with the wrong mask");
+ SDValue V1 = SVOp->getOperand(0);
+ SDValue V2 = SVOp->getOperand(1);
+ SDLoc dl(SVOp);
+ unsigned NumElems = VT.getVectorNumElements();
+
// Convert i32 vectors to floating point if it is not AVX2.
// AVX2 introduced VPBLENDD instruction for 128 and 256-bit vectors.
MVT BlendVT = VT;
@@ -7450,8 +9048,9 @@ static SDValue getINSERTPS(ShuffleVectorSDNode *SVOp, SDLoc &dl,
assert((VT == MVT::v4f32 || VT == MVT::v4i32) &&
"unsupported vector type for insertps/pinsrd");
- int FromV1 = std::count_if(Mask.begin(), Mask.end(),
- [](const int &i) { return i < 4; });
+ auto FromV1Predicate = [](const int &i) { return i < 4 && i > -1; };
+ auto FromV2Predicate = [](const int &i) { return i >= 4; };
+ int FromV1 = std::count_if(Mask.begin(), Mask.end(), FromV1Predicate);
SDValue From;
SDValue To;
@@ -7459,23 +9058,26 @@ static SDValue getINSERTPS(ShuffleVectorSDNode *SVOp, SDLoc &dl,
if (FromV1 == 1) {
From = V1;
To = V2;
- DestIndex = std::find_if(Mask.begin(), Mask.end(),
- [](const int &i) { return i < 4; }) -
+ DestIndex = std::find_if(Mask.begin(), Mask.end(), FromV1Predicate) -
Mask.begin();
} else {
+ assert(std::count_if(Mask.begin(), Mask.end(), FromV2Predicate) == 1 &&
+ "More than one element from V1 and from V2, or no elements from one "
+ "of the vectors. This case should not have returned true from "
+ "isINSERTPSMask");
From = V2;
To = V1;
- DestIndex = std::find_if(Mask.begin(), Mask.end(),
- [](const int &i) { return i >= 4; }) -
- Mask.begin();
+ DestIndex =
+ std::find_if(Mask.begin(), Mask.end(), FromV2Predicate) - Mask.begin();
}
+ unsigned SrcIndex = Mask[DestIndex] % 4;
if (MayFoldLoad(From)) {
// Trivial case, when From comes from a load and is only used by the
// shuffle. Make it use insertps from the vector that we need from that
// load.
SDValue NewLoad =
- NarrowVectorLoadToElement(cast<LoadSDNode>(From), DestIndex, DAG);
+ NarrowVectorLoadToElement(cast<LoadSDNode>(From), SrcIndex, DAG);
if (!NewLoad.getNode())
return SDValue();
@@ -7496,7 +9098,6 @@ static SDValue getINSERTPS(ShuffleVectorSDNode *SVOp, SDLoc &dl,
}
// Vector-element-to-vector
- unsigned SrcIndex = Mask[DestIndex] % 4;
SDValue InsertpsMask = DAG.getIntPtrConstant(DestIndex << 4 | SrcIndex << 6);
return DAG.getNode(X86ISD::INSERTPS, dl, VT, To, From, InsertpsMask);
}
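For reference, a short check of the INSERTPS immediate built on the last line above: the source lane occupies bits [7:6], the destination lane bits [5:4], and bits [3:0] are the zero mask (unused here).

    #include <cassert>

    int main() {
      unsigned SrcIndex = 2, DestIndex = 1; // insert source lane 2 into lane 1
      unsigned Imm = DestIndex << 4 | SrcIndex << 6;
      assert(Imm == 0x90); // binary 10'01'0000: SrcIndex = 2, DestIndex = 1
      return 0;
    }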
@@ -7663,6 +9264,11 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
bool OptForSize = MF.getFunction()->getAttributes().
hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize);
+ // Check if we should use the experimental vector shuffle lowering. If so,
+ // delegate completely to that code path.
+ if (ExperimentalVectorShuffleLowering)
+ return lowerVectorShuffle(Op, Subtarget, DAG);
+
assert(VT.getSizeInBits() != 64 && "Can't lower MMX shuffles");
if (V1IsUndef && V2IsUndef)
@@ -7796,8 +9402,13 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
bool Commuted = false;
// FIXME: This should also accept a bitcast of a splat? Be careful, not
// 1,1,1,1 -> v8i16 though.
- V1IsSplat = isSplatVector(V1.getNode());
- V2IsSplat = isSplatVector(V2.getNode());
+ BitVector UndefElements;
+ if (auto *BVOp = dyn_cast<BuildVectorSDNode>(V1.getNode()))
+ if (BVOp->getConstantSplatNode(&UndefElements) && UndefElements.none())
+ V1IsSplat = true;
+ if (auto *BVOp = dyn_cast<BuildVectorSDNode>(V2.getNode()))
+ if (BVOp->getConstantSplatNode(&UndefElements) && UndefElements.none())
+ V2IsSplat = true;
// Canonicalize the splat or undef, if present, to be on the RHS.
if (!V2IsUndef && V1IsSplat && !V2IsSplat) {
@@ -7873,6 +9484,11 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
getShufflePSHUFLWImmediate(SVOp),
DAG);
+ unsigned MaskValue;
+ if (isBlendMask(M, VT, Subtarget->hasSSE41(), Subtarget->hasInt256(),
+ &MaskValue))
+ return LowerVECTOR_SHUFFLEtoBlend(SVOp, MaskValue, Subtarget, DAG);
+
if (isSHUFPMask(M, VT))
return getTargetShuffleNode(X86ISD::SHUFP, dl, VT, V1, V2,
getShuffleSHUFImmediate(SVOp), DAG);
@@ -7910,10 +9526,6 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
return getTargetShuffleNode(X86ISD::VPERM2X128, dl, VT, V1,
V2, getShuffleVPERM2X128Immediate(SVOp), DAG);
- SDValue BlendOp = LowerVECTOR_SHUFFLEtoBlend(SVOp, Subtarget, DAG);
- if (BlendOp.getNode())
- return BlendOp;
-
if (Subtarget->hasSSE41() && isINSERTPSMask(M, VT))
return getINSERTPS(SVOp, dl, DAG);
@@ -8530,7 +10142,7 @@ X86TargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const {
// global base reg.
unsigned char OpFlag = 0;
unsigned WrapperKind = X86ISD::Wrapper;
- CodeModel::Model M = getTargetMachine().getCodeModel();
+ CodeModel::Model M = DAG.getTarget().getCodeModel();
if (Subtarget->isPICStyleRIPRel() &&
(M == CodeModel::Small || M == CodeModel::Kernel))
@@ -8563,7 +10175,7 @@ SDValue X86TargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
// global base reg.
unsigned char OpFlag = 0;
unsigned WrapperKind = X86ISD::Wrapper;
- CodeModel::Model M = getTargetMachine().getCodeModel();
+ CodeModel::Model M = DAG.getTarget().getCodeModel();
if (Subtarget->isPICStyleRIPRel() &&
(M == CodeModel::Small || M == CodeModel::Kernel))
@@ -8596,7 +10208,7 @@ X86TargetLowering::LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const {
// global base reg.
unsigned char OpFlag = 0;
unsigned WrapperKind = X86ISD::Wrapper;
- CodeModel::Model M = getTargetMachine().getCodeModel();
+ CodeModel::Model M = DAG.getTarget().getCodeModel();
if (Subtarget->isPICStyleRIPRel() &&
(M == CodeModel::Small || M == CodeModel::Kernel)) {
@@ -8617,7 +10229,7 @@ X86TargetLowering::LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const {
Result = DAG.getNode(WrapperKind, DL, getPointerTy(), Result);
// With PIC, the address is actually $g + Offset.
- if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
+ if (DAG.getTarget().getRelocationModel() == Reloc::PIC_ &&
!Subtarget->is64Bit()) {
Result = DAG.getNode(ISD::ADD, DL, getPointerTy(),
DAG.getNode(X86ISD::GlobalBaseReg,
@@ -8639,7 +10251,7 @@ X86TargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const {
// Create the TargetBlockAddressAddress node.
unsigned char OpFlags =
Subtarget->ClassifyBlockAddressReference();
- CodeModel::Model M = getTargetMachine().getCodeModel();
+ CodeModel::Model M = DAG.getTarget().getCodeModel();
const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
int64_t Offset = cast<BlockAddressSDNode>(Op)->getOffset();
SDLoc dl(Op);
@@ -8668,8 +10280,8 @@ X86TargetLowering::LowerGlobalAddress(const GlobalValue *GV, SDLoc dl,
// Create the TargetGlobalAddress node, folding in the constant
// offset if it is legal.
unsigned char OpFlags =
- Subtarget->ClassifyGlobalReference(GV, getTargetMachine());
- CodeModel::Model M = getTargetMachine().getCodeModel();
+ Subtarget->ClassifyGlobalReference(GV, DAG.getTarget());
+ CodeModel::Model M = DAG.getTarget().getCodeModel();
SDValue Result;
if (OpFlags == X86II::MO_NO_FLAG &&
X86::isOffsetSuitableForCodeModel(Offset, M)) {
@@ -8868,7 +10480,7 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
const GlobalValue *GV = GA->getGlobal();
if (Subtarget->isTargetELF()) {
- TLSModel::Model model = getTargetMachine().getTLSModel(GV);
+ TLSModel::Model model = DAG.getTarget().getTLSModel(GV);
switch (model) {
case TLSModel::GeneralDynamic:
@@ -8880,9 +10492,9 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
Subtarget->is64Bit());
case TLSModel::InitialExec:
case TLSModel::LocalExec:
- return LowerToTLSExecModel(GA, DAG, getPointerTy(), model,
- Subtarget->is64Bit(),
- getTargetMachine().getRelocationModel() == Reloc::PIC_);
+ return LowerToTLSExecModel(
+ GA, DAG, getPointerTy(), model, Subtarget->is64Bit(),
+ DAG.getTarget().getRelocationModel() == Reloc::PIC_);
}
llvm_unreachable("Unknown TLS model.");
}
@@ -8895,8 +10507,8 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
// In PIC mode (unless we're in RIPRel PIC mode) we add an offset to the
// global base reg.
- bool PIC32 = (getTargetMachine().getRelocationModel() == Reloc::PIC_) &&
- !Subtarget->is64Bit();
+ bool PIC32 = (DAG.getTarget().getRelocationModel() == Reloc::PIC_) &&
+ !Subtarget->is64Bit();
if (PIC32)
OpFlag = X86II::MO_TLVP_PIC_BASE;
else
@@ -10050,10 +11662,27 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC, SDLoc dl,
break;
case X86::COND_G: case X86::COND_GE:
case X86::COND_L: case X86::COND_LE:
- case X86::COND_O: case X86::COND_NO:
- NeedOF = true;
+ case X86::COND_O: case X86::COND_NO: {
+ // Check if we really need to set the Overflow flag. If NoSignedWrap is
+ // present, it is not actually needed.
+ switch (Op->getOpcode()) {
+ case ISD::ADD:
+ case ISD::SUB:
+ case ISD::MUL:
+ case ISD::SHL: {
+ const BinaryWithFlagsSDNode *BinNode =
+ cast<BinaryWithFlagsSDNode>(Op.getNode());
+ if (BinNode->hasNoSignedWrap())
+ break;
+ }
+ default:
+ NeedOF = true;
+ break;
+ }
break;
}
+ }
// See if we can use the EFLAGS value from the operand instead of
// doing a separate TEST. TEST always sets OF and CF to 0, so unless
// we prove that the arithmetic won't overflow, we can't use OF or CF.
@@ -10115,14 +11744,14 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC, SDLoc dl,
if (ConstantSDNode *C =
dyn_cast<ConstantSDNode>(ArithOp.getNode()->getOperand(1))) {
// An add of one will be selected as an INC.
- if (C->getAPIntValue() == 1) {
+ if (C->getAPIntValue() == 1 && !Subtarget->slowIncDec()) {
Opcode = X86ISD::INC;
NumOperands = 1;
break;
}
// An add of negative one (subtract of one) will be selected as a DEC.
- if (C->getAPIntValue().isAllOnesValue()) {
+ if (C->getAPIntValue().isAllOnesValue() && !Subtarget->slowIncDec()) {
Opcode = X86ISD::DEC;
NumOperands = 1;
break;
@@ -10138,7 +11767,7 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC, SDLoc dl,
// If we have a constant logical shift that's only used in a comparison
// against zero turn it into an equivalent AND. This allows turning it into
// a TEST instruction later.
- if ((X86CC == X86::COND_E || X86CC == X86::COND_NE) &&
+ if ((X86CC == X86::COND_E || X86CC == X86::COND_NE) && Op->hasOneUse() &&
isa<ConstantSDNode>(Op->getOperand(1)) && !hasNonFlagsUse(Op)) {
EVT VT = Op.getValueType();
unsigned BitWidth = VT.getSizeInBits();
@@ -11469,8 +13098,9 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
}
if (addTest) {
- CC = DAG.getConstant(X86::COND_NE, MVT::i8);
- Cond = EmitTest(Cond, X86::COND_NE, dl, DAG);
+ X86::CondCode X86Cond = Inverted ? X86::COND_E : X86::COND_NE;
+ CC = DAG.getConstant(X86Cond, MVT::i8);
+ Cond = EmitTest(Cond, X86Cond, dl, DAG);
}
Cond = ConvertCmpIfNecessary(Cond, DAG);
return DAG.getNode(X86ISD::BRCOND, dl, Op.getValueType(),
@@ -11513,7 +13143,7 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, VT);
Chain = SP.getValue(1);
unsigned Align = cast<ConstantSDNode>(Tmp3)->getZExtValue();
- const TargetFrameLowering &TFI = *getTargetMachine().getFrameLowering();
+ const TargetFrameLowering &TFI = *DAG.getTarget().getFrameLowering();
unsigned StackAlign = TFI.getStackAlignment();
Tmp1 = DAG.getNode(ISD::SUB, dl, VT, SP, Size); // Value
if (Align > StackAlign)
@@ -11572,7 +13202,7 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
Chain = DAG.getNode(X86ISD::WIN_ALLOCA, dl, NodeTys, Chain, Flag);
const X86RegisterInfo *RegInfo =
- static_cast<const X86RegisterInfo*>(getTargetMachine().getRegisterInfo());
+ static_cast<const X86RegisterInfo*>(DAG.getTarget().getRegisterInfo());
unsigned SPReg = RegInfo->getStackRegister();
SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, SPTy);
Chain = SP.getValue(1);
@@ -11681,7 +13311,7 @@ SDValue X86TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
if (ArgMode == 2) {
// Sanity Check: Make sure using fp_offset makes sense.
- assert(!getTargetMachine().Options.UseSoftFloat &&
+ assert(!DAG.getTarget().Options.UseSoftFloat &&
!(DAG.getMachineFunction()
.getFunction()->getAttributes()
.hasAttribute(AttributeSet::FunctionIndex,
@@ -12158,11 +13788,37 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) {
Op.getOperand(1), Op.getOperand(2));
}
+ case Intrinsic::x86_sse2_packssdw_128:
+ case Intrinsic::x86_sse2_packsswb_128:
+ case Intrinsic::x86_avx2_packssdw:
+ case Intrinsic::x86_avx2_packsswb:
+ return DAG.getNode(X86ISD::PACKSS, dl, Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2));
+
+ case Intrinsic::x86_sse2_packuswb_128:
+ case Intrinsic::x86_sse41_packusdw:
+ case Intrinsic::x86_avx2_packuswb:
+ case Intrinsic::x86_avx2_packusdw:
+ return DAG.getNode(X86ISD::PACKUS, dl, Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2));
+
case Intrinsic::x86_ssse3_pshuf_b_128:
case Intrinsic::x86_avx2_pshuf_b:
return DAG.getNode(X86ISD::PSHUFB, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
+ case Intrinsic::x86_sse2_pshuf_d:
+ return DAG.getNode(X86ISD::PSHUFD, dl, Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2));
+
+ case Intrinsic::x86_sse2_pshufl_w:
+ return DAG.getNode(X86ISD::PSHUFLW, dl, Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2));
+
+ case Intrinsic::x86_sse2_pshufh_w:
+ return DAG.getNode(X86ISD::PSHUFHW, dl, Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2));
+
case Intrinsic::x86_ssse3_psign_b_128:
case Intrinsic::x86_ssse3_psign_w_128:
case Intrinsic::x86_ssse3_psign_d_128:
@@ -12610,6 +14266,51 @@ static SDValue getPrefetchNode(unsigned Opc, SDValue Op, SelectionDAG &DAG,
return SDValue(Res, 0);
}
+// getReadPerformanceCounter - Handles the lowering of builtin intrinsics that
+// read performance monitor counters (x86_rdpmc).
+static void getReadPerformanceCounter(SDNode *N, SDLoc DL,
+ SelectionDAG &DAG, const X86Subtarget *Subtarget,
+ SmallVectorImpl<SDValue> &Results) {
+ assert(N->getNumOperands() == 3 && "Unexpected number of operands!");
+ SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
+ SDValue LO, HI;
+
+ // The ECX register is used to select the index of the performance counter
+ // to read.
+ SDValue Chain = DAG.getCopyToReg(N->getOperand(0), DL, X86::ECX,
+ N->getOperand(2));
+ SDValue rd = DAG.getNode(X86ISD::RDPMC_DAG, DL, Tys, Chain);
+
+ // Reads the content of a 64-bit performance counter and returns it in the
+ // registers EDX:EAX.
+ if (Subtarget->is64Bit()) {
+ LO = DAG.getCopyFromReg(rd, DL, X86::RAX, MVT::i64, rd.getValue(1));
+ HI = DAG.getCopyFromReg(LO.getValue(1), DL, X86::RDX, MVT::i64,
+ LO.getValue(2));
+ } else {
+ LO = DAG.getCopyFromReg(rd, DL, X86::EAX, MVT::i32, rd.getValue(1));
+ HI = DAG.getCopyFromReg(LO.getValue(1), DL, X86::EDX, MVT::i32,
+ LO.getValue(2));
+ }
+ Chain = HI.getValue(1);
+
+ if (Subtarget->is64Bit()) {
+ // The EAX register is loaded with the low-order 32 bits. The EDX register
+ // is loaded with the supported high-order bits of the counter.
+ SDValue Tmp = DAG.getNode(ISD::SHL, DL, MVT::i64, HI,
+ DAG.getConstant(32, MVT::i8));
+ Results.push_back(DAG.getNode(ISD::OR, DL, MVT::i64, LO, Tmp));
+ Results.push_back(Chain);
+ return;
+ }
+
+ // Use a buildpair to merge the two 32-bit values into a 64-bit one.
+ SDValue Ops[] = { LO, HI };
+ SDValue Pair = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Ops);
+ Results.push_back(Pair);
+ Results.push_back(Chain);
+}
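A user-space sketch of the semantics this lowering implements (the helper is hypothetical, and note that RDPMC faults unless the OS enables user-mode counter access): ECX selects the counter, the result comes back in EDX:EAX, and the halves are merged exactly like the SHL/OR pair above on 64-bit targets.

    #include <stdint.h>

    static inline uint64_t rdpmc(uint32_t Counter) {
      uint32_t Lo, Hi;
      __asm__ volatile("rdpmc" : "=a"(Lo), "=d"(Hi) : "c"(Counter));
      return ((uint64_t)Hi << 32) | Lo; // same merge as the SHL/OR above
    }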
+
// getReadTimeStampCounter - Handles the lowering of builtin intrinsics that
// read the time stamp counter (x86_rdtsc and x86_rdtscp). This function is
// also used to custom lower READCYCLECOUNTER nodes.
@@ -12674,7 +14375,7 @@ static SDValue LowerREADCYCLECOUNTER(SDValue Op, const X86Subtarget *Subtarget,
}
enum IntrinsicType {
- GATHER, SCATTER, PREFETCH, RDSEED, RDRAND, RDTSC, XTEST
+ GATHER, SCATTER, PREFETCH, RDSEED, RDRAND, RDPMC, RDTSC, XTEST
};
struct IntrinsicData {
@@ -12768,6 +14469,8 @@ static void InitIntinsicsMap() {
IntrinsicData(RDTSC, X86ISD::RDTSC_DAG, 0)));
IntrMap.insert(std::make_pair(Intrinsic::x86_rdtscp,
IntrinsicData(RDTSC, X86ISD::RDTSCP_DAG, 0)));
+ IntrMap.insert(std::make_pair(Intrinsic::x86_rdpmc,
+ IntrinsicData(RDPMC, X86ISD::RDPMC_DAG, 0)));
Initialized = true;
}
@@ -12826,7 +14529,7 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget *Subtarget,
case PREFETCH: {
SDValue Hint = Op.getOperand(6);
unsigned HintVal;
- if (dyn_cast<ConstantSDNode> (Hint) == 0 ||
+ if (dyn_cast<ConstantSDNode> (Hint) == nullptr ||
(HintVal = dyn_cast<ConstantSDNode> (Hint)->getZExtValue()) > 1)
llvm_unreachable("Wrong prefetch hint in intrinsic: should be 0 or 1");
unsigned Opcode = (HintVal ? Intr.Opc1 : Intr.Opc0);
@@ -12843,6 +14546,12 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget *Subtarget,
getReadTimeStampCounter(Op.getNode(), dl, Intr.Opc0, DAG, Subtarget, Results);
return DAG.getMergeValues(Results, dl);
}
+ // Read Performance Monitoring Counters.
+ case RDPMC: {
+ SmallVector<SDValue, 2> Results;
+ getReadPerformanceCounter(Op.getNode(), dl, DAG, Subtarget, Results);
+ return DAG.getMergeValues(Results, dl);
+ }
// XTEST intrinsics.
case XTEST: {
SDVTList VTs = DAG.getVTList(Op->getValueType(0), MVT::Other);
@@ -12873,7 +14582,7 @@ SDValue X86TargetLowering::LowerRETURNADDR(SDValue Op,
if (Depth > 0) {
SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
const X86RegisterInfo *RegInfo =
- static_cast<const X86RegisterInfo*>(getTargetMachine().getRegisterInfo());
+ static_cast<const X86RegisterInfo*>(DAG.getTarget().getRegisterInfo());
SDValue Offset = DAG.getConstant(RegInfo->getSlotSize(), PtrVT);
return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
DAG.getNode(ISD::ADD, dl, PtrVT,
@@ -12895,7 +14604,7 @@ SDValue X86TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
SDLoc dl(Op); // FIXME probably not meaningful
unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
const X86RegisterInfo *RegInfo =
- static_cast<const X86RegisterInfo*>(getTargetMachine().getRegisterInfo());
+ static_cast<const X86RegisterInfo*>(DAG.getTarget().getRegisterInfo());
unsigned FrameReg = RegInfo->getFrameRegister(DAG.getMachineFunction());
assert(((FrameReg == X86::RBP && VT == MVT::i64) ||
(FrameReg == X86::EBP && VT == MVT::i32)) &&
@@ -12924,7 +14633,7 @@ unsigned X86TargetLowering::getRegisterByName(const char* RegName,
SDValue X86TargetLowering::LowerFRAME_TO_ARGS_OFFSET(SDValue Op,
SelectionDAG &DAG) const {
const X86RegisterInfo *RegInfo =
- static_cast<const X86RegisterInfo*>(getTargetMachine().getRegisterInfo());
+ static_cast<const X86RegisterInfo*>(DAG.getTarget().getRegisterInfo());
return DAG.getIntPtrConstant(2 * RegInfo->getSlotSize());
}
@@ -12936,7 +14645,7 @@ SDValue X86TargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const {
EVT PtrVT = getPointerTy();
const X86RegisterInfo *RegInfo =
- static_cast<const X86RegisterInfo*>(getTargetMachine().getRegisterInfo());
+ static_cast<const X86RegisterInfo*>(DAG.getTarget().getRegisterInfo());
unsigned FrameReg = RegInfo->getFrameRegister(DAG.getMachineFunction());
assert(((FrameReg == X86::RBP && PtrVT == MVT::i64) ||
(FrameReg == X86::EBP && PtrVT == MVT::i32)) &&
@@ -12983,7 +14692,7 @@ SDValue X86TargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
SDLoc dl (Op);
const Value *TrmpAddr = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
- const TargetRegisterInfo* TRI = getTargetMachine().getRegisterInfo();
+ const TargetRegisterInfo* TRI = DAG.getTarget().getRegisterInfo();
if (Subtarget->is64Bit()) {
SDValue OutChains[6];
@@ -13431,7 +15140,7 @@ SDValue X86TargetLowering::LowerWin64_i128OP(SDValue Op, SelectionDAG &DAG) cons
CLI.setDebugLoc(dl).setChain(InChain)
.setCallee(getLibcallCallingConv(LC),
static_cast<EVT>(MVT::v2i64).getTypeForEVT(*DAG.getContext()),
- Callee, &Args, 0)
+ Callee, std::move(Args), 0)
.setInRegister().setSExtResult(isSigned).setZExtResult(!isSigned);
std::pair<SDValue, SDValue> CallInfo = LowerCallTo(CLI);
@@ -13448,7 +15157,7 @@ static SDValue LowerMUL_LOHI(SDValue Op, const X86Subtarget *Subtarget,
(VT == MVT::v8i32 && Subtarget->hasInt256()));
// Get the high parts.
- const int Mask[] = {1, 2, 3, 4, 5, 6, 7, 8};
+ const int Mask[] = {1, -1, 3, -1, 5, -1, 7, -1};
SDValue Hi0 = DAG.getVectorShuffle(VT, dl, Op0, Op0, Mask);
SDValue Hi1 = DAG.getVectorShuffle(VT, dl, Op1, Op1, Mask);
@@ -13464,10 +15173,18 @@ static SDValue LowerMUL_LOHI(SDValue Op, const X86Subtarget *Subtarget,
DAG.getNode(Opcode, dl, MulVT, Hi0, Hi1));
// Shuffle it back into the right order.
- const int HighMask[] = {1, 5, 3, 7, 9, 13, 11, 15};
- SDValue Highs = DAG.getVectorShuffle(VT, dl, Mul1, Mul2, HighMask);
- const int LowMask[] = {0, 4, 2, 6, 8, 12, 10, 14};
- SDValue Lows = DAG.getVectorShuffle(VT, dl, Mul1, Mul2, LowMask);
+ SDValue Highs, Lows;
+ if (VT == MVT::v8i32) {
+ const int HighMask[] = {1, 9, 3, 11, 5, 13, 7, 15};
+ Highs = DAG.getVectorShuffle(VT, dl, Mul1, Mul2, HighMask);
+ const int LowMask[] = {0, 8, 2, 10, 4, 12, 6, 14};
+ Lows = DAG.getVectorShuffle(VT, dl, Mul1, Mul2, LowMask);
+ } else {
+ const int HighMask[] = {1, 5, 3, 7};
+ Highs = DAG.getVectorShuffle(VT, dl, Mul1, Mul2, HighMask);
+ const int LowMask[] = {0, 4, 2, 6};
+ Lows = DAG.getVectorShuffle(VT, dl, Mul1, Mul2, LowMask);
+ }
// If we have a signed multiply but no PMULDQ fix up the high parts of a
// unsigned multiply.
@@ -13494,10 +15211,9 @@ static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG,
SDValue Amt = Op.getOperand(1);
// Optimize shl/srl/sra with constant shift amount.
- if (isSplatVector(Amt.getNode())) {
- SDValue SclrAmt = Amt->getOperand(0);
- if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(SclrAmt)) {
- uint64_t ShiftAmt = C->getZExtValue();
+ if (auto *BVAmt = dyn_cast<BuildVectorSDNode>(Amt)) {
+ if (auto *ShiftConst = BVAmt->getConstantSplatNode()) {
+ uint64_t ShiftAmt = ShiftConst->getZExtValue();
if (VT == MVT::v2i64 || VT == MVT::v4i32 || VT == MVT::v8i16 ||
(Subtarget->hasInt256() &&
@@ -13804,15 +15520,14 @@ static SDValue LowerScalarVariableShift(SDValue Op, SelectionDAG &DAG,
static SDValue LowerShift(SDValue Op, const X86Subtarget* Subtarget,
SelectionDAG &DAG) {
-
MVT VT = Op.getSimpleValueType();
SDLoc dl(Op);
SDValue R = Op.getOperand(0);
SDValue Amt = Op.getOperand(1);
SDValue V;
- if (!Subtarget->hasSSE2())
- return SDValue();
+ assert(VT.isVector() && "Custom lowering only for vector shifts!");
+ assert(Subtarget->hasSSE2() && "Only custom lower when we have SSE2!");
V = LowerScalarImmediateShift(Op, DAG, Subtarget);
if (V.getNode())
@@ -14254,7 +15969,7 @@ static SDValue LowerCMP_SWAP(SDValue Op, const X86Subtarget *Subtarget,
break;
}
SDValue cpIn = DAG.getCopyToReg(Op.getOperand(0), DL, Reg,
- Op.getOperand(2), SDValue());
+ Op.getOperand(2), SDValue());
SDValue Ops[] = { cpIn.getValue(0),
Op.getOperand(1),
Op.getOperand(3),
@@ -14264,9 +15979,18 @@ static SDValue LowerCMP_SWAP(SDValue Op, const X86Subtarget *Subtarget,
MachineMemOperand *MMO = cast<AtomicSDNode>(Op)->getMemOperand();
SDValue Result = DAG.getMemIntrinsicNode(X86ISD::LCMPXCHG_DAG, DL, Tys,
Ops, T, MMO);
+
SDValue cpOut =
DAG.getCopyFromReg(Result.getValue(0), DL, Reg, T, Result.getValue(1));
- return cpOut;
+ SDValue EFLAGS = DAG.getCopyFromReg(cpOut.getValue(1), DL, X86::EFLAGS,
+ MVT::i32, cpOut.getValue(2));
+ SDValue Success = DAG.getNode(X86ISD::SETCC, DL, Op->getValueType(1),
+ DAG.getConstant(X86::COND_E, MVT::i8), EFLAGS);
+
+ DAG.ReplaceAllUsesOfValueWith(Op.getValue(0), cpOut);
+ DAG.ReplaceAllUsesOfValueWith(Op.getValue(1), Success);
+ DAG.ReplaceAllUsesOfValueWith(Op.getValue(2), EFLAGS.getValue(1));
+ return SDValue();
}
static SDValue LowerBITCAST(SDValue Op, const X86Subtarget *Subtarget,
@@ -14422,7 +16146,7 @@ static SDValue LowerFSINCOS(SDValue Op, const X86Subtarget *Subtarget,
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(dl).setChain(DAG.getEntryNode())
- .setCallee(CallingConv::C, RetTy, Callee, &Args, 0);
+ .setCallee(CallingConv::C, RetTy, Callee, std::move(Args), 0);
std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI);
@@ -14446,7 +16170,8 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
default: llvm_unreachable("Should not custom lower this!");
case ISD::SIGN_EXTEND_INREG: return LowerSIGN_EXTEND_INREG(Op,DAG);
case ISD::ATOMIC_FENCE: return LowerATOMIC_FENCE(Op, Subtarget, DAG);
- case ISD::ATOMIC_CMP_SWAP: return LowerCMP_SWAP(Op, Subtarget, DAG);
+ case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS:
+ return LowerCMP_SWAP(Op, Subtarget, DAG);
case ISD::ATOMIC_LOAD_SUB: return LowerLOAD_SUB(Op,DAG);
case ISD::ATOMIC_STORE: return LowerATOMIC_STORE(Op,DAG);
case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
@@ -14528,8 +16253,8 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
}
static void ReplaceATOMIC_LOAD(SDNode *Node,
- SmallVectorImpl<SDValue> &Results,
- SelectionDAG &DAG) {
+ SmallVectorImpl<SDValue> &Results,
+ SelectionDAG &DAG) {
SDLoc dl(Node);
EVT VT = cast<AtomicSDNode>(Node)->getMemoryVT();
@@ -14538,38 +16263,16 @@ static void ReplaceATOMIC_LOAD(SDNode *Node,
// (The only way to get a 16-byte load is cmpxchg16b)
// FIXME: 16-byte ATOMIC_CMP_SWAP isn't actually hooked up at the moment.
SDValue Zero = DAG.getConstant(0, VT);
- SDValue Swap = DAG.getAtomic(ISD::ATOMIC_CMP_SWAP, dl, VT,
- Node->getOperand(0),
- Node->getOperand(1), Zero, Zero,
- cast<AtomicSDNode>(Node)->getMemOperand(),
- cast<AtomicSDNode>(Node)->getOrdering(),
- cast<AtomicSDNode>(Node)->getOrdering(),
- cast<AtomicSDNode>(Node)->getSynchScope());
+ SDVTList VTs = DAG.getVTList(VT, MVT::i1, MVT::Other);
+ SDValue Swap =
+ DAG.getAtomicCmpSwap(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, dl, VT, VTs,
+ Node->getOperand(0), Node->getOperand(1), Zero, Zero,
+ cast<AtomicSDNode>(Node)->getMemOperand(),
+ cast<AtomicSDNode>(Node)->getOrdering(),
+ cast<AtomicSDNode>(Node)->getOrdering(),
+ cast<AtomicSDNode>(Node)->getSynchScope());
Results.push_back(Swap.getValue(0));
- Results.push_back(Swap.getValue(1));
-}
-
-static void
-ReplaceATOMIC_BINARY_64(SDNode *Node, SmallVectorImpl<SDValue>&Results,
- SelectionDAG &DAG, unsigned NewOp) {
- SDLoc dl(Node);
- assert (Node->getValueType(0) == MVT::i64 &&
- "Only know how to expand i64 atomics");
-
- SDValue Chain = Node->getOperand(0);
- SDValue In1 = Node->getOperand(1);
- SDValue In2L = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
- Node->getOperand(2), DAG.getIntPtrConstant(0));
- SDValue In2H = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
- Node->getOperand(2), DAG.getIntPtrConstant(1));
- SDValue Ops[] = { Chain, In1, In2L, In2H };
- SDVTList Tys = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
- SDValue Result =
- DAG.getMemIntrinsicNode(NewOp, dl, Tys, Ops, MVT::i64,
- cast<MemSDNode>(Node)->getMemOperand());
- SDValue OpsF[] = { Result.getValue(0), Result.getValue(1)};
- Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, OpsF));
- Results.push_back(Result.getValue(2));
+ Results.push_back(Swap.getValue(2));
}
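A hedged sketch of the idea behind ReplaceATOMIC_LOAD, using the standard GCC/Clang builtin (the helper itself is hypothetical): a compare-exchange of 0 with 0 never changes the stored value but always reports the value it observed, so it can stand in for a wide atomic load where no plain atomic load instruction exists.

    #include <stdint.h>

    static int64_t atomic_load_via_cmpxchg(int64_t *Addr) {
      int64_t Expected = 0;
      // On failure Expected is updated to the value seen at *Addr; on
      // success *Addr was already 0 and is rewritten with 0 (no change).
      __atomic_compare_exchange_n(Addr, &Expected, /*Desired=*/0,
                                  /*Weak=*/false, __ATOMIC_SEQ_CST,
                                  __ATOMIC_SEQ_CST);
      return Expected;
    }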
/// ReplaceNodeResults - Replace a node with an illegal result type
@@ -14656,13 +16359,15 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
case Intrinsic::x86_rdtscp:
return getReadTimeStampCounter(N, dl, X86ISD::RDTSCP_DAG, DAG, Subtarget,
Results);
+ case Intrinsic::x86_rdpmc:
+ return getReadPerformanceCounter(N, dl, DAG, Subtarget, Results);
}
}
case ISD::READCYCLECOUNTER: {
return getReadTimeStampCounter(N, dl, X86ISD::RDTSC_DAG, DAG, Subtarget,
Results);
}
- case ISD::ATOMIC_CMP_SWAP: {
+ case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: {
EVT T = N->getValueType(0);
assert((T == MVT::i64 || T == MVT::i128) && "can only expand cmpxchg pair");
bool Regs64bit = T == MVT::i128;
@@ -14704,61 +16409,33 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
Regs64bit ? X86::RDX : X86::EDX,
HalfT, cpOutL.getValue(2));
SDValue OpsF[] = { cpOutL.getValue(0), cpOutH.getValue(0)};
+
+ SDValue EFLAGS = DAG.getCopyFromReg(cpOutH.getValue(1), dl, X86::EFLAGS,
+ MVT::i32, cpOutH.getValue(2));
+ SDValue Success =
+ DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
+ DAG.getConstant(X86::COND_E, MVT::i8), EFLAGS);
+ Success = DAG.getZExtOrTrunc(Success, dl, N->getValueType(1));
+
Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, T, OpsF));
- Results.push_back(cpOutH.getValue(1));
+ Results.push_back(Success);
+ Results.push_back(EFLAGS.getValue(1));
return;
}
+ case ISD::ATOMIC_SWAP:
case ISD::ATOMIC_LOAD_ADD:
+ case ISD::ATOMIC_LOAD_SUB:
case ISD::ATOMIC_LOAD_AND:
- case ISD::ATOMIC_LOAD_NAND:
case ISD::ATOMIC_LOAD_OR:
- case ISD::ATOMIC_LOAD_SUB:
case ISD::ATOMIC_LOAD_XOR:
- case ISD::ATOMIC_LOAD_MAX:
+ case ISD::ATOMIC_LOAD_NAND:
case ISD::ATOMIC_LOAD_MIN:
- case ISD::ATOMIC_LOAD_UMAX:
+ case ISD::ATOMIC_LOAD_MAX:
case ISD::ATOMIC_LOAD_UMIN:
- case ISD::ATOMIC_SWAP: {
- unsigned Opc;
- switch (N->getOpcode()) {
- default: llvm_unreachable("Unexpected opcode");
- case ISD::ATOMIC_LOAD_ADD:
- Opc = X86ISD::ATOMADD64_DAG;
- break;
- case ISD::ATOMIC_LOAD_AND:
- Opc = X86ISD::ATOMAND64_DAG;
- break;
- case ISD::ATOMIC_LOAD_NAND:
- Opc = X86ISD::ATOMNAND64_DAG;
- break;
- case ISD::ATOMIC_LOAD_OR:
- Opc = X86ISD::ATOMOR64_DAG;
- break;
- case ISD::ATOMIC_LOAD_SUB:
- Opc = X86ISD::ATOMSUB64_DAG;
- break;
- case ISD::ATOMIC_LOAD_XOR:
- Opc = X86ISD::ATOMXOR64_DAG;
- break;
- case ISD::ATOMIC_LOAD_MAX:
- Opc = X86ISD::ATOMMAX64_DAG;
- break;
- case ISD::ATOMIC_LOAD_MIN:
- Opc = X86ISD::ATOMMIN64_DAG;
- break;
- case ISD::ATOMIC_LOAD_UMAX:
- Opc = X86ISD::ATOMUMAX64_DAG;
- break;
- case ISD::ATOMIC_LOAD_UMIN:
- Opc = X86ISD::ATOMUMIN64_DAG;
- break;
- case ISD::ATOMIC_SWAP:
- Opc = X86ISD::ATOMSWAP64_DAG;
- break;
- }
- ReplaceATOMIC_BINARY_64(N, Results, DAG, Opc);
- return;
- }
+ case ISD::ATOMIC_LOAD_UMAX:
+ // Delegate to generic TypeLegalization. Situations we can really handle
+ // should have already been dealt with by X86AtomicExpand.cpp.
+ break;
case ISD::ATOMIC_LOAD: {
ReplaceATOMIC_LOAD(N, Results, DAG);
return;
@@ -14779,6 +16456,13 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
MVT::v2f64, N->getOperand(0));
SDValue ToVecInt = DAG.getNode(ISD::BITCAST, dl, WiderVT, Expanded);
+ if (ExperimentalVectorWideningLegalization) {
+ // If we are legalizing vectors by widening, we already have the desired
+ // legal vector type, just return it.
+ Results.push_back(ToVecInt);
+ return;
+ }
+
SmallVector<SDValue, 8> Elts;
for (unsigned i = 0, e = NumElts; i != e; ++i)
Elts.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, SVT,
@@ -14810,6 +16494,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::FST: return "X86ISD::FST";
case X86ISD::CALL: return "X86ISD::CALL";
case X86ISD::RDTSC_DAG: return "X86ISD::RDTSC_DAG";
+ case X86ISD::RDTSCP_DAG: return "X86ISD::RDTSCP_DAG";
+ case X86ISD::RDPMC_DAG: return "X86ISD::RDPMC_DAG";
case X86ISD::BT: return "X86ISD::BT";
case X86ISD::CMP: return "X86ISD::CMP";
case X86ISD::COMI: return "X86ISD::COMI";
@@ -14863,12 +16549,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::FNSTSW16r: return "X86ISD::FNSTSW16r";
case X86ISD::LCMPXCHG_DAG: return "X86ISD::LCMPXCHG_DAG";
case X86ISD::LCMPXCHG8_DAG: return "X86ISD::LCMPXCHG8_DAG";
- case X86ISD::ATOMADD64_DAG: return "X86ISD::ATOMADD64_DAG";
- case X86ISD::ATOMSUB64_DAG: return "X86ISD::ATOMSUB64_DAG";
- case X86ISD::ATOMOR64_DAG: return "X86ISD::ATOMOR64_DAG";
- case X86ISD::ATOMXOR64_DAG: return "X86ISD::ATOMXOR64_DAG";
- case X86ISD::ATOMAND64_DAG: return "X86ISD::ATOMAND64_DAG";
- case X86ISD::ATOMNAND64_DAG: return "X86ISD::ATOMNAND64_DAG";
+ case X86ISD::LCMPXCHG16_DAG: return "X86ISD::LCMPXCHG16_DAG";
case X86ISD::VZEXT_MOVL: return "X86ISD::VZEXT_MOVL";
case X86ISD::VZEXT_LOAD: return "X86ISD::VZEXT_LOAD";
case X86ISD::VZEXT: return "X86ISD::VZEXT";
@@ -14909,6 +16590,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::TESTM: return "X86ISD::TESTM";
case X86ISD::TESTNM: return "X86ISD::TESTNM";
case X86ISD::KORTEST: return "X86ISD::KORTEST";
+ case X86ISD::PACKSS: return "X86ISD::PACKSS";
+ case X86ISD::PACKUS: return "X86ISD::PACKUS";
case X86ISD::PALIGNR: return "X86ISD::PALIGNR";
case X86ISD::PSHUFD: return "X86ISD::PSHUFD";
case X86ISD::PSHUFHW: return "X86ISD::PSHUFHW";
@@ -15173,7 +16856,8 @@ X86TargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M,
isUNPCKLMask(M, SVT, Subtarget->hasInt256()) ||
isUNPCKHMask(M, SVT, Subtarget->hasInt256()) ||
isUNPCKL_v_undef_Mask(M, SVT, Subtarget->hasInt256()) ||
- isUNPCKH_v_undef_Mask(M, SVT, Subtarget->hasInt256()));
+ isUNPCKH_v_undef_Mask(M, SVT, Subtarget->hasInt256()) ||
+ isBlendMask(M, SVT, Subtarget->hasSSE41(), Subtarget->hasInt256()));
}
bool
@@ -15256,685 +16940,6 @@ static MachineBasicBlock *EmitXBegin(MachineInstr *MI, MachineBasicBlock *MBB,
return sinkMBB;
}
-// Get CMPXCHG opcode for the specified data type.
-static unsigned getCmpXChgOpcode(EVT VT) {
- switch (VT.getSimpleVT().SimpleTy) {
- case MVT::i8: return X86::LCMPXCHG8;
- case MVT::i16: return X86::LCMPXCHG16;
- case MVT::i32: return X86::LCMPXCHG32;
- case MVT::i64: return X86::LCMPXCHG64;
- default:
- break;
- }
- llvm_unreachable("Invalid operand size!");
-}
-
-// Get LOAD opcode for the specified data type.
-static unsigned getLoadOpcode(EVT VT) {
- switch (VT.getSimpleVT().SimpleTy) {
- case MVT::i8: return X86::MOV8rm;
- case MVT::i16: return X86::MOV16rm;
- case MVT::i32: return X86::MOV32rm;
- case MVT::i64: return X86::MOV64rm;
- default:
- break;
- }
- llvm_unreachable("Invalid operand size!");
-}
-
-// Get opcode of the non-atomic one from the specified atomic instruction.
-static unsigned getNonAtomicOpcode(unsigned Opc) {
- switch (Opc) {
- case X86::ATOMAND8: return X86::AND8rr;
- case X86::ATOMAND16: return X86::AND16rr;
- case X86::ATOMAND32: return X86::AND32rr;
- case X86::ATOMAND64: return X86::AND64rr;
- case X86::ATOMOR8: return X86::OR8rr;
- case X86::ATOMOR16: return X86::OR16rr;
- case X86::ATOMOR32: return X86::OR32rr;
- case X86::ATOMOR64: return X86::OR64rr;
- case X86::ATOMXOR8: return X86::XOR8rr;
- case X86::ATOMXOR16: return X86::XOR16rr;
- case X86::ATOMXOR32: return X86::XOR32rr;
- case X86::ATOMXOR64: return X86::XOR64rr;
- }
- llvm_unreachable("Unhandled atomic-load-op opcode!");
-}
-
-// Get opcode of the non-atomic one from the specified atomic instruction with
-// extra opcode.
-static unsigned getNonAtomicOpcodeWithExtraOpc(unsigned Opc,
- unsigned &ExtraOpc) {
- switch (Opc) {
- case X86::ATOMNAND8: ExtraOpc = X86::NOT8r; return X86::AND8rr;
- case X86::ATOMNAND16: ExtraOpc = X86::NOT16r; return X86::AND16rr;
- case X86::ATOMNAND32: ExtraOpc = X86::NOT32r; return X86::AND32rr;
- case X86::ATOMNAND64: ExtraOpc = X86::NOT64r; return X86::AND64rr;
- case X86::ATOMMAX8: ExtraOpc = X86::CMP8rr; return X86::CMOVL32rr;
- case X86::ATOMMAX16: ExtraOpc = X86::CMP16rr; return X86::CMOVL16rr;
- case X86::ATOMMAX32: ExtraOpc = X86::CMP32rr; return X86::CMOVL32rr;
- case X86::ATOMMAX64: ExtraOpc = X86::CMP64rr; return X86::CMOVL64rr;
- case X86::ATOMMIN8: ExtraOpc = X86::CMP8rr; return X86::CMOVG32rr;
- case X86::ATOMMIN16: ExtraOpc = X86::CMP16rr; return X86::CMOVG16rr;
- case X86::ATOMMIN32: ExtraOpc = X86::CMP32rr; return X86::CMOVG32rr;
- case X86::ATOMMIN64: ExtraOpc = X86::CMP64rr; return X86::CMOVG64rr;
- case X86::ATOMUMAX8: ExtraOpc = X86::CMP8rr; return X86::CMOVB32rr;
- case X86::ATOMUMAX16: ExtraOpc = X86::CMP16rr; return X86::CMOVB16rr;
- case X86::ATOMUMAX32: ExtraOpc = X86::CMP32rr; return X86::CMOVB32rr;
- case X86::ATOMUMAX64: ExtraOpc = X86::CMP64rr; return X86::CMOVB64rr;
- case X86::ATOMUMIN8: ExtraOpc = X86::CMP8rr; return X86::CMOVA32rr;
- case X86::ATOMUMIN16: ExtraOpc = X86::CMP16rr; return X86::CMOVA16rr;
- case X86::ATOMUMIN32: ExtraOpc = X86::CMP32rr; return X86::CMOVA32rr;
- case X86::ATOMUMIN64: ExtraOpc = X86::CMP64rr; return X86::CMOVA64rr;
- }
- llvm_unreachable("Unhandled atomic-load-op opcode!");
-}
-
-// Get opcode of the non-atomic one from the specified atomic instruction for
-// 64-bit data type on 32-bit target.
-static unsigned getNonAtomic6432Opcode(unsigned Opc, unsigned &HiOpc) {
- switch (Opc) {
- case X86::ATOMAND6432: HiOpc = X86::AND32rr; return X86::AND32rr;
- case X86::ATOMOR6432: HiOpc = X86::OR32rr; return X86::OR32rr;
- case X86::ATOMXOR6432: HiOpc = X86::XOR32rr; return X86::XOR32rr;
- case X86::ATOMADD6432: HiOpc = X86::ADC32rr; return X86::ADD32rr;
- case X86::ATOMSUB6432: HiOpc = X86::SBB32rr; return X86::SUB32rr;
- case X86::ATOMSWAP6432: HiOpc = X86::MOV32rr; return X86::MOV32rr;
- case X86::ATOMMAX6432: HiOpc = X86::SETLr; return X86::SETLr;
- case X86::ATOMMIN6432: HiOpc = X86::SETGr; return X86::SETGr;
- case X86::ATOMUMAX6432: HiOpc = X86::SETBr; return X86::SETBr;
- case X86::ATOMUMIN6432: HiOpc = X86::SETAr; return X86::SETAr;
- }
- llvm_unreachable("Unhandled atomic-load-op opcode!");
-}
-
-// Get opcode of the non-atomic one from the specified atomic instruction for
-// 64-bit data type on 32-bit target with extra opcode.
-static unsigned getNonAtomic6432OpcodeWithExtraOpc(unsigned Opc,
- unsigned &HiOpc,
- unsigned &ExtraOpc) {
- switch (Opc) {
- case X86::ATOMNAND6432:
- ExtraOpc = X86::NOT32r;
- HiOpc = X86::AND32rr;
- return X86::AND32rr;
- }
- llvm_unreachable("Unhandled atomic-load-op opcode!");
-}
-
-// Get pseudo CMOV opcode from the specified data type.
-static unsigned getPseudoCMOVOpc(EVT VT) {
- switch (VT.getSimpleVT().SimpleTy) {
- case MVT::i8: return X86::CMOV_GR8;
- case MVT::i16: return X86::CMOV_GR16;
- case MVT::i32: return X86::CMOV_GR32;
- default:
- break;
- }
- llvm_unreachable("Unknown CMOV opcode!");
-}
-
-// EmitAtomicLoadArith - emit the code sequence for pseudo atomic instructions.
-// They will be translated into a spin-loop or compare-exchange loop from
-//
-// ...
-// dst = atomic-fetch-op MI.addr, MI.val
-// ...
-//
-// to
-//
-// ...
-// t1 = LOAD MI.addr
-// loop:
-// t4 = phi(t1, t3 / loop)
-// t2 = OP MI.val, t4
-// EAX = t4
-// LCMPXCHG [MI.addr], t2, [EAX is implicitly used & defined]
-// t3 = EAX
-// JNE loop
-// sink:
-// dst = t3
-// ...
-MachineBasicBlock *
-X86TargetLowering::EmitAtomicLoadArith(MachineInstr *MI,
- MachineBasicBlock *MBB) const {
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
- DebugLoc DL = MI->getDebugLoc();
-
- MachineFunction *MF = MBB->getParent();
- MachineRegisterInfo &MRI = MF->getRegInfo();
-
- const BasicBlock *BB = MBB->getBasicBlock();
- MachineFunction::iterator I = MBB;
- ++I;
-
- assert(MI->getNumOperands() <= X86::AddrNumOperands + 4 &&
- "Unexpected number of operands");
-
- assert(MI->hasOneMemOperand() &&
- "Expected atomic-load-op to have one memoperand");
-
- // Memory Reference
- MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin();
- MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end();
-
- unsigned DstReg, SrcReg;
- unsigned MemOpndSlot;
-
- unsigned CurOp = 0;
-
- DstReg = MI->getOperand(CurOp++).getReg();
- MemOpndSlot = CurOp;
- CurOp += X86::AddrNumOperands;
- SrcReg = MI->getOperand(CurOp++).getReg();
-
- const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
- MVT::SimpleValueType VT = *RC->vt_begin();
- unsigned t1 = MRI.createVirtualRegister(RC);
- unsigned t2 = MRI.createVirtualRegister(RC);
- unsigned t3 = MRI.createVirtualRegister(RC);
- unsigned t4 = MRI.createVirtualRegister(RC);
- unsigned PhyReg = getX86SubSuperRegister(X86::EAX, VT);
-
- unsigned LCMPXCHGOpc = getCmpXChgOpcode(VT);
- unsigned LOADOpc = getLoadOpcode(VT);
-
- // For the atomic load-arith operator, we generate
- //
- // thisMBB:
- // t1 = LOAD [MI.addr]
- // mainMBB:
- // t4 = phi(t1 / thisMBB, t3 / mainMBB)
- // t1 = OP MI.val, EAX
- // EAX = t4
- // LCMPXCHG [MI.addr], t1, [EAX is implicitly used & defined]
- // t3 = EAX
- // JNE mainMBB
- // sinkMBB:
- // dst = t3
-
- MachineBasicBlock *thisMBB = MBB;
- MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
- MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
- MF->insert(I, mainMBB);
- MF->insert(I, sinkMBB);
-
- MachineInstrBuilder MIB;
-
- // Transfer the remainder of BB and its successor edges to sinkMBB.
- sinkMBB->splice(sinkMBB->begin(), MBB,
- std::next(MachineBasicBlock::iterator(MI)), MBB->end());
- sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
-
- // thisMBB:
- MIB = BuildMI(thisMBB, DL, TII->get(LOADOpc), t1);
- for (unsigned i = 0; i < X86::AddrNumOperands; ++i) {
- MachineOperand NewMO = MI->getOperand(MemOpndSlot + i);
- if (NewMO.isReg())
- NewMO.setIsKill(false);
- MIB.addOperand(NewMO);
- }
- for (MachineInstr::mmo_iterator MMOI = MMOBegin; MMOI != MMOEnd; ++MMOI) {
- unsigned flags = (*MMOI)->getFlags();
- flags = (flags & ~MachineMemOperand::MOStore) | MachineMemOperand::MOLoad;
- MachineMemOperand *MMO =
- MF->getMachineMemOperand((*MMOI)->getPointerInfo(), flags,
- (*MMOI)->getSize(),
- (*MMOI)->getBaseAlignment(),
- (*MMOI)->getTBAAInfo(),
- (*MMOI)->getRanges());
- MIB.addMemOperand(MMO);
- }
-
- thisMBB->addSuccessor(mainMBB);
-
- // mainMBB:
- MachineBasicBlock *origMainMBB = mainMBB;
-
- // Add a PHI.
- MachineInstr *Phi = BuildMI(mainMBB, DL, TII->get(X86::PHI), t4)
- .addReg(t1).addMBB(thisMBB).addReg(t3).addMBB(mainMBB);
-
- unsigned Opc = MI->getOpcode();
- switch (Opc) {
- default:
- llvm_unreachable("Unhandled atomic-load-op opcode!");
- case X86::ATOMAND8:
- case X86::ATOMAND16:
- case X86::ATOMAND32:
- case X86::ATOMAND64:
- case X86::ATOMOR8:
- case X86::ATOMOR16:
- case X86::ATOMOR32:
- case X86::ATOMOR64:
- case X86::ATOMXOR8:
- case X86::ATOMXOR16:
- case X86::ATOMXOR32:
- case X86::ATOMXOR64: {
- unsigned ARITHOpc = getNonAtomicOpcode(Opc);
- BuildMI(mainMBB, DL, TII->get(ARITHOpc), t2).addReg(SrcReg)
- .addReg(t4);
- break;
- }
- case X86::ATOMNAND8:
- case X86::ATOMNAND16:
- case X86::ATOMNAND32:
- case X86::ATOMNAND64: {
- unsigned Tmp = MRI.createVirtualRegister(RC);
- unsigned NOTOpc;
- unsigned ANDOpc = getNonAtomicOpcodeWithExtraOpc(Opc, NOTOpc);
- BuildMI(mainMBB, DL, TII->get(ANDOpc), Tmp).addReg(SrcReg)
- .addReg(t4);
- BuildMI(mainMBB, DL, TII->get(NOTOpc), t2).addReg(Tmp);
- break;
- }
- case X86::ATOMMAX8:
- case X86::ATOMMAX16:
- case X86::ATOMMAX32:
- case X86::ATOMMAX64:
- case X86::ATOMMIN8:
- case X86::ATOMMIN16:
- case X86::ATOMMIN32:
- case X86::ATOMMIN64:
- case X86::ATOMUMAX8:
- case X86::ATOMUMAX16:
- case X86::ATOMUMAX32:
- case X86::ATOMUMAX64:
- case X86::ATOMUMIN8:
- case X86::ATOMUMIN16:
- case X86::ATOMUMIN32:
- case X86::ATOMUMIN64: {
- unsigned CMPOpc;
- unsigned CMOVOpc = getNonAtomicOpcodeWithExtraOpc(Opc, CMPOpc);
-
- BuildMI(mainMBB, DL, TII->get(CMPOpc))
- .addReg(SrcReg)
- .addReg(t4);
-
- if (Subtarget->hasCMov()) {
- if (VT != MVT::i8) {
- // Native support
- BuildMI(mainMBB, DL, TII->get(CMOVOpc), t2)
- .addReg(SrcReg)
- .addReg(t4);
- } else {
- // Promote i8 to i32 to use CMOV32
- const TargetRegisterInfo* TRI = getTargetMachine().getRegisterInfo();
- const TargetRegisterClass *RC32 =
- TRI->getSubClassWithSubReg(getRegClassFor(MVT::i32), X86::sub_8bit);
- unsigned SrcReg32 = MRI.createVirtualRegister(RC32);
- unsigned AccReg32 = MRI.createVirtualRegister(RC32);
- unsigned Tmp = MRI.createVirtualRegister(RC32);
-
- unsigned Undef = MRI.createVirtualRegister(RC32);
- BuildMI(mainMBB, DL, TII->get(TargetOpcode::IMPLICIT_DEF), Undef);
-
- BuildMI(mainMBB, DL, TII->get(TargetOpcode::INSERT_SUBREG), SrcReg32)
- .addReg(Undef)
- .addReg(SrcReg)
- .addImm(X86::sub_8bit);
- BuildMI(mainMBB, DL, TII->get(TargetOpcode::INSERT_SUBREG), AccReg32)
- .addReg(Undef)
- .addReg(t4)
- .addImm(X86::sub_8bit);
-
- BuildMI(mainMBB, DL, TII->get(CMOVOpc), Tmp)
- .addReg(SrcReg32)
- .addReg(AccReg32);
-
- BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), t2)
- .addReg(Tmp, 0, X86::sub_8bit);
- }
- } else {
- // Use pseudo select and lower them.
- assert((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) &&
- "Invalid atomic-load-op transformation!");
- unsigned SelOpc = getPseudoCMOVOpc(VT);
- X86::CondCode CC = X86::getCondFromCMovOpc(CMOVOpc);
- assert(CC != X86::COND_INVALID && "Invalid atomic-load-op transformation!");
- MIB = BuildMI(mainMBB, DL, TII->get(SelOpc), t2)
- .addReg(SrcReg).addReg(t4)
- .addImm(CC);
- mainMBB = EmitLoweredSelect(MIB, mainMBB);
- // Replace the original PHI node as mainMBB is changed after CMOV
- // lowering.
- BuildMI(*origMainMBB, Phi, DL, TII->get(X86::PHI), t4)
- .addReg(t1).addMBB(thisMBB).addReg(t3).addMBB(mainMBB);
- Phi->eraseFromParent();
- }
- break;
- }
- }
-
- // Copy PhyReg back from virtual register.
- BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), PhyReg)
- .addReg(t4);
-
- MIB = BuildMI(mainMBB, DL, TII->get(LCMPXCHGOpc));
- for (unsigned i = 0; i < X86::AddrNumOperands; ++i) {
- MachineOperand NewMO = MI->getOperand(MemOpndSlot + i);
- if (NewMO.isReg())
- NewMO.setIsKill(false);
- MIB.addOperand(NewMO);
- }
- MIB.addReg(t2);
- MIB.setMemRefs(MMOBegin, MMOEnd);
-
- // Copy PhyReg back to virtual register.
- BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), t3)
- .addReg(PhyReg);
-
- BuildMI(mainMBB, DL, TII->get(X86::JNE_4)).addMBB(origMainMBB);
-
- mainMBB->addSuccessor(origMainMBB);
- mainMBB->addSuccessor(sinkMBB);
-
- // sinkMBB:
- BuildMI(*sinkMBB, sinkMBB->begin(), DL,
- TII->get(TargetOpcode::COPY), DstReg)
- .addReg(t3);
-
- MI->eraseFromParent();
- return sinkMBB;
-}
-
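
For reference, the pseudo-atomic expansion being deleted above is the machine-level form of a standard compare-exchange loop. A minimal portable sketch that mirrors the t1/t2/t3/t4 dataflow follows; atomicFetchOp is a hypothetical helper name for illustration, not LLVM code.

#include <atomic>
#include <functional>

// Returns the pre-op value, like the atomic-fetch-op pseudo above.
template <typename T, typename Op>
T atomicFetchOp(std::atomic<T> &Addr, T Val, Op op) {
  T Old = Addr.load();                  // t1 = LOAD MI.addr
  while (!Addr.compare_exchange_weak(   // LCMPXCHG [MI.addr], t2
             Old,                       // EAX = t4; t3 = EAX on failure
             op(Val, Old)))             // t2 = OP MI.val, t4
    ;                                   // JNE loop
  return Old;                           // sink: dst = t3
}

// E.g. atomicFetchOp(Flags, Mask, std::bit_and<unsigned>()) is the portable
// shape of what ATOMAND32 expanded to.
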
-// EmitAtomicLoadArith6432 - emit the code sequence for pseudo atomic
-// instructions. They will be translated into a spin-loop or compare-exchange
-// loop from
-//
-// ...
-// dst = atomic-fetch-op MI.addr, MI.val
-// ...
-//
-// to
-//
-// ...
-// t1L = LOAD [MI.addr + 0]
-// t1H = LOAD [MI.addr + 4]
-// loop:
-// t4L = phi(t1L, t3L / loop)
-// t4H = phi(t1H, t3H / loop)
-// t2L = OP MI.val.lo, t4L
-// t2H = OP MI.val.hi, t4H
-// EAX = t4L
-// EDX = t4H
-// EBX = t2L
-// ECX = t2H
-// LCMPXCHG8B [MI.addr], [ECX:EBX & EDX:EAX are implicitly used and EDX:EAX is implicitly defined]
-// t3L = EAX
-// t3H = EDX
-// JNE loop
-// sink:
-// dstL = t3L
-// dstH = t3H
-// ...
-MachineBasicBlock *
-X86TargetLowering::EmitAtomicLoadArith6432(MachineInstr *MI,
- MachineBasicBlock *MBB) const {
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
- DebugLoc DL = MI->getDebugLoc();
-
- MachineFunction *MF = MBB->getParent();
- MachineRegisterInfo &MRI = MF->getRegInfo();
-
- const BasicBlock *BB = MBB->getBasicBlock();
- MachineFunction::iterator I = MBB;
- ++I;
-
- assert(MI->getNumOperands() <= X86::AddrNumOperands + 7 &&
- "Unexpected number of operands");
-
- assert(MI->hasOneMemOperand() &&
- "Expected atomic-load-op32 to have one memoperand");
-
- // Memory Reference
- MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin();
- MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end();
-
- unsigned DstLoReg, DstHiReg;
- unsigned SrcLoReg, SrcHiReg;
- unsigned MemOpndSlot;
-
- unsigned CurOp = 0;
-
- DstLoReg = MI->getOperand(CurOp++).getReg();
- DstHiReg = MI->getOperand(CurOp++).getReg();
- MemOpndSlot = CurOp;
- CurOp += X86::AddrNumOperands;
- SrcLoReg = MI->getOperand(CurOp++).getReg();
- SrcHiReg = MI->getOperand(CurOp++).getReg();
-
- const TargetRegisterClass *RC = &X86::GR32RegClass;
- const TargetRegisterClass *RC8 = &X86::GR8RegClass;
-
- unsigned t1L = MRI.createVirtualRegister(RC);
- unsigned t1H = MRI.createVirtualRegister(RC);
- unsigned t2L = MRI.createVirtualRegister(RC);
- unsigned t2H = MRI.createVirtualRegister(RC);
- unsigned t3L = MRI.createVirtualRegister(RC);
- unsigned t3H = MRI.createVirtualRegister(RC);
- unsigned t4L = MRI.createVirtualRegister(RC);
- unsigned t4H = MRI.createVirtualRegister(RC);
-
- unsigned LCMPXCHGOpc = X86::LCMPXCHG8B;
- unsigned LOADOpc = X86::MOV32rm;
-
- // For the atomic load-arith operator, we generate
- //
- // thisMBB:
- // t1L = LOAD [MI.addr + 0]
- // t1H = LOAD [MI.addr + 4]
- // mainMBB:
- // t4L = phi(t1L / thisMBB, t3L / mainMBB)
- // t4H = phi(t1H / thisMBB, t3H / mainMBB)
- // t2L = OP MI.val.lo, t4L
- // t2H = OP MI.val.hi, t4H
- // EBX = t2L
- // ECX = t2H
- // LCMPXCHG8B [MI.addr], [ECX:EBX & EDX:EAX are implicitly used and EDX:EAX is implicitly defined]
- // t3L = EAX
- // t3H = EDX
- // JNE loop
- // sinkMBB:
- // dstL = t3L
- // dstH = t3H
-
- MachineBasicBlock *thisMBB = MBB;
- MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
- MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
- MF->insert(I, mainMBB);
- MF->insert(I, sinkMBB);
-
- MachineInstrBuilder MIB;
-
- // Transfer the remainder of BB and its successor edges to sinkMBB.
- sinkMBB->splice(sinkMBB->begin(), MBB,
- std::next(MachineBasicBlock::iterator(MI)), MBB->end());
- sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
-
- // thisMBB:
- // Lo
- MIB = BuildMI(thisMBB, DL, TII->get(LOADOpc), t1L);
- for (unsigned i = 0; i < X86::AddrNumOperands; ++i) {
- MachineOperand NewMO = MI->getOperand(MemOpndSlot + i);
- if (NewMO.isReg())
- NewMO.setIsKill(false);
- MIB.addOperand(NewMO);
- }
- for (MachineInstr::mmo_iterator MMOI = MMOBegin; MMOI != MMOEnd; ++MMOI) {
- unsigned flags = (*MMOI)->getFlags();
- flags = (flags & ~MachineMemOperand::MOStore) | MachineMemOperand::MOLoad;
- MachineMemOperand *MMO =
- MF->getMachineMemOperand((*MMOI)->getPointerInfo(), flags,
- (*MMOI)->getSize(),
- (*MMOI)->getBaseAlignment(),
- (*MMOI)->getTBAAInfo(),
- (*MMOI)->getRanges());
- MIB.addMemOperand(MMO);
- };
- MachineInstr *LowMI = MIB;
-
- // Hi
- MIB = BuildMI(thisMBB, DL, TII->get(LOADOpc), t1H);
- for (unsigned i = 0; i < X86::AddrNumOperands; ++i) {
- if (i == X86::AddrDisp) {
- MIB.addDisp(MI->getOperand(MemOpndSlot + i), 4); // 4 == sizeof(i32)
- } else {
- MachineOperand NewMO = MI->getOperand(MemOpndSlot + i);
- if (NewMO.isReg())
- NewMO.setIsKill(false);
- MIB.addOperand(NewMO);
- }
- }
- MIB.setMemRefs(LowMI->memoperands_begin(), LowMI->memoperands_end());
-
- thisMBB->addSuccessor(mainMBB);
-
- // mainMBB:
- MachineBasicBlock *origMainMBB = mainMBB;
-
- // Add PHIs.
- MachineInstr *PhiL = BuildMI(mainMBB, DL, TII->get(X86::PHI), t4L)
- .addReg(t1L).addMBB(thisMBB).addReg(t3L).addMBB(mainMBB);
- MachineInstr *PhiH = BuildMI(mainMBB, DL, TII->get(X86::PHI), t4H)
- .addReg(t1H).addMBB(thisMBB).addReg(t3H).addMBB(mainMBB);
-
- unsigned Opc = MI->getOpcode();
- switch (Opc) {
- default:
- llvm_unreachable("Unhandled atomic-load-op6432 opcode!");
- case X86::ATOMAND6432:
- case X86::ATOMOR6432:
- case X86::ATOMXOR6432:
- case X86::ATOMADD6432:
- case X86::ATOMSUB6432: {
- unsigned HiOpc;
- unsigned LoOpc = getNonAtomic6432Opcode(Opc, HiOpc);
- BuildMI(mainMBB, DL, TII->get(LoOpc), t2L).addReg(t4L)
- .addReg(SrcLoReg);
- BuildMI(mainMBB, DL, TII->get(HiOpc), t2H).addReg(t4H)
- .addReg(SrcHiReg);
- break;
- }
- case X86::ATOMNAND6432: {
- unsigned HiOpc, NOTOpc;
- unsigned LoOpc = getNonAtomic6432OpcodeWithExtraOpc(Opc, HiOpc, NOTOpc);
- unsigned TmpL = MRI.createVirtualRegister(RC);
- unsigned TmpH = MRI.createVirtualRegister(RC);
- BuildMI(mainMBB, DL, TII->get(LoOpc), TmpL).addReg(SrcLoReg)
- .addReg(t4L);
- BuildMI(mainMBB, DL, TII->get(HiOpc), TmpH).addReg(SrcHiReg)
- .addReg(t4H);
- BuildMI(mainMBB, DL, TII->get(NOTOpc), t2L).addReg(TmpL);
- BuildMI(mainMBB, DL, TII->get(NOTOpc), t2H).addReg(TmpH);
- break;
- }
- case X86::ATOMMAX6432:
- case X86::ATOMMIN6432:
- case X86::ATOMUMAX6432:
- case X86::ATOMUMIN6432: {
- unsigned HiOpc;
- unsigned LoOpc = getNonAtomic6432Opcode(Opc, HiOpc);
- unsigned cL = MRI.createVirtualRegister(RC8);
- unsigned cH = MRI.createVirtualRegister(RC8);
- unsigned cL32 = MRI.createVirtualRegister(RC);
- unsigned cH32 = MRI.createVirtualRegister(RC);
- unsigned cc = MRI.createVirtualRegister(RC);
- // cl := cmp src_lo, lo
- BuildMI(mainMBB, DL, TII->get(X86::CMP32rr))
- .addReg(SrcLoReg).addReg(t4L);
- BuildMI(mainMBB, DL, TII->get(LoOpc), cL);
- BuildMI(mainMBB, DL, TII->get(X86::MOVZX32rr8), cL32).addReg(cL);
- // ch := cmp src_hi, hi
- BuildMI(mainMBB, DL, TII->get(X86::CMP32rr))
- .addReg(SrcHiReg).addReg(t4H);
- BuildMI(mainMBB, DL, TII->get(HiOpc), cH);
- BuildMI(mainMBB, DL, TII->get(X86::MOVZX32rr8), cH32).addReg(cH);
- // cc := if (src_hi == hi) ? cl : ch;
- if (Subtarget->hasCMov()) {
- BuildMI(mainMBB, DL, TII->get(X86::CMOVE32rr), cc)
- .addReg(cH32).addReg(cL32);
- } else {
- MIB = BuildMI(mainMBB, DL, TII->get(X86::CMOV_GR32), cc)
- .addReg(cH32).addReg(cL32)
- .addImm(X86::COND_E);
- mainMBB = EmitLoweredSelect(MIB, mainMBB);
- }
- BuildMI(mainMBB, DL, TII->get(X86::TEST32rr)).addReg(cc).addReg(cc);
- if (Subtarget->hasCMov()) {
- BuildMI(mainMBB, DL, TII->get(X86::CMOVNE32rr), t2L)
- .addReg(SrcLoReg).addReg(t4L);
- BuildMI(mainMBB, DL, TII->get(X86::CMOVNE32rr), t2H)
- .addReg(SrcHiReg).addReg(t4H);
- } else {
- MIB = BuildMI(mainMBB, DL, TII->get(X86::CMOV_GR32), t2L)
- .addReg(SrcLoReg).addReg(t4L)
- .addImm(X86::COND_NE);
- mainMBB = EmitLoweredSelect(MIB, mainMBB);
- // As the lowered CMOV won't clobber EFLAGS, we could reuse it for the
- // 2nd CMOV lowering.
- mainMBB->addLiveIn(X86::EFLAGS);
- MIB = BuildMI(mainMBB, DL, TII->get(X86::CMOV_GR32), t2H)
- .addReg(SrcHiReg).addReg(t4H)
- .addImm(X86::COND_NE);
- mainMBB = EmitLoweredSelect(MIB, mainMBB);
- // Replace the original PHI node as mainMBB is changed after CMOV
- // lowering.
- BuildMI(*origMainMBB, PhiL, DL, TII->get(X86::PHI), t4L)
- .addReg(t1L).addMBB(thisMBB).addReg(t3L).addMBB(mainMBB);
- BuildMI(*origMainMBB, PhiH, DL, TII->get(X86::PHI), t4H)
- .addReg(t1H).addMBB(thisMBB).addReg(t3H).addMBB(mainMBB);
- PhiL->eraseFromParent();
- PhiH->eraseFromParent();
- }
- break;
- }
- case X86::ATOMSWAP6432: {
- unsigned HiOpc;
- unsigned LoOpc = getNonAtomic6432Opcode(Opc, HiOpc);
- BuildMI(mainMBB, DL, TII->get(LoOpc), t2L).addReg(SrcLoReg);
- BuildMI(mainMBB, DL, TII->get(HiOpc), t2H).addReg(SrcHiReg);
- break;
- }
- }
-
- // Copy EDX:EAX back from HiReg:LoReg
- BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), X86::EAX).addReg(t4L);
- BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), X86::EDX).addReg(t4H);
- // Copy ECX:EBX from t1H:t1L
- BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), X86::EBX).addReg(t2L);
- BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), X86::ECX).addReg(t2H);
-
- MIB = BuildMI(mainMBB, DL, TII->get(LCMPXCHGOpc));
- for (unsigned i = 0; i < X86::AddrNumOperands; ++i) {
- MachineOperand NewMO = MI->getOperand(MemOpndSlot + i);
- if (NewMO.isReg())
- NewMO.setIsKill(false);
- MIB.addOperand(NewMO);
- }
- MIB.setMemRefs(MMOBegin, MMOEnd);
-
- // Copy EDX:EAX back to t3H:t3L
- BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), t3L).addReg(X86::EAX);
- BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), t3H).addReg(X86::EDX);
-
- BuildMI(mainMBB, DL, TII->get(X86::JNE_4)).addMBB(origMainMBB);
-
- mainMBB->addSuccessor(origMainMBB);
- mainMBB->addSuccessor(sinkMBB);
-
- // sinkMBB:
- BuildMI(*sinkMBB, sinkMBB->begin(), DL,
- TII->get(TargetOpcode::COPY), DstLoReg)
- .addReg(t3L);
- BuildMI(*sinkMBB, sinkMBB->begin(), DL,
- TII->get(TargetOpcode::COPY), DstHiReg)
- .addReg(t3H);
-
- MI->eraseFromParent();
- return sinkMBB;
-}
-
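
The 6432 expansion removed above leans on CMPXCHG8B being the only 64-bit-wide atomic primitive on a 32-bit target: EDX:EAX carries the expected value and ECX:EBX the replacement. A rough portable equivalent for the ADD case, assuming the compiler lowers the 8-byte compare-exchange to LCMPXCHG8B (hypothetical helper name, illustration only):

#include <atomic>
#include <cstdint>

// 64-bit atomic fetch-add built from an 8-byte CAS loop.
uint64_t atomicFetchAdd64(std::atomic<uint64_t> &Addr, uint64_t Val) {
  uint64_t Old = Addr.load();          // t1H:t1L, two 32-bit loads
  while (!Addr.compare_exchange_weak(  // LCMPXCHG8B [MI.addr]
             Old,                      // EDX:EAX = t4H:t4L (expected)
             Old + Val))               // ECX:EBX = t2H:t2L (ADD/ADC pair)
    ;                                  // JNE loop
  return Old;                          // dstH:dstL = t3H:t3L
}
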
// FIXME: When we get size specific XMM0 registers, i.e. XMM0_V16I8
// or XMM0_V32I8 in AVX all of this code can be replaced with that
// in the .td file.
@@ -16068,7 +17073,7 @@ X86TargetLowering::EmitVAARG64WithCustomInserter(
MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end();
// Machine Information
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ const TargetInstrInfo *TII = MBB->getParent()->getTarget().getInstrInfo();
MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
const TargetRegisterClass *AddrRegClass = getRegClassFor(MVT::i64);
const TargetRegisterClass *OffsetRegClass = getRegClassFor(MVT::i32);
@@ -16324,7 +17329,7 @@ X86TargetLowering::EmitVAStartSaveXMMRegsWithCustomInserter(
XMMSaveMBB->addSuccessor(EndMBB);
// Now add the instructions.
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ const TargetInstrInfo *TII = MBB->getParent()->getTarget().getInstrInfo();
DebugLoc DL = MI->getDebugLoc();
unsigned CountReg = MI->getOperand(0).getReg();
@@ -16407,7 +17412,7 @@ static bool checkAndUpdateEFLAGSKill(MachineBasicBlock::iterator SelectItr,
MachineBasicBlock *
X86TargetLowering::EmitLoweredSelect(MachineInstr *MI,
MachineBasicBlock *BB) const {
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ const TargetInstrInfo *TII = BB->getParent()->getTarget().getInstrInfo();
DebugLoc DL = MI->getDebugLoc();
// To "insert" a SELECT_CC instruction, we actually have to insert the
@@ -16433,7 +17438,7 @@ X86TargetLowering::EmitLoweredSelect(MachineInstr *MI,
// If the EFLAGS register isn't dead in the terminator, then claim that it's
// live into the sink and copy blocks.
- const TargetRegisterInfo* TRI = getTargetMachine().getRegisterInfo();
+ const TargetRegisterInfo* TRI = BB->getParent()->getTarget().getRegisterInfo();
if (!MI->killsRegister(X86::EFLAGS) &&
!checkAndUpdateEFLAGSKill(MI, BB, TRI)) {
copy0MBB->addLiveIn(X86::EFLAGS);
@@ -16474,9 +17479,9 @@ X86TargetLowering::EmitLoweredSelect(MachineInstr *MI,
MachineBasicBlock *
X86TargetLowering::EmitLoweredSegAlloca(MachineInstr *MI, MachineBasicBlock *BB,
bool Is64Bit) const {
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
- DebugLoc DL = MI->getDebugLoc();
MachineFunction *MF = BB->getParent();
+ const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
+ DebugLoc DL = MI->getDebugLoc();
const BasicBlock *LLVM_BB = BB->getBasicBlock();
assert(MF->shouldSplitStack());
@@ -16546,7 +17551,7 @@ X86TargetLowering::EmitLoweredSegAlloca(MachineInstr *MI, MachineBasicBlock *BB,
// Calls into a routine in libgcc to allocate more space from the heap.
const uint32_t *RegMask =
- getTargetMachine().getRegisterInfo()->getCallPreservedMask(CallingConv::C);
+ MF->getTarget().getRegisterInfo()->getCallPreservedMask(CallingConv::C);
if (Is64Bit) {
BuildMI(mallocMBB, DL, TII->get(X86::MOV64rr), X86::RDI)
.addReg(sizeVReg);
@@ -16594,8 +17599,8 @@ X86TargetLowering::EmitLoweredSegAlloca(MachineInstr *MI, MachineBasicBlock *BB,
MachineBasicBlock *
X86TargetLowering::EmitLoweredWinAlloca(MachineInstr *MI,
- MachineBasicBlock *BB) const {
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ MachineBasicBlock *BB) const {
+ const TargetInstrInfo *TII = BB->getParent()->getTarget().getInstrInfo();
DebugLoc DL = MI->getDebugLoc();
assert(!Subtarget->isTargetMacho());
@@ -16651,10 +17656,10 @@ X86TargetLowering::EmitLoweredTLSCall(MachineInstr *MI,
// our load from the relocation, sticking it in either RDI (x86-64)
// or EAX and doing an indirect call. The return value will then
// be in the normal return register.
+ MachineFunction *F = BB->getParent();
const X86InstrInfo *TII
- = static_cast<const X86InstrInfo*>(getTargetMachine().getInstrInfo());
+ = static_cast<const X86InstrInfo*>(F->getTarget().getInstrInfo());
DebugLoc DL = MI->getDebugLoc();
- MachineFunction *F = BB->getParent();
assert(Subtarget->isTargetDarwin() && "Darwin only instr emitted?");
assert(MI->getOperand(3).isGlobal() && "This should be a global");
@@ -16663,7 +17668,7 @@ X86TargetLowering::EmitLoweredTLSCall(MachineInstr *MI,
// FIXME: The 32-bit calls have non-standard calling conventions. Use a
// proper register mask.
const uint32_t *RegMask =
- getTargetMachine().getRegisterInfo()->getCallPreservedMask(CallingConv::C);
+ F->getTarget().getRegisterInfo()->getCallPreservedMask(CallingConv::C);
if (Subtarget->is64Bit()) {
MachineInstrBuilder MIB = BuildMI(*BB, MI, DL,
TII->get(X86::MOV64rm), X86::RDI)
@@ -16675,7 +17680,7 @@ X86TargetLowering::EmitLoweredTLSCall(MachineInstr *MI,
MIB = BuildMI(*BB, MI, DL, TII->get(X86::CALL64m));
addDirectMem(MIB, X86::RDI);
MIB.addReg(X86::RAX, RegState::ImplicitDefine).addRegMask(RegMask);
- } else if (getTargetMachine().getRelocationModel() != Reloc::PIC_) {
+ } else if (F->getTarget().getRelocationModel() != Reloc::PIC_) {
MachineInstrBuilder MIB = BuildMI(*BB, MI, DL,
TII->get(X86::MOV32rm), X86::EAX)
.addReg(0)
@@ -16707,9 +17712,8 @@ MachineBasicBlock *
X86TargetLowering::emitEHSjLjSetJmp(MachineInstr *MI,
MachineBasicBlock *MBB) const {
DebugLoc DL = MI->getDebugLoc();
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
-
MachineFunction *MF = MBB->getParent();
+ const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
MachineRegisterInfo &MRI = MF->getRegInfo();
const BasicBlock *BB = MBB->getBasicBlock();
@@ -16771,8 +17775,8 @@ X86TargetLowering::emitEHSjLjSetJmp(MachineInstr *MI,
unsigned PtrStoreOpc = 0;
unsigned LabelReg = 0;
const int64_t LabelOffset = 1 * PVT.getStoreSize();
- Reloc::Model RM = getTargetMachine().getRelocationModel();
- bool UseImmLabel = (getTargetMachine().getCodeModel() == CodeModel::Small) &&
+ Reloc::Model RM = MF->getTarget().getRelocationModel();
+ bool UseImmLabel = (MF->getTarget().getCodeModel() == CodeModel::Small) &&
(RM == Reloc::Static || RM == Reloc::DynamicNoPIC);
// Prepare IP either in reg or imm.
@@ -16816,7 +17820,7 @@ X86TargetLowering::emitEHSjLjSetJmp(MachineInstr *MI,
.addMBB(restoreMBB);
const X86RegisterInfo *RegInfo =
- static_cast<const X86RegisterInfo*>(getTargetMachine().getRegisterInfo());
+ static_cast<const X86RegisterInfo*>(MF->getTarget().getRegisterInfo());
MIB.addRegMask(RegInfo->getNoPreservedMask());
thisMBB->addSuccessor(mainMBB);
thisMBB->addSuccessor(restoreMBB);
@@ -16845,9 +17849,8 @@ MachineBasicBlock *
X86TargetLowering::emitEHSjLjLongJmp(MachineInstr *MI,
MachineBasicBlock *MBB) const {
DebugLoc DL = MI->getDebugLoc();
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
-
MachineFunction *MF = MBB->getParent();
+ const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
MachineRegisterInfo &MRI = MF->getRegInfo();
// Memory Reference
@@ -16863,7 +17866,7 @@ X86TargetLowering::emitEHSjLjLongJmp(MachineInstr *MI,
unsigned Tmp = MRI.createVirtualRegister(RC);
// Since FP is only updated here but NOT referenced, it's treated as GPR.
const X86RegisterInfo *RegInfo =
- static_cast<const X86RegisterInfo*>(getTargetMachine().getRegisterInfo());
+ static_cast<const X86RegisterInfo*>(MF->getTarget().getRegisterInfo());
unsigned FP = (PVT == MVT::i64) ? X86::RBP : X86::EBP;
unsigned SP = RegInfo->getStackRegister();
@@ -17038,12 +18041,12 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
case X86::FP80_TO_INT16_IN_MEM:
case X86::FP80_TO_INT32_IN_MEM:
case X86::FP80_TO_INT64_IN_MEM: {
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ MachineFunction *F = BB->getParent();
+ const TargetInstrInfo *TII = F->getTarget().getInstrInfo();
DebugLoc DL = MI->getDebugLoc();
// Change the floating point control register to use "round towards zero"
// mode when truncating to an integer value.
- MachineFunction *F = BB->getParent();
int CWFrameIdx = F->getFrameInfo()->CreateStackObject(2, 2, false);
addFrameReference(BuildMI(*BB, MI, DL,
TII->get(X86::FNSTCW16m)), CWFrameIdx);
@@ -17123,7 +18126,7 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
case X86::VPCMPESTRM128MEM:
assert(Subtarget->hasSSE42() &&
"Target must have SSE4.2 or AVX features enabled");
- return EmitPCMPSTRM(MI, BB, getTargetMachine().getInstrInfo());
+ return EmitPCMPSTRM(MI, BB, BB->getParent()->getTarget().getInstrInfo());
// String/text processing lowering.
case X86::PCMPISTRIREG:
@@ -17136,71 +18139,15 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
case X86::VPCMPESTRIMEM:
assert(Subtarget->hasSSE42() &&
"Target must have SSE4.2 or AVX features enabled");
- return EmitPCMPSTRI(MI, BB, getTargetMachine().getInstrInfo());
+ return EmitPCMPSTRI(MI, BB, BB->getParent()->getTarget().getInstrInfo());
// Thread synchronization.
case X86::MONITOR:
- return EmitMonitor(MI, BB, getTargetMachine().getInstrInfo(), Subtarget);
+ return EmitMonitor(MI, BB, BB->getParent()->getTarget().getInstrInfo(), Subtarget);
// xbegin
case X86::XBEGIN:
- return EmitXBegin(MI, BB, getTargetMachine().getInstrInfo());
-
- // Atomic Lowering.
- case X86::ATOMAND8:
- case X86::ATOMAND16:
- case X86::ATOMAND32:
- case X86::ATOMAND64:
- // Fall through
- case X86::ATOMOR8:
- case X86::ATOMOR16:
- case X86::ATOMOR32:
- case X86::ATOMOR64:
- // Fall through
- case X86::ATOMXOR16:
- case X86::ATOMXOR8:
- case X86::ATOMXOR32:
- case X86::ATOMXOR64:
- // Fall through
- case X86::ATOMNAND8:
- case X86::ATOMNAND16:
- case X86::ATOMNAND32:
- case X86::ATOMNAND64:
- // Fall through
- case X86::ATOMMAX8:
- case X86::ATOMMAX16:
- case X86::ATOMMAX32:
- case X86::ATOMMAX64:
- // Fall through
- case X86::ATOMMIN8:
- case X86::ATOMMIN16:
- case X86::ATOMMIN32:
- case X86::ATOMMIN64:
- // Fall through
- case X86::ATOMUMAX8:
- case X86::ATOMUMAX16:
- case X86::ATOMUMAX32:
- case X86::ATOMUMAX64:
- // Fall through
- case X86::ATOMUMIN8:
- case X86::ATOMUMIN16:
- case X86::ATOMUMIN32:
- case X86::ATOMUMIN64:
- return EmitAtomicLoadArith(MI, BB);
-
- // This group does 64-bit operations on a 32-bit host.
- case X86::ATOMAND6432:
- case X86::ATOMOR6432:
- case X86::ATOMXOR6432:
- case X86::ATOMNAND6432:
- case X86::ATOMADD6432:
- case X86::ATOMSUB6432:
- case X86::ATOMMAX6432:
- case X86::ATOMMIN6432:
- case X86::ATOMUMAX6432:
- case X86::ATOMUMIN6432:
- case X86::ATOMSWAP6432:
- return EmitAtomicLoadArith6432(MI, BB);
+ return EmitXBegin(MI, BB, BB->getParent()->getTarget().getInstrInfo());
case X86::VASTART_SAVE_XMM_REGS:
return EmitVAStartSaveXMMRegsWithCustomInserter(MI, BB);
@@ -17473,13 +18420,385 @@ static SDValue PerformShuffleCombine256(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
+/// \brief Get the PSHUF-style mask from a PSHUF node.
+///
+/// This is a very minor wrapper around getTargetShuffleMask to ease forming v4
+/// PSHUF-style masks that can be reused with such instructions.
+static SmallVector<int, 4> getPSHUFShuffleMask(SDValue N) {
+ SmallVector<int, 4> Mask;
+ bool IsUnary;
+ bool HaveMask = getTargetShuffleMask(N.getNode(), N.getSimpleValueType(), Mask, IsUnary);
+ (void)HaveMask;
+ assert(HaveMask);
+
+ switch (N.getOpcode()) {
+ case X86ISD::PSHUFD:
+ return Mask;
+ case X86ISD::PSHUFLW:
+ Mask.resize(4);
+ return Mask;
+ case X86ISD::PSHUFHW:
+ Mask.erase(Mask.begin(), Mask.begin() + 4);
+ for (int &M : Mask)
+ M -= 4;
+ return Mask;
+ default:
+ llvm_unreachable("No valid shuffle instruction found!");
+ }
+}
+
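
The normalization above can be seen in isolation: for PSHUFLW/PSHUFHW, getTargetShuffleMask reports an 8-element word mask, but only one half is interesting, so the wrapper cuts it down and, for the high half, rebases the lanes to 0..3. A standalone sketch with plain integers (rebaseHighHalf is a made-up name, illustration only):

#include <cassert>
#include <vector>

// Mirror of the PSHUFHW case: drop the identity low half and rebase.
std::vector<int> rebaseHighHalf(std::vector<int> Mask) {
  Mask.erase(Mask.begin(), Mask.begin() + 4); // keep elements 4..7
  for (int &M : Mask)
    M -= 4;                                   // 4..7 -> 0..3
  return Mask;
}

int main() {
  // pshufhw swapping word pairs: {0,1,2,3, 5,4,7,6} -> {1,0,3,2}
  assert((rebaseHighHalf({0, 1, 2, 3, 5, 4, 7, 6}) ==
          std::vector<int>{1, 0, 3, 2}));
  return 0;
}
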
+/// \brief Search for a combinable shuffle across a chain ending in pshufd.
+///
+/// We walk up the chain and look for a combinable shuffle, skipping over
+/// shuffles that we could hoist this shuffle's transformation past without
+/// altering anything.
+static bool combineRedundantDWordShuffle(SDValue N, MutableArrayRef<int> Mask,
+ SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ assert(N.getOpcode() == X86ISD::PSHUFD &&
+ "Called with something other than an x86 128-bit half shuffle!");
+ SDLoc DL(N);
+
+ // Walk up a single-use chain looking for a combinable shuffle.
+ SDValue V = N.getOperand(0);
+ for (; V.hasOneUse(); V = V.getOperand(0)) {
+ switch (V.getOpcode()) {
+ default:
+ return false; // Nothing combined!
+
+ case ISD::BITCAST:
+ // Skip bitcasts as we always know the type for the target specific
+ // instructions.
+ continue;
+
+ case X86ISD::PSHUFD:
+ // Found another dword shuffle.
+ break;
+
+ case X86ISD::PSHUFLW:
+ // Check that the low words (being shuffled) are the identity in the
+ // dword shuffle, and the high words are self-contained.
+ if (Mask[0] != 0 || Mask[1] != 1 ||
+ !(Mask[2] >= 2 && Mask[2] < 4 && Mask[3] >= 2 && Mask[3] < 4))
+ return false;
+
+ continue;
+
+ case X86ISD::PSHUFHW:
+ // Check that the high words (being shuffled) are the identity in the
+ // dword shuffle, and the low words are self-contained.
+ if (Mask[2] != 2 || Mask[3] != 3 ||
+ !(Mask[0] >= 0 && Mask[0] < 2 && Mask[1] >= 0 && Mask[1] < 2))
+ return false;
+
+ continue;
+
+ case X86ISD::UNPCKL:
+ case X86ISD::UNPCKH:
+ // For either i8 -> i16 or i16 -> i32 unpacks, we can combine a dword
+ // shuffle into a preceding word shuffle.
+ if (V.getValueType() != MVT::v16i8 && V.getValueType() != MVT::v8i16)
+ return false;
+
+ // Search for a half-shuffle which we can combine with.
+ unsigned CombineOp =
+ V.getOpcode() == X86ISD::UNPCKL ? X86ISD::PSHUFLW : X86ISD::PSHUFHW;
+ if (V.getOperand(0) != V.getOperand(1) ||
+ !V->isOnlyUserOf(V.getOperand(0).getNode()))
+ return false;
+ V = V.getOperand(0);
+ do {
+ switch (V.getOpcode()) {
+ default:
+ return false; // Nothing to combine.
+
+ case X86ISD::PSHUFLW:
+ case X86ISD::PSHUFHW:
+ if (V.getOpcode() == CombineOp)
+ break;
+
+ // Fallthrough!
+ case ISD::BITCAST:
+ V = V.getOperand(0);
+ continue;
+ }
+ break;
+ } while (V.hasOneUse());
+ break;
+ }
+ // Break out of the loop if we break out of the switch.
+ break;
+ }
+
+ if (!V.hasOneUse())
+ // We fell out of the loop without finding a viable combining instruction.
+ return false;
+
+ // Record the old value to use in RAUW-ing.
+ SDValue Old = V;
+
+ // Merge this node's mask and our incoming mask.
+ SmallVector<int, 4> VMask = getPSHUFShuffleMask(V);
+ for (int &M : Mask)
+ M = VMask[M];
+ V = DAG.getNode(V.getOpcode(), DL, V.getValueType(), V.getOperand(0),
+ getV4X86ShuffleImm8ForMask(Mask, DAG));
+
+ // It is possible that one of the combinable shuffles was completely absorbed
+ // by the other; in that case, just replace it and revisit all users.
+ if (Old.getNode() == V.getNode()) {
+ DCI.CombineTo(N.getNode(), N.getOperand(0), /*AddTo=*/true);
+ return true;
+ }
+
+ // Replace N with its operand as we're going to combine that shuffle away.
+ DAG.ReplaceAllUsesWith(N, N.getOperand(0));
+
+ // Replace the combinable shuffle with the combined one, updating all users
+ // so that we re-evaluate the chain here.
+ DCI.CombineTo(Old.getNode(), V, /*AddTo*/ true);
+ return true;
+}
+
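
The merge step at the bottom of the function above, 'M = VMask[M]', is plain shuffle-mask composition: if the inner node applies VMask to source S and this node applies Mask on top, lane i of the result is S[VMask[Mask[i]]]. A small self-checking example of that identity:

#include <array>
#include <cassert>

int main() {
  std::array<int, 4> VMask = {2, 3, 0, 1}; // inner PSHUFD
  std::array<int, 4> Mask = {1, 0, 3, 2};  // outer PSHUFD
  std::array<int, 4> Composed;
  for (int i = 0; i < 4; ++i)
    Composed[i] = VMask[Mask[i]];          // outer mask indexes into inner
  assert((Composed == std::array<int, 4>{3, 2, 1, 0}));
  return 0;
}
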
+/// \brief Search for a combinable shuffle across a chain ending in pshuflw
+/// or pshufhw.
+///
+/// We walk up the chain, skipping shuffles of the other half and looking
+/// through shuffles which switch halves trying to find a shuffle of the same
+/// pair of dwords.
+static bool combineRedundantHalfShuffle(SDValue N, MutableArrayRef<int> Mask,
+ SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ assert(
+ (N.getOpcode() == X86ISD::PSHUFLW || N.getOpcode() == X86ISD::PSHUFHW) &&
+ "Called with something other than an x86 128-bit half shuffle!");
+ SDLoc DL(N);
+ unsigned CombineOpcode = N.getOpcode();
+
+ // Walk up a single-use chain looking for a combinable shuffle.
+ SDValue V = N.getOperand(0);
+ for (; V.hasOneUse(); V = V.getOperand(0)) {
+ switch (V.getOpcode()) {
+ default:
+ return false; // Nothing combined!
+
+ case ISD::BITCAST:
+ // Skip bitcasts as we always know the type for the target specific
+ // instructions.
+ continue;
+
+ case X86ISD::PSHUFLW:
+ case X86ISD::PSHUFHW:
+ if (V.getOpcode() == CombineOpcode)
+ break;
+
+ // Other-half shuffles are no-ops.
+ continue;
+
+ case X86ISD::PSHUFD: {
+ // We can only handle pshufd if the half we are combining either stays in
+ // its half, or switches to the other half. Bail if one of these isn't
+ // true.
+ SmallVector<int, 4> VMask = getPSHUFShuffleMask(V);
+ int DOffset = CombineOpcode == X86ISD::PSHUFLW ? 0 : 2;
+ if (!((VMask[DOffset + 0] < 2 && VMask[DOffset + 1] < 2) ||
+ (VMask[DOffset + 0] >= 2 && VMask[DOffset + 1] >= 2)))
+ return false;
+
+ // Map the mask through the pshufd and keep walking up the chain.
+ for (int i = 0; i < 4; ++i)
+ Mask[i] = 2 * (VMask[DOffset + Mask[i] / 2] % 2) + Mask[i] % 2;
+
+ // Switch halves if the pshufd does.
+ CombineOpcode =
+ VMask[DOffset + Mask[0] / 2] < 2 ? X86ISD::PSHUFLW : X86ISD::PSHUFHW;
+ continue;
+ }
+ }
+ // Break out of the loop if we break out of the switch.
+ break;
+ }
+
+ if (!V.hasOneUse())
+ // We fell out of the loop without finding a viable combining instruction.
+ return false;
+
+ // Record the old value to use in RAUW-ing.
+ SDValue Old = V;
+
+ // Merge this node's mask and our incoming mask (adjusted to account for all
+ // the pshufd instructions encountered).
+ SmallVector<int, 4> VMask = getPSHUFShuffleMask(V);
+ for (int &M : Mask)
+ M = VMask[M];
+ V = DAG.getNode(V.getOpcode(), DL, MVT::v8i16, V.getOperand(0),
+ getV4X86ShuffleImm8ForMask(Mask, DAG));
+
+ // Replace N with its operand as we're going to combine that shuffle away.
+ DAG.ReplaceAllUsesWith(N, N.getOperand(0));
+
+ // Replace the combinable shuffle with the combined one, updating all users
+ // so that we re-evaluate the chain here.
+ DCI.CombineTo(Old.getNode(), V, /*AddTo*/ true);
+ return true;
+}
+
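
In the PSHUFD case of the walk above, each word index is rewritten by routing its dword (index / 2) through VMask while keeping the word's parity (index % 2) within that dword; that is what '2 * (VMask[DOffset + Mask[i] / 2] % 2) + Mask[i] % 2' computes. Worked standalone, with a VMask that swaps the halves (illustration only):

#include <cstdio>

int main() {
  int VMask[4] = {3, 2, 1, 0}; // PSHUFD mask (dword indices)
  int DOffset = 0;             // PSHUFLW side
  int Mask[4] = {0, 1, 2, 3};  // word indices before the hop
  for (int i = 0; i < 4; ++i)
    Mask[i] = 2 * (VMask[DOffset + Mask[i] / 2] % 2) + Mask[i] % 2;
  for (int i = 0; i < 4; ++i)
    printf("%d ", Mask[i]);    // prints: 2 3 0 1
  printf("\n");
  return 0;
}
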
+/// \brief Try to combine x86 target specific shuffles.
+static SDValue PerformTargetShuffleCombine(SDValue N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const X86Subtarget *Subtarget) {
+ SDLoc DL(N);
+ MVT VT = N.getSimpleValueType();
+ SmallVector<int, 4> Mask;
+
+ switch (N.getOpcode()) {
+ case X86ISD::PSHUFD:
+ case X86ISD::PSHUFLW:
+ case X86ISD::PSHUFHW:
+ Mask = getPSHUFShuffleMask(N);
+ assert(Mask.size() == 4);
+ break;
+ default:
+ return SDValue();
+ }
+
+ // Nuke no-op shuffles that show up after combining.
+ if (isNoopShuffleMask(Mask))
+ return DCI.CombineTo(N.getNode(), N.getOperand(0), /*AddTo*/ true);
+
+ // Look for simplifications involving one or two shuffle instructions.
+ SDValue V = N.getOperand(0);
+ switch (N.getOpcode()) {
+ default:
+ break;
+ case X86ISD::PSHUFLW:
+ case X86ISD::PSHUFHW:
+ assert(VT == MVT::v8i16);
+ (void)VT;
+
+ if (combineRedundantHalfShuffle(N, Mask, DAG, DCI))
+ return SDValue(); // We combined away this shuffle, so we're done.
+
+ // See if this reduces to a PSHUFD which is no more expensive and can
+ // combine with more operations.
+ if (Mask[0] % 2 == 0 && Mask[2] % 2 == 0 &&
+ areAdjacentMasksSequential(Mask)) {
+ int DMask[] = {-1, -1, -1, -1};
+ int DOffset = N.getOpcode() == X86ISD::PSHUFLW ? 0 : 2;
+ DMask[DOffset + 0] = DOffset + Mask[0] / 2;
+ DMask[DOffset + 1] = DOffset + Mask[2] / 2;
+ V = DAG.getNode(ISD::BITCAST, DL, MVT::v4i32, V);
+ DCI.AddToWorklist(V.getNode());
+ V = DAG.getNode(X86ISD::PSHUFD, DL, MVT::v4i32, V,
+ getV4X86ShuffleImm8ForMask(DMask, DAG));
+ DCI.AddToWorklist(V.getNode());
+ return DAG.getNode(ISD::BITCAST, DL, MVT::v8i16, V);
+ }
+
+ // Look for shuffle patterns which can be implemented as a single unpack.
+ // FIXME: This doesn't handle the location of the PSHUFD generically, and
+ // only works when we have a PSHUFD followed by two half-shuffles.
+ if (Mask[0] == Mask[1] && Mask[2] == Mask[3] &&
+ (V.getOpcode() == X86ISD::PSHUFLW ||
+ V.getOpcode() == X86ISD::PSHUFHW) &&
+ V.getOpcode() != N.getOpcode() &&
+ V.hasOneUse()) {
+ SDValue D = V.getOperand(0);
+ while (D.getOpcode() == ISD::BITCAST && D.hasOneUse())
+ D = D.getOperand(0);
+ if (D.getOpcode() == X86ISD::PSHUFD && D.hasOneUse()) {
+ SmallVector<int, 4> VMask = getPSHUFShuffleMask(V);
+ SmallVector<int, 4> DMask = getPSHUFShuffleMask(D);
+ int NOffset = N.getOpcode() == X86ISD::PSHUFLW ? 0 : 4;
+ int VOffset = V.getOpcode() == X86ISD::PSHUFLW ? 0 : 4;
+ int WordMask[8];
+ for (int i = 0; i < 4; ++i) {
+ WordMask[i + NOffset] = Mask[i] + NOffset;
+ WordMask[i + VOffset] = VMask[i] + VOffset;
+ }
+ // Map the word mask through the DWord mask.
+ int MappedMask[8];
+ for (int i = 0; i < 8; ++i)
+ MappedMask[i] = 2 * DMask[WordMask[i] / 2] + WordMask[i] % 2;
+ const int UnpackLoMask[] = {0, 0, 1, 1, 2, 2, 3, 3};
+ const int UnpackHiMask[] = {4, 4, 5, 5, 6, 6, 7, 7};
+ if (std::equal(std::begin(MappedMask), std::end(MappedMask),
+ std::begin(UnpackLoMask)) ||
+ std::equal(std::begin(MappedMask), std::end(MappedMask),
+ std::begin(UnpackHiMask))) {
+ // We can replace all three shuffles with an unpack.
+ V = DAG.getNode(ISD::BITCAST, DL, MVT::v8i16, D.getOperand(0));
+ DCI.AddToWorklist(V.getNode());
+ return DAG.getNode(MappedMask[0] == 0 ? X86ISD::UNPCKL
+ : X86ISD::UNPCKH,
+ DL, MVT::v8i16, V, V);
+ }
+ }
+ }
+
+ break;
+
+ case X86ISD::PSHUFD:
+ if (combineRedundantDWordShuffle(N, Mask, DAG, DCI))
+ return SDValue(); // We combined away this shuffle.
+
+ break;
+ }
+
+ return SDValue();
+}
+
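
The PSHUFLW/PSHUFHW-to-PSHUFD reduction above relies on a half shuffle that moves aligned word pairs intact being a dword shuffle in disguise; e.g. pshuflw <2,3,0,1> is pshufd <1,0,?,?> restricted to the low half. The DMask construction in standalone form (illustration only):

#include <cassert>

int main() {
  int Mask[4] = {2, 3, 0, 1};       // word shuffle moving whole pairs
  int DMask[4] = {-1, -1, -1, -1};  // -1 == "don't care" lane
  int DOffset = 0;                  // PSHUFLW acts on the low dwords
  DMask[DOffset + 0] = DOffset + Mask[0] / 2; // word pair -> dword index
  DMask[DOffset + 1] = DOffset + Mask[2] / 2;
  assert(DMask[0] == 1 && DMask[1] == 0);     // i.e. pshufd <1,0,...>
  return 0;
}
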
/// PerformShuffleCombine - Performs several different shuffle combines.
static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget *Subtarget) {
SDLoc dl(N);
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
EVT VT = N->getValueType(0);
+ // Canonicalize shuffles that perform 'addsub' on packed float vectors
+ // according to the rule:
+ // (shuffle (FADD A, B), (FSUB A, B), Mask) ->
+ // (shuffle (FSUB A, -B), (FADD A, -B), Mask)
+ //
+ // Where 'Mask' is:
+ // <0,5,2,7> -- for v4f32 and v4f64 shuffles;
+ // <0,3> -- for v2f64 shuffles;
+ // <0,9,2,11,4,13,6,15> -- for v8f32 shuffles.
+ //
+ // This helps pattern-match more SSE3/AVX ADDSUB instructions
+ // during the ISel stage.
+ if (N->getOpcode() == ISD::VECTOR_SHUFFLE &&
+ ((Subtarget->hasSSE3() && (VT == MVT::v4f32 || VT == MVT::v2f64)) ||
+ (Subtarget->hasAVX() && (VT == MVT::v8f32 || VT == MVT::v4f64))) &&
+ N0->getOpcode() == ISD::FADD && N1->getOpcode() == ISD::FSUB &&
+ // Operands to the FADD and FSUB must be the same.
+ ((N0->getOperand(0) == N1->getOperand(0) &&
+ N0->getOperand(1) == N1->getOperand(1)) ||
+ // FADD is commutable. See if by commuting the operands of the FADD
+ // we would still be able to match the operands of the FSUB dag node.
+ (N0->getOperand(1) == N1->getOperand(0) &&
+ N0->getOperand(0) == N1->getOperand(1))) &&
+ N0->getOperand(0)->getOpcode() != ISD::UNDEF &&
+ N0->getOperand(1)->getOpcode() != ISD::UNDEF) {
+
+ ShuffleVectorSDNode *SV = cast<ShuffleVectorSDNode>(N);
+ unsigned NumElts = VT.getVectorNumElements();
+ ArrayRef<int> Mask = SV->getMask();
+ bool CanFold = true;
+
+ for (unsigned i = 0, e = NumElts; i != e && CanFold; ++i)
+ CanFold = Mask[i] == (int)((i & 1) ? i + NumElts : i);
+
+ if (CanFold) {
+ SDValue Op0 = N1->getOperand(0);
+ SDValue Op1 = DAG.getNode(ISD::FNEG, dl, VT, N1->getOperand(1));
+ SDValue Sub = DAG.getNode(ISD::FSUB, dl, VT, Op0, Op1);
+ SDValue Add = DAG.getNode(ISD::FADD, dl, VT, Op0, Op1);
+ return DAG.getVectorShuffle(VT, dl, Sub, Add, Mask);
+ }
+ }
+
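
Both sides of the rule above compute the same vector: with Mask <0,5,2,7>, even lanes come from the first operand and odd lanes from the second, and negating B swaps which of FADD/FSUB feeds which side, which is exactly the (sub even, add odd) shape of ADDSUBPS(A, -B). A scalar check of the lane identity:

#include <cassert>

int main() {
  float A[4] = {1, 2, 3, 4}, B[4] = {10, 20, 30, 40};
  for (int i = 0; i < 4; ++i) {
    float Before = (i % 2 == 0) ? A[i] + B[i] : A[i] - B[i];  // FADD/FSUB pick
    float After = (i % 2 == 0) ? A[i] - -B[i] : A[i] + -B[i]; // FSUB/FADD of -B
    assert(Before == After);
  }
  return 0;
}
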
// Don't create instructions with illegal types after legalize types has run.
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (!DCI.isBeforeLegalize() && !TLI.isTypeLegal(VT.getVectorElementType()))
@@ -17490,6 +18809,57 @@ static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG,
N->getOpcode() == ISD::VECTOR_SHUFFLE)
return PerformShuffleCombine256(N, DAG, DCI, Subtarget);
+ // During Type Legalization, when promoting illegal vector types,
+ // the backend might introduce new shuffle dag nodes and bitcasts.
+ //
+ // This code performs the following transformation:
+ // fold: (shuffle (bitcast (BINOP A, B)), Undef, <Mask>) ->
+ // (shuffle (BINOP (bitcast A), (bitcast B)), Undef, <Mask>)
+ //
+ // We do this only if both the bitcast and the BINOP dag nodes have
+ // one use. Also, perform this transformation only if the new binary
+ // operation is legal. This is to avoid introducing dag nodes that
+ // potentially need to be further expanded (or custom lowered) into a
+ // less optimal sequence of dag nodes.
+ if (!DCI.isBeforeLegalize() && DCI.isBeforeLegalizeOps() &&
+ N1.getOpcode() == ISD::UNDEF && N0.hasOneUse() &&
+ N0.getOpcode() == ISD::BITCAST) {
+ SDValue BC0 = N0.getOperand(0);
+ EVT SVT = BC0.getValueType();
+ unsigned Opcode = BC0.getOpcode();
+ unsigned NumElts = VT.getVectorNumElements();
+
+ if (BC0.hasOneUse() && SVT.isVector() &&
+ SVT.getVectorNumElements() * 2 == NumElts &&
+ TLI.isOperationLegal(Opcode, VT)) {
+ bool CanFold = false;
+ switch (Opcode) {
+ default : break;
+ case ISD::ADD :
+ case ISD::FADD :
+ case ISD::SUB :
+ case ISD::FSUB :
+ case ISD::MUL :
+ case ISD::FMUL :
+ CanFold = true;
+ }
+
+ unsigned SVTNumElts = SVT.getVectorNumElements();
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
+ for (unsigned i = 0, e = SVTNumElts; i != e && CanFold; ++i)
+ CanFold = SVOp->getMaskElt(i) == (int)(i * 2);
+ for (unsigned i = SVTNumElts, e = NumElts; i != e && CanFold; ++i)
+ CanFold = SVOp->getMaskElt(i) < 0;
+
+ if (CanFold) {
+ SDValue BC00 = DAG.getNode(ISD::BITCAST, dl, VT, BC0.getOperand(0));
+ SDValue BC01 = DAG.getNode(ISD::BITCAST, dl, VT, BC0.getOperand(1));
+ SDValue NewBinOp = DAG.getNode(BC0.getOpcode(), dl, VT, BC00, BC01);
+ return DAG.getVectorShuffle(VT, dl, NewBinOp, N1, &SVOp->getMask()[0]);
+ }
+ }
+ }
+
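
The mask test above accepts exactly the "take the low subelement of each widened element" pattern: with the binop in v4i32 and VT = v8i16, the shuffle must be <0,2,4,6,u,u,u,u>. A standalone form of that predicate (made-up function name, not LLVM API; the opcode check is modeled separately in the real code):

#include <cassert>

bool isLowHalfExtractMask(const int *Mask, unsigned SVTNumElts,
                          unsigned NumElts) {
  for (unsigned i = 0; i < SVTNumElts; ++i)
    if (Mask[i] != (int)(i * 2))    // low subelement of widened element i
      return false;
  for (unsigned i = SVTNumElts; i < NumElts; ++i)
    if (Mask[i] >= 0)               // trailing lanes must be undef (< 0)
      return false;
  return true;
}

int main() {
  int Mask[8] = {0, 2, 4, 6, -1, -1, -1, -1};
  assert(isLowHalfExtractMask(Mask, 4, 8));
  return 0;
}
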
// Only handle 128-bit wide vectors from here on.
if (!VT.is128BitVector())
return SDValue();
@@ -17501,7 +18871,18 @@ static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG,
for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i)
Elts.push_back(getShuffleScalarElt(N, i, DAG, 0));
- return EltsFromConsecutiveLoads(VT, Elts, dl, DAG, true);
+ SDValue LD = EltsFromConsecutiveLoads(VT, Elts, dl, DAG, true);
+ if (LD.getNode())
+ return LD;
+
+ if (isTargetShuffle(N->getOpcode())) {
+ SDValue Shuffle =
+ PerformTargetShuffleCombine(SDValue(N, 0), DAG, DCI, Subtarget);
+ if (Shuffle.getNode())
+ return Shuffle;
+ }
+
+ return SDValue();
}
/// PerformTruncateCombine - Converts truncate operation to
@@ -18155,28 +19536,34 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
Other->getOpcode() == ISD::SUB && DAG.isEqualTo(OpRHS, CondRHS))
return DAG.getNode(X86ISD::SUBUS, DL, VT, OpLHS, OpRHS);
- // If the RHS is a constant we have to reverse the const canonicalization.
- // x > C-1 ? x+-C : 0 --> subus x, C
- if (CC == ISD::SETUGT && Other->getOpcode() == ISD::ADD &&
- isSplatVector(CondRHS.getNode()) && isSplatVector(OpRHS.getNode())) {
- APInt A = cast<ConstantSDNode>(OpRHS.getOperand(0))->getAPIntValue();
- if (CondRHS.getConstantOperandVal(0) == -A-1)
- return DAG.getNode(X86ISD::SUBUS, DL, VT, OpLHS,
- DAG.getConstant(-A, VT));
- }
-
- // Another special case: If C was a sign bit, the sub has been
- // canonicalized into a xor.
- // FIXME: Would it be better to use computeKnownBits to determine whether
- // it's safe to decanonicalize the xor?
- // x s< 0 ? x^C : 0 --> subus x, C
- if (CC == ISD::SETLT && Other->getOpcode() == ISD::XOR &&
- ISD::isBuildVectorAllZeros(CondRHS.getNode()) &&
- isSplatVector(OpRHS.getNode())) {
- APInt A = cast<ConstantSDNode>(OpRHS.getOperand(0))->getAPIntValue();
- if (A.isSignBit())
- return DAG.getNode(X86ISD::SUBUS, DL, VT, OpLHS, OpRHS);
- }
+ if (auto *OpRHSBV = dyn_cast<BuildVectorSDNode>(OpRHS))
+ if (auto *OpRHSConst = OpRHSBV->getConstantSplatNode()) {
+ if (auto *CondRHSBV = dyn_cast<BuildVectorSDNode>(CondRHS))
+ if (auto *CondRHSConst = CondRHSBV->getConstantSplatNode())
+ // If the RHS is a constant we have to reverse the const
+ // canonicalization.
+ // x > C-1 ? x+-C : 0 --> subus x, C
+ if (CC == ISD::SETUGT && Other->getOpcode() == ISD::ADD &&
+ CondRHSConst->getAPIntValue() ==
+ (-OpRHSConst->getAPIntValue() - 1))
+ return DAG.getNode(
+ X86ISD::SUBUS, DL, VT, OpLHS,
+ DAG.getConstant(-OpRHSConst->getAPIntValue(), VT));
+
+ // Another special case: If C was a sign bit, the sub has been
+ // canonicalized into a xor.
+ // FIXME: Would it be better to use computeKnownBits to determine
+ // whether it's safe to decanonicalize the xor?
+ // x s< 0 ? x^C : 0 --> subus x, C
+ if (CC == ISD::SETLT && Other->getOpcode() == ISD::XOR &&
+ ISD::isBuildVectorAllZeros(CondRHS.getNode()) &&
+ OpRHSConst->getAPIntValue().isSignBit())
+ // Note that we have to rebuild the RHS constant here to ensure we
+ // don't rely on particular values of undef lanes.
+ return DAG.getNode(
+ X86ISD::SUBUS, DL, VT, OpLHS,
+ DAG.getConstant(OpRHSConst->getAPIntValue(), VT));
+ }
}
}
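
The reversed canonicalization above rests on an unsigned identity: for C >= 1, x > C-1 is the same test as x >= C, so the select computes a saturating subtract. An exhaustive scalar check over uint8_t:

#include <cassert>
#include <cstdint>

int main() {
  for (unsigned x = 0; x < 256; ++x)
    for (unsigned C = 1; C < 256; ++C) {
      uint8_t Select = x > C - 1 ? (uint8_t)(x - C) : 0; // x > C-1 ? x+-C : 0
      uint8_t Subus = x >= C ? (uint8_t)(x - C) : 0;     // PSUBUS semantics
      assert(Select == Subus);
    }
  return 0;
}
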
@@ -18743,6 +20130,8 @@ static SDValue PerformINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG,
if (C->isAllOnesValue())
return Op1;
}
+
+ return SDValue();
}
// Packed SSE2/AVX2 arithmetic shift immediate intrinsics.
@@ -18882,16 +20271,15 @@ static SDValue PerformSHLCombine(SDNode *N, SelectionDAG &DAG) {
// vector operations in many cases. Also, on sandybridge ADD is faster than
// shl.
// (shl V, 1) -> add V,V
- if (isSplatVector(N1.getNode())) {
- assert(N0.getValueType().isVector() && "Invalid vector shift type");
- ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1->getOperand(0));
- // We shift all of the values by one. In many cases we do not have
- // hardware support for this operation. This is better expressed as an ADD
- // of two values.
- if (N1C && (1 == N1C->getZExtValue())) {
- return DAG.getNode(ISD::ADD, SDLoc(N), VT, N0, N0);
+ if (auto *N1BV = dyn_cast<BuildVectorSDNode>(N1))
+ if (auto *N1SplatC = N1BV->getConstantSplatNode()) {
+ assert(N0.getValueType().isVector() && "Invalid vector shift type");
+ // We shift all of the values by one. In many cases we do not have
+ // hardware support for this operation. This is better expressed as an ADD
+ // of two values.
+ if (N1SplatC->getZExtValue() == 1)
+ return DAG.getNode(ISD::ADD, SDLoc(N), VT, N0, N0);
}
- }
return SDValue();
}
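
The rewrite kept above uses the identity V << 1 == V + V, which holds lane-wise for any integer width; as the comment notes, the ADD form is often cheaper than a vector shift. Quick scalar spot-check:

#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t v = 0; v < 100000; ++v)
    assert((uint16_t)(v << 1) == (uint16_t)(v + v));
  return 0;
}
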
@@ -18910,10 +20298,9 @@ static SDValue performShiftToAllZeros(SDNode *N, SelectionDAG &DAG,
SDValue Amt = N->getOperand(1);
SDLoc DL(N);
- if (isSplatVector(Amt.getNode())) {
- SDValue SclrAmt = Amt->getOperand(0);
- if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(SclrAmt)) {
- APInt ShiftAmt = C->getAPIntValue();
+ if (auto *AmtBV = dyn_cast<BuildVectorSDNode>(Amt))
+ if (auto *AmtSplat = AmtBV->getConstantSplatNode()) {
+ APInt ShiftAmt = AmtSplat->getAPIntValue();
unsigned MaxAmount = VT.getVectorElementType().getSizeInBits();
// SSE2/AVX2 logical shifts always return a vector of 0s
@@ -18923,7 +20310,6 @@ static SDValue performShiftToAllZeros(SDNode *N, SelectionDAG &DAG,
if (ShiftAmt.trunc(8).uge(MaxAmount))
return getZeroVector(VT, Subtarget, DAG, DL);
}
- }
return SDValue();
}
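
The fold above depends on SSE2/AVX2 logical shifts yielding zero, rather than being undefined, once the splat amount reaches the element width. The C++ shift operator would be undefined behavior there, so a faithful scalar model (hypothetical helper, illustration only) clamps explicitly:

#include <cassert>
#include <cstdint>

// Model of a 16-bit SSE logical shift: amounts >= 16 produce 0.
uint16_t sseLogicalShl16(uint16_t v, unsigned amt) {
  return amt >= 16 ? 0 : (uint16_t)(v << amt);
}

int main() {
  assert(sseLogicalShl16(0xFFFF, 3) == 0xFFF8);
  assert(sseLogicalShl16(0xFFFF, 16) == 0);
  assert(sseLogicalShl16(0xFFFF, 200) == 0);
  return 0;
}
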
@@ -19117,9 +20503,10 @@ static SDValue WidenMaskArithmetic(SDNode *N, SelectionDAG &DAG,
// The right side has to be a 'trunc' or a constant vector.
bool RHSTrunc = N1.getOpcode() == ISD::TRUNCATE;
- bool RHSConst = (isSplatVector(N1.getNode()) &&
- isa<ConstantSDNode>(N1->getOperand(0)));
- if (!RHSTrunc && !RHSConst)
+ ConstantSDNode *RHSConstSplat = nullptr;
+ if (auto *RHSBV = dyn_cast<BuildVectorSDNode>(N1))
+ RHSConstSplat = RHSBV->getConstantSplatNode();
+ if (!RHSTrunc && !RHSConstSplat)
return SDValue();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
@@ -19129,9 +20516,9 @@ static SDValue WidenMaskArithmetic(SDNode *N, SelectionDAG &DAG,
// Set N0 and N1 to hold the inputs to the new wide operation.
N0 = N0->getOperand(0);
- if (RHSConst) {
+ if (RHSConstSplat) {
N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT.getScalarType(),
- N1->getOperand(0));
+ SDValue(RHSConstSplat, 0));
SmallVector<SDValue, 8> C(WideVT.getVectorNumElements(), N1);
N1 = DAG.getNode(ISD::BUILD_VECTOR, DL, WideVT, C);
} else if (RHSTrunc) {
@@ -19277,12 +20664,9 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG,
unsigned EltBits = MaskVT.getVectorElementType().getSizeInBits();
unsigned SraAmt = ~0;
if (Mask.getOpcode() == ISD::SRA) {
- SDValue Amt = Mask.getOperand(1);
- if (isSplatVector(Amt.getNode())) {
- SDValue SclrAmt = Amt->getOperand(0);
- if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(SclrAmt))
- SraAmt = C->getZExtValue();
- }
+ if (auto *AmtBV = dyn_cast<BuildVectorSDNode>(Mask.getOperand(1)))
+ if (auto *AmtConst = AmtBV->getConstantSplatNode())
+ SraAmt = AmtConst->getZExtValue();
} else if (Mask.getOpcode() == X86ISD::VSRAI) {
SDValue SraC = Mask.getOperand(1);
SraAmt = cast<ConstantSDNode>(SraC)->getZExtValue();
@@ -20642,6 +22026,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
return PerformINTRINSIC_WO_CHAINCombine(N, DAG, Subtarget);
case X86ISD::INSERTPS:
return PerformINSERTPSCombine(N, DAG, Subtarget);
+ case ISD::BUILD_VECTOR: return PerformBUILD_VECTORCombine(N, DAG, Subtarget);
}
return SDValue();
@@ -21146,8 +22531,8 @@ void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
const GlobalValue *GV = GA->getGlobal();
// If we require an extra load to get this address, as in PIC mode, we
// can't accept it.
- if (isGlobalStubReference(Subtarget->ClassifyGlobalReference(GV,
- getTargetMachine())))
+ if (isGlobalStubReference(
+ Subtarget->ClassifyGlobalReference(GV, DAG.getTarget())))
return;
Result = DAG.getTargetGlobalAddress(GV, SDLoc(Op),
@@ -21425,3 +22810,7 @@ int X86TargetLowering::getScalingFactorCost(const AddrMode &AM,
return AM.Scale != 0;
return -1;
}
+
+bool X86TargetLowering::isTargetFTOL() const {
+ return Subtarget->isTargetKnownWindowsMSVC() && !Subtarget->is64Bit();
+}
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index 9f51b53..c8cdce7 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -15,13 +15,13 @@
#ifndef X86ISELLOWERING_H
#define X86ISELLOWERING_H
-#include "X86Subtarget.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetOptions.h"
namespace llvm {
+ class X86Subtarget;
class X86TargetMachine;
namespace X86ISD {
@@ -86,6 +86,9 @@ namespace llvm {
/// X86 Read Time-Stamp Counter and Processor ID.
RDTSCP_DAG,
+ /// X86 Read Performance Monitoring Counters.
+ RDPMC_DAG,
+
/// X86 compare and logical compare instructions.
CMP, COMI, UCOMI,
@@ -315,6 +318,8 @@ namespace llvm {
KORTEST,
// Several flavors of instructions with vector shuffle behaviors.
+ PACKSS,
+ PACKUS,
PALIGNR,
PSHUFD,
PSHUFHW,
@@ -400,23 +405,8 @@ namespace llvm {
// XTEST - Test if in transactional execution.
XTEST,
- // ATOMADD64_DAG, ATOMSUB64_DAG, ATOMOR64_DAG, ATOMAND64_DAG,
- // ATOMXOR64_DAG, ATOMNAND64_DAG, ATOMSWAP64_DAG -
- // Atomic 64-bit binary operations.
- ATOMADD64_DAG = ISD::FIRST_TARGET_MEMORY_OPCODE,
- ATOMSUB64_DAG,
- ATOMOR64_DAG,
- ATOMXOR64_DAG,
- ATOMAND64_DAG,
- ATOMNAND64_DAG,
- ATOMMAX64_DAG,
- ATOMMIN64_DAG,
- ATOMUMAX64_DAG,
- ATOMUMIN64_DAG,
- ATOMSWAP64_DAG,
-
// LCMPXCHG_DAG, LCMPXCHG8_DAG, LCMPXCHG16_DAG - Compare and swap.
- LCMPXCHG_DAG,
+ LCMPXCHG_DAG = ISD::FIRST_TARGET_MEMORY_OPCODE,
LCMPXCHG8_DAG,
LCMPXCHG16_DAG,
@@ -766,9 +756,7 @@ namespace llvm {
/// isTargetFTOL - Return true if the target uses the MSVC _ftol2 routine
/// for fptoui.
- bool isTargetFTOL() const {
- return Subtarget->isTargetKnownWindowsMSVC() && !Subtarget->is64Bit();
- }
+ bool isTargetFTOL() const;
/// isIntegerTypeFTOL - Return true if the MSVC _ftol2 routine should be
/// used for fptoui to the given type.
@@ -808,6 +796,9 @@ namespace llvm {
/// \brief Reset the operation actions based on target options.
void resetOperationActions() override;
+ /// \brief Customize the preferred legalization strategy for certain types.
+ LegalizeTypeAction getPreferredVectorAction(EVT VT) const override;
+
protected:
std::pair<const TargetRegisterClass*, uint8_t>
findRepresentativeClass(MVT VT) const override;
diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td
index 37bcc52..41e900e 100644
--- a/lib/Target/X86/X86InstrAVX512.td
+++ b/lib/Target/X86/X86InstrAVX512.td
@@ -476,6 +476,28 @@ defm VPBROADCASTQZ : avx512_int_broadcast_rm<0x59, "vpbroadcastq", i64mem,
loadi64, VR512, v8i64, v2i64, VK8WM>, EVEX_V512, VEX_W,
EVEX_CD8<64, CD8VT1>;
+multiclass avx512_int_subvec_broadcast_rm<bits<8> opc, string OpcodeStr,
+ X86MemOperand x86memop, PatFrag ld_frag,
+ RegisterClass KRC> {
+ let mayLoad = 1 in {
+ def rm : AVX5128I<opc, MRMSrcMem, (outs VR512:$dst), (ins x86memop:$src),
+ !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
+ []>, EVEX;
+ def krm : AVX5128I<opc, MRMSrcMem, (outs VR512:$dst), (ins KRC:$mask,
+ x86memop:$src),
+ !strconcat(OpcodeStr,
+ " \t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}"),
+ []>, EVEX, EVEX_KZ;
+ }
+}
+
+defm VBROADCASTI32X4 : avx512_int_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
+ i128mem, loadv2i64, VK16WM>,
+ EVEX_V512, EVEX_CD8<32, CD8VT4>;
+defm VBROADCASTI64X4 : avx512_int_subvec_broadcast_rm<0x5b, "vbroadcasti64x4",
+ i256mem, loadv4i64, VK16WM>, VEX_W,
+ EVEX_V512, EVEX_CD8<64, CD8VT4>;
+
def : Pat<(v16i32 (int_x86_avx512_pbroadcastd_512 (v4i32 VR128X:$src))),
(VPBROADCASTDZrr VR128X:$src)>;
def : Pat<(v8i64 (int_x86_avx512_pbroadcastq_512 (v2i64 VR128X:$src))),
@@ -517,10 +539,12 @@ def rr : AVX512XS8I<opc, MRMDestReg, (outs DstRC:$dst), (ins KRC:$src),
[]>, EVEX;
}
+let Predicates = [HasCDI] in {
defm VPBROADCASTMW2D : avx512_mask_broadcast<0x3A, "vpbroadcastmw2d", VR512,
VK16, v16i32, v16i1>, EVEX_V512;
defm VPBROADCASTMB2Q : avx512_mask_broadcast<0x2A, "vpbroadcastmb2q", VR512,
VK8, v8i64, v8i1>, EVEX_V512, VEX_W;
+}
//===----------------------------------------------------------------------===//
// AVX-512 - VPERM
@@ -585,7 +609,7 @@ defm VPERMPDZ : avx512_perm<0x16, "vpermpd", VR512, memopv8f64, f512mem,
// -- VPERM2I - 3 source operands form --
multiclass avx512_perm_3src<bits<8> opc, string OpcodeStr, RegisterClass RC,
PatFrag mem_frag, X86MemOperand x86memop,
- SDNode OpNode, ValueType OpVT> {
+ SDNode OpNode, ValueType OpVT, RegisterClass KRC> {
let Constraints = "$src1 = $dst" in {
def rr : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, RC:$src2, RC:$src3),
@@ -595,48 +619,107 @@ let Constraints = "$src1 = $dst" in {
(OpVT (OpNode RC:$src1, RC:$src2, RC:$src3)))]>,
EVEX_4V;
+ def rrk : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
+ (ins RC:$src1, KRC:$mask, RC:$src2, RC:$src3),
+ !strconcat(OpcodeStr,
+ " \t{$src3, $src2, $dst {${mask}}|"
+ "$dst {${mask}}, $src2, $src3}"),
+ [(set RC:$dst, (OpVT (vselect KRC:$mask,
+ (OpNode RC:$src1, RC:$src2,
+ RC:$src3),
+ RC:$src1)))]>,
+ EVEX_4V, EVEX_K;
+
+ let AddedComplexity = 30 in // Prefer over VMOV*rrkz Pat<>
+ def rrkz : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
+ (ins RC:$src1, KRC:$mask, RC:$src2, RC:$src3),
+ !strconcat(OpcodeStr,
+ " \t{$src3, $src2, $dst {${mask}} {z} |",
+ "$dst {${mask}} {z}, $src2, $src3}"),
+ [(set RC:$dst, (OpVT (vselect KRC:$mask,
+ (OpNode RC:$src1, RC:$src2,
+ RC:$src3),
+ (OpVT (bitconvert
+ (v16i32 immAllZerosV))))))]>,
+ EVEX_4V, EVEX_KZ;
+
def rm : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, RC:$src2, x86memop:$src3),
!strconcat(OpcodeStr,
" \t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[(set RC:$dst,
- (OpVT (OpNode RC:$src1, RC:$src2,
+ (OpVT (OpNode RC:$src1, RC:$src2,
(mem_frag addr:$src3))))]>, EVEX_4V;
+
+ def rmk : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
+ (ins RC:$src1, KRC:$mask, RC:$src2, x86memop:$src3),
+ !strconcat(OpcodeStr,
+ " \t{$src3, $src2, $dst {${mask}}|"
+ "$dst {${mask}}, $src2, $src3}"),
+ [(set RC:$dst,
+ (OpVT (vselect KRC:$mask,
+ (OpNode RC:$src1, RC:$src2,
+ (mem_frag addr:$src3)),
+ RC:$src1)))]>,
+ EVEX_4V, EVEX_K;
+
+ let AddedComplexity = 10 in // Prefer over the rrkz variant
+ def rmkz : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
+ (ins RC:$src1, KRC:$mask, RC:$src2, x86memop:$src3),
+ !strconcat(OpcodeStr,
+ " \t{$src3, $src2, $dst {${mask}} {z}|"
+ "$dst {${mask}} {z}, $src2, $src3}"),
+ [(set RC:$dst,
+ (OpVT (vselect KRC:$mask,
+ (OpNode RC:$src1, RC:$src2,
+ (mem_frag addr:$src3)),
+ (OpVT (bitconvert
+ (v16i32 immAllZerosV))))))]>,
+ EVEX_4V, EVEX_KZ;
}
}
-defm VPERMI2D : avx512_perm_3src<0x76, "vpermi2d", VR512, memopv16i32, i512mem,
- X86VPermiv3, v16i32>, EVEX_V512, EVEX_CD8<32, CD8VF>;
-defm VPERMI2Q : avx512_perm_3src<0x76, "vpermi2q", VR512, memopv8i64, i512mem,
- X86VPermiv3, v8i64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
-defm VPERMI2PS : avx512_perm_3src<0x77, "vpermi2ps", VR512, memopv16f32, i512mem,
- X86VPermiv3, v16f32>, EVEX_V512, EVEX_CD8<32, CD8VF>;
-defm VPERMI2PD : avx512_perm_3src<0x77, "vpermi2pd", VR512, memopv8f64, i512mem,
- X86VPermiv3, v8f64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
-
-defm VPERMT2D : avx512_perm_3src<0x7E, "vpermt2d", VR512, memopv16i32, i512mem,
- X86VPermv3, v16i32>, EVEX_V512, EVEX_CD8<32, CD8VF>;
-defm VPERMT2Q : avx512_perm_3src<0x7E, "vpermt2q", VR512, memopv8i64, i512mem,
- X86VPermv3, v8i64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
-defm VPERMT2PS : avx512_perm_3src<0x7F, "vpermt2ps", VR512, memopv16f32, i512mem,
- X86VPermv3, v16f32>, EVEX_V512, EVEX_CD8<32, CD8VF>;
-defm VPERMT2PD : avx512_perm_3src<0x7F, "vpermt2pd", VR512, memopv8f64, i512mem,
- X86VPermv3, v8f64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
-
-def : Pat<(v16f32 (int_x86_avx512_mask_vpermt_ps_512 (v16i32 VR512:$idx),
- (v16f32 VR512:$src1), (v16f32 VR512:$src2), (i16 -1))),
- (VPERMT2PSrr VR512:$src1, VR512:$idx, VR512:$src2)>;
-
-def : Pat<(v16i32 (int_x86_avx512_mask_vpermt_d_512 (v16i32 VR512:$idx),
- (v16i32 VR512:$src1), (v16i32 VR512:$src2), (i16 -1))),
- (VPERMT2Drr VR512:$src1, VR512:$idx, VR512:$src2)>;
-
-def : Pat<(v8f64 (int_x86_avx512_mask_vpermt_pd_512 (v8i64 VR512:$idx),
- (v8f64 VR512:$src1), (v8f64 VR512:$src2), (i8 -1))),
- (VPERMT2PDrr VR512:$src1, VR512:$idx, VR512:$src2)>;
-
-def : Pat<(v8i64 (int_x86_avx512_mask_vpermt_q_512 (v8i64 VR512:$idx),
- (v8i64 VR512:$src1), (v8i64 VR512:$src2), (i8 -1))),
- (VPERMT2Qrr VR512:$src1, VR512:$idx, VR512:$src2)>;
+defm VPERMI2D : avx512_perm_3src<0x76, "vpermi2d", VR512, memopv16i32,
+ i512mem, X86VPermiv3, v16i32, VK16WM>,
+ EVEX_V512, EVEX_CD8<32, CD8VF>;
+defm VPERMI2Q : avx512_perm_3src<0x76, "vpermi2q", VR512, memopv8i64,
+ i512mem, X86VPermiv3, v8i64, VK8WM>,
+ EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
+defm VPERMI2PS : avx512_perm_3src<0x77, "vpermi2ps", VR512, memopv16f32,
+ i512mem, X86VPermiv3, v16f32, VK16WM>,
+ EVEX_V512, EVEX_CD8<32, CD8VF>;
+defm VPERMI2PD : avx512_perm_3src<0x77, "vpermi2pd", VR512, memopv8f64,
+ i512mem, X86VPermiv3, v8f64, VK8WM>,
+ EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
+
+multiclass avx512_perm_table_3src<bits<8> opc, string Suffix, RegisterClass RC,
+ PatFrag mem_frag, X86MemOperand x86memop,
+ SDNode OpNode, ValueType OpVT, RegisterClass KRC,
+ ValueType MaskVT, RegisterClass MRC> :
+ avx512_perm_3src<opc, "vpermt2"##Suffix, RC, mem_frag, x86memop, OpNode,
+ OpVT, KRC> {
+ def : Pat<(OpVT (!cast<Intrinsic>("int_x86_avx512_mask_vpermt_"##Suffix##"_512")
+ VR512:$idx, VR512:$src1, VR512:$src2, -1)),
+ (!cast<Instruction>(NAME#rr) VR512:$src1, VR512:$idx, VR512:$src2)>;
+
+ def : Pat<(OpVT (!cast<Intrinsic>("int_x86_avx512_mask_vpermt_"##Suffix##"_512")
+ VR512:$idx, VR512:$src1, VR512:$src2, MRC:$mask)),
+ (!cast<Instruction>(NAME#rrk) VR512:$src1,
+ (MaskVT (COPY_TO_REGCLASS MRC:$mask, KRC)), VR512:$idx, VR512:$src2)>;
+}
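+
+// For instance, the VPERMT2D instantiation below matches
+// int_x86_avx512_mask_vpermt_d_512 with an all-ones mask onto plain
+// VPERMT2Drr, and a GR16 mask onto VPERMT2Drrk after a COPY_TO_REGCLASS
+// into VK16WM.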
+
+defm VPERMT2D : avx512_perm_table_3src<0x7E, "d", VR512, memopv16i32, i512mem,
+ X86VPermv3, v16i32, VK16WM, v16i1, GR16>,
+ EVEX_V512, EVEX_CD8<32, CD8VF>;
+defm VPERMT2Q : avx512_perm_table_3src<0x7E, "q", VR512, memopv8i64, i512mem,
+ X86VPermv3, v8i64, VK8WM, v8i1, GR8>,
+ EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
+defm VPERMT2PS : avx512_perm_table_3src<0x7F, "ps", VR512, memopv16f32, i512mem,
+ X86VPermv3, v16f32, VK16WM, v16i1, GR16>,
+ EVEX_V512, EVEX_CD8<32, CD8VF>;
+defm VPERMT2PD : avx512_perm_table_3src<0x7F, "pd", VR512, memopv8f64, i512mem,
+ X86VPermv3, v8f64, VK8WM, v8i1, GR8>,
+ EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
+
//===----------------------------------------------------------------------===//
// AVX-512 - BLEND using mask
//
@@ -790,52 +873,61 @@ def : Pat<(v8i1 (X86pcmpeqm (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
(v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)),
(v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm))), VK8)>;
-multiclass avx512_icmp_cc<bits<8> opc, RegisterClass KRC,
+multiclass avx512_icmp_cc<bits<8> opc, RegisterClass WMRC, RegisterClass KRC,
RegisterClass RC, X86MemOperand x86memop, PatFrag memop_frag,
- SDNode OpNode, ValueType vt, Operand CC, string asm,
- string asm_alt> {
+ SDNode OpNode, ValueType vt, Operand CC, string Suffix> {
def rri : AVX512AIi8<opc, MRMSrcReg,
- (outs KRC:$dst), (ins RC:$src1, RC:$src2, CC:$cc), asm,
+ (outs KRC:$dst), (ins RC:$src1, RC:$src2, CC:$cc),
+ !strconcat("vpcmp${cc}", Suffix,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set KRC:$dst, (OpNode (vt RC:$src1), (vt RC:$src2), imm:$cc))],
IIC_SSE_ALU_F32P_RR>, EVEX_4V;
def rmi : AVX512AIi8<opc, MRMSrcMem,
- (outs KRC:$dst), (ins RC:$src1, x86memop:$src2, CC:$cc), asm,
+ (outs KRC:$dst), (ins RC:$src1, x86memop:$src2, CC:$cc),
+ !strconcat("vpcmp${cc}", Suffix,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set KRC:$dst, (OpNode (vt RC:$src1), (memop_frag addr:$src2),
imm:$cc))], IIC_SSE_ALU_F32P_RM>, EVEX_4V;
// Accept explicit immediate argument form instead of comparison code.
let isAsmParserOnly = 1, hasSideEffects = 0 in {
def rri_alt : AVX512AIi8<opc, MRMSrcReg,
(outs KRC:$dst), (ins RC:$src1, RC:$src2, i8imm:$cc),
- asm_alt, [], IIC_SSE_ALU_F32P_RR>, EVEX_4V;
+ !strconcat("vpcmp", Suffix,
+ "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
+ [], IIC_SSE_ALU_F32P_RR>, EVEX_4V;
+ def rrik_alt : AVX512AIi8<opc, MRMSrcReg,
+ (outs KRC:$dst), (ins WMRC:$mask, RC:$src1, RC:$src2, i8imm:$cc),
+ !strconcat("vpcmp", Suffix,
+ "\t{$cc, $src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2, $cc}"),
+ [], IIC_SSE_ALU_F32P_RR>, EVEX_4V, EVEX_K;
def rmi_alt : AVX512AIi8<opc, MRMSrcMem,
(outs KRC:$dst), (ins RC:$src1, x86memop:$src2, i8imm:$cc),
- asm_alt, [], IIC_SSE_ALU_F32P_RM>, EVEX_4V;
+ !strconcat("vpcmp", Suffix,
+ "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
+ [], IIC_SSE_ALU_F32P_RM>, EVEX_4V;
+ def rmik_alt : AVX512AIi8<opc, MRMSrcMem,
+ (outs KRC:$dst), (ins WMRC:$mask, RC:$src1, x86memop:$src2, i8imm:$cc),
+ !strconcat("vpcmp", Suffix,
+ "\t{$cc, $src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2, $cc}"),
+ [], IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_K;
}
}
-defm VPCMPDZ : avx512_icmp_cc<0x1F, VK16, VR512, i512mem, memopv16i32,
- X86cmpm, v16i32, AVXCC,
- "vpcmp${cc}d\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- "vpcmpd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}">,
- EVEX_V512, EVEX_CD8<32, CD8VF>;
-defm VPCMPUDZ : avx512_icmp_cc<0x1E, VK16, VR512, i512mem, memopv16i32,
- X86cmpmu, v16i32, AVXCC,
- "vpcmp${cc}ud\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- "vpcmpud\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}">,
- EVEX_V512, EVEX_CD8<32, CD8VF>;
-
-defm VPCMPQZ : avx512_icmp_cc<0x1F, VK8, VR512, i512mem, memopv8i64,
- X86cmpm, v8i64, AVXCC,
- "vpcmp${cc}q\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- "vpcmpq\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}">,
- VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;
-defm VPCMPUQZ : avx512_icmp_cc<0x1E, VK8, VR512, i512mem, memopv8i64,
- X86cmpmu, v8i64, AVXCC,
- "vpcmp${cc}uq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- "vpcmpuq\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}">,
- VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;
-
-// avx512_cmp_packed - sse 1 & 2 compare packed instructions
+defm VPCMPDZ : avx512_icmp_cc<0x1F, VK16WM, VK16, VR512, i512mem, memopv16i32,
+ X86cmpm, v16i32, AVXCC, "d">,
+ EVEX_V512, EVEX_CD8<32, CD8VF>;
+defm VPCMPUDZ : avx512_icmp_cc<0x1E, VK16WM, VK16, VR512, i512mem, memopv16i32,
+ X86cmpmu, v16i32, AVXCC, "ud">,
+ EVEX_V512, EVEX_CD8<32, CD8VF>;
+
+defm VPCMPQZ : avx512_icmp_cc<0x1F, VK8WM, VK8, VR512, i512mem, memopv8i64,
+ X86cmpm, v8i64, AVXCC, "q">,
+ VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;
+defm VPCMPUQZ : avx512_icmp_cc<0x1E, VK8WM, VK8, VR512, i512mem, memopv8i64,
+ X86cmpmu, v8i64, AVXCC, "uq">,
+ VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;
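+
+// With Suffix = "d", for example, the rri/rmi forms above print as
+// "vpcmp${cc}d ...", so the assembler sees mnemonics such as vpcmpeqd and
+// vpcmpltd, while the *_alt forms take the comparison code as an explicit
+// immediate (roughly "vpcmpd $2, %zmm2, %zmm1, %k0" in AT&T syntax; an
+// assumed example, not part of this change).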
+
+// avx512_cmp_packed - compare packed instructions
multiclass avx512_cmp_packed<RegisterClass KRC, RegisterClass RC,
X86MemOperand x86memop, ValueType vt,
string suffix, Domain d> {
@@ -859,11 +951,11 @@ multiclass avx512_cmp_packed<RegisterClass KRC, RegisterClass RC,
// Accept explicit immediate argument form instead of comparison code.
let isAsmParserOnly = 1, hasSideEffects = 0 in {
def rri_alt : AVX512PIi8<0xC2, MRMSrcReg,
- (outs RC:$dst), (ins RC:$src1, RC:$src2, i8imm:$cc),
+ (outs KRC:$dst), (ins RC:$src1, RC:$src2, i8imm:$cc),
!strconcat("vcmp", suffix,
" \t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"), [], d>;
def rmi_alt : AVX512PIi8<0xC2, MRMSrcMem,
- (outs RC:$dst), (ins RC:$src1, x86memop:$src2, i8imm:$cc),
+ (outs KRC:$dst), (ins RC:$src1, x86memop:$src2, i8imm:$cc),
!strconcat("vcmp", suffix,
" \t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"), [], d>;
}
@@ -1788,6 +1880,46 @@ def : Pat<(v8i64 (X86Vinsert undef, GR64:$src2, (iPTR 0))),
(SUBREG_TO_REG (i32 0), (VMOV64toPQIZrr GR64:$src2), sub_xmm)>;
//===----------------------------------------------------------------------===//
+// AVX-512 - Non-temporals
+//===----------------------------------------------------------------------===//
+
+def VMOVNTDQAZrm : AVX5128I<0x2A, MRMSrcMem, (outs VR512:$dst),
+ (ins i512mem:$src),
+ "vmovntdqa\t{$src, $dst|$dst, $src}",
+ [(set VR512:$dst,
+ (int_x86_avx512_movntdqa addr:$src))]>,
+ EVEX, EVEX_V512, EVEX_CD8<64, CD8VF>;
+
+// Prefer non-temporal over temporal versions
+let AddedComplexity = 400, SchedRW = [WriteStore] in {
+
+def VMOVNTPSZmr : AVX512PSI<0x2B, MRMDestMem, (outs),
+ (ins f512mem:$dst, VR512:$src),
+ "vmovntps\t{$src, $dst|$dst, $src}",
+ [(alignednontemporalstore (v16f32 VR512:$src),
+ addr:$dst)],
+ IIC_SSE_MOVNT>,
+ EVEX, EVEX_V512, EVEX_CD8<32, CD8VF>;
+
+def VMOVNTPDZmr : AVX512PDI<0x2B, MRMDestMem, (outs),
+ (ins f512mem:$dst, VR512:$src),
+ "vmovntpd\t{$src, $dst|$dst, $src}",
+ [(alignednontemporalstore (v8f64 VR512:$src),
+ addr:$dst)],
+ IIC_SSE_MOVNT>,
+ EVEX, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
+
+def VMOVNTDQZmr : AVX512BI<0xE7, MRMDestMem, (outs),
+ (ins i512mem:$dst, VR512:$src),
+ "vmovntdq\t{$src, $dst|$dst, $src}",
+ [(alignednontemporalstore (v8i64 VR512:$src),
+ addr:$dst)],
+ IIC_SSE_MOVNT>,
+ EVEX, EVEX_V512, EVEX_CD8<64, CD8VF>;
+}
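+
+// Sketch of the IR these stores match (assumed shape, not from this patch):
+//   store <16 x float> %v, <16 x float>* %p, align 64, !nontemporal !0
+// (with !0 = !{i32 1}), which selects vmovntps instead of a regular
+// vmovaps store.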
+
+//===----------------------------------------------------------------------===//
// AVX-512 - Integer arithmetic
//
multiclass avx512_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
@@ -3161,6 +3293,10 @@ def : Pat<(v4f32 (uint_to_fp (v4i32 VR128X:$src1))),
(EXTRACT_SUBREG (v16f32 (VCVTUDQ2PSZrr
(v16i32 (SUBREG_TO_REG (i32 0), VR128X:$src1, sub_xmm)))), sub_xmm)>;
+def : Pat<(v4f64 (uint_to_fp (v4i32 VR128X:$src1))),
+ (EXTRACT_SUBREG (v8f64 (VCVTUDQ2PDZrr
+ (v8i32 (SUBREG_TO_REG (i32 0), VR128X:$src1, sub_xmm)))), sub_ymm)>;
+
def : Pat<(v16f32 (int_x86_avx512_mask_cvtdq2ps_512 (v16i32 VR512:$src),
(bc_v16f32 (v16i32 immAllZerosV)), (i16 -1), imm:$rc)),
(VCVTDQ2PSZrrb VR512:$src, imm:$rc)>;
@@ -4343,6 +4479,37 @@ def : Pat<(int_x86_avx512_mask_conflict_q_512 VR512:$src2, VR512:$src1,
(VPCONFLICTQrrk VR512:$src1,
(v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)), VR512:$src2)>;
+let Predicates = [HasCDI] in {
+defm VPLZCNTD : avx512_conflict<0x44, "vplzcntd", VR512, VK16WM,
+ i512mem, i32mem, "{1to16}">,
+ EVEX_V512, EVEX_CD8<32, CD8VF>;
+
+defm VPLZCNTQ : avx512_conflict<0x44, "vplzcntq", VR512, VK8WM,
+ i512mem, i64mem, "{1to8}">,
+ EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
+
+}
+
+def : Pat<(int_x86_avx512_mask_lzcnt_d_512 VR512:$src2, VR512:$src1,
+ GR16:$mask),
+ (VPLZCNTDrrk VR512:$src1,
+ (v16i1 (COPY_TO_REGCLASS GR16:$mask, VK16WM)), VR512:$src2)>;
+
+def : Pat<(int_x86_avx512_mask_lzcnt_q_512 VR512:$src2, VR512:$src1,
+ GR8:$mask),
+ (VPLZCNTQrrk VR512:$src1,
+ (v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)), VR512:$src2)>;
+
+def : Pat<(v16i32 (ctlz (memopv16i32 addr:$src))),
+ (VPLZCNTDrm addr:$src)>;
+def : Pat<(v16i32 (ctlz (v16i32 VR512:$src))),
+ (VPLZCNTDrr VR512:$src)>;
+def : Pat<(v8i64 (ctlz (memopv8i64 addr:$src))),
+ (VPLZCNTQrm addr:$src)>;
+def : Pat<(v8i64 (ctlz (v8i64 VR512:$src))),
+ (VPLZCNTQrr VR512:$src)>;
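+
+// As a consequence, e.g. @llvm.ctlz.v16i32 on a HasCDI target selects
+// directly to vplzcntd, with both the register and folded-load forms
+// covered by the patterns above.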
+
def : Pat<(store (i1 -1), addr:$dst), (MOV8mi addr:$dst, (i8 1))>;
def : Pat<(store (i1 1), addr:$dst), (MOV8mi addr:$dst, (i8 1))>;
def : Pat<(store (i1 0), addr:$dst), (MOV8mi addr:$dst, (i8 0))>;
diff --git a/lib/Target/X86/X86InstrArithmetic.td b/lib/Target/X86/X86InstrArithmetic.td
index 368e14b..f2574cc 100644
--- a/lib/Target/X86/X86InstrArithmetic.td
+++ b/lib/Target/X86/X86InstrArithmetic.td
@@ -1278,8 +1278,10 @@ let isCompare = 1 in {
def TEST64mi32 : BinOpMI_F<"test", Xi64, X86testpat, MRM0m, 0xF6>;
// When testing the result of EXTRACT_SUBREG sub_8bit_hi, make sure the
- // register class is constrained to GR8_NOREX.
- let isPseudo = 1 in
+ // register class is constrained to GR8_NOREX. This pseudo is explicitly
+ // marked side-effect free, since it doesn't have an isel pattern like
+ // other test instructions.
+ let isPseudo = 1, hasSideEffects = 0 in
def TEST8ri_NOREX : I<0, Pseudo, (outs), (ins GR8_NOREX:$src, i8imm:$mask),
"", [], IIC_BIN_NONMEM>, Sched<[WriteALU]>;
} // Defs = [EFLAGS]
diff --git a/lib/Target/X86/X86InstrCompiler.td b/lib/Target/X86/X86InstrCompiler.td
index 34d8fb9..ca4f608 100644
--- a/lib/Target/X86/X86InstrCompiler.td
+++ b/lib/Target/X86/X86InstrCompiler.td
@@ -110,7 +110,7 @@ let Defs = [EAX, ESP, EFLAGS], Uses = [ESP] in
// When using segmented stacks these are lowered into instructions which first
// check if the current stacklet has enough free memory. If it does, memory is
-// allocated by bumping the stack pointer. Otherwise memory is allocated from
+// allocated by bumping the stack pointer. Otherwise memory is allocated from
// the heap.
let Defs = [EAX, ESP, EFLAGS], Uses = [ESP] in
@@ -197,6 +197,26 @@ let isBranch = 1, isTerminator = 1, isCodeGenOnly = 1 in {
}
//===----------------------------------------------------------------------===//
+// Pseudo instructions used by unwind info.
+//
+let isPseudo = 1 in {
+ def SEH_PushReg : I<0, Pseudo, (outs), (ins i32imm:$reg),
+ "#SEH_PushReg $reg", []>;
+ def SEH_SaveReg : I<0, Pseudo, (outs), (ins i32imm:$reg, i32imm:$dst),
+ "#SEH_SaveReg $reg, $dst", []>;
+ def SEH_SaveXMM : I<0, Pseudo, (outs), (ins i32imm:$reg, i32imm:$dst),
+ "#SEH_SaveXMM $reg, $dst", []>;
+ def SEH_StackAlloc : I<0, Pseudo, (outs), (ins i32imm:$size),
+ "#SEH_StackAlloc $size", []>;
+ def SEH_SetFrame : I<0, Pseudo, (outs), (ins i32imm:$reg, i32imm:$offset),
+ "#SEH_SetFrame $reg, $offset", []>;
+ def SEH_PushFrame : I<0, Pseudo, (outs), (ins i1imm:$mode),
+ "#SEH_PushFrame $mode", []>;
+ def SEH_EndPrologue : I<0, Pseudo, (outs), (ins),
+ "#SEH_EndPrologue", []>;
+}
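+
+// These pseudos carry the Win64 prologue shape through codegen; at emission
+// time they presumably map onto the corresponding .seh_* assembler
+// directives (.seh_pushreg, .seh_savexmm, .seh_stackalloc, ...).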
+
+//===----------------------------------------------------------------------===//
// Pseudo instructions used by segmented stacks.
//
@@ -371,7 +391,7 @@ let Defs = [RCX,RDI], isCodeGenOnly = 1 in {
def REP_STOSD_64 : I<0xAB, RawFrm, (outs), (ins), "{rep;stosl|rep stosd}",
[(X86rep_stos i32)], IIC_REP_STOS>, REP, OpSize32,
Requires<[In64BitMode]>;
-
+
let Uses = [RAX,RCX,RDI] in
def REP_STOSQ_64 : RI<0xAB, RawFrm, (outs), (ins), "{rep;stosq|rep stosq}",
[(X86rep_stos i64)], IIC_REP_STOS>, REP,
@@ -502,83 +522,6 @@ def CMOV_RFP80 : I<0, Pseudo,
//===----------------------------------------------------------------------===//
-// Atomic Instruction Pseudo Instructions
-//===----------------------------------------------------------------------===//
-
-// Pseudo atomic instructions
-
-multiclass PSEUDO_ATOMIC_LOAD_BINOP<string mnemonic> {
- let usesCustomInserter = 1, mayLoad = 1, mayStore = 1 in {
- let Defs = [EFLAGS, AL] in
- def NAME#8 : I<0, Pseudo, (outs GR8:$dst),
- (ins i8mem:$ptr, GR8:$val),
- !strconcat(mnemonic, "8 PSEUDO!"), []>;
- let Defs = [EFLAGS, AX] in
- def NAME#16 : I<0, Pseudo,(outs GR16:$dst),
- (ins i16mem:$ptr, GR16:$val),
- !strconcat(mnemonic, "16 PSEUDO!"), []>;
- let Defs = [EFLAGS, EAX] in
- def NAME#32 : I<0, Pseudo, (outs GR32:$dst),
- (ins i32mem:$ptr, GR32:$val),
- !strconcat(mnemonic, "32 PSEUDO!"), []>;
- let Defs = [EFLAGS, RAX] in
- def NAME#64 : I<0, Pseudo, (outs GR64:$dst),
- (ins i64mem:$ptr, GR64:$val),
- !strconcat(mnemonic, "64 PSEUDO!"), []>;
- }
-}
-
-multiclass PSEUDO_ATOMIC_LOAD_BINOP_PATS<string name, string frag> {
- def : Pat<(!cast<PatFrag>(frag # "_8") addr:$ptr, GR8:$val),
- (!cast<Instruction>(name # "8") addr:$ptr, GR8:$val)>;
- def : Pat<(!cast<PatFrag>(frag # "_16") addr:$ptr, GR16:$val),
- (!cast<Instruction>(name # "16") addr:$ptr, GR16:$val)>;
- def : Pat<(!cast<PatFrag>(frag # "_32") addr:$ptr, GR32:$val),
- (!cast<Instruction>(name # "32") addr:$ptr, GR32:$val)>;
- def : Pat<(!cast<PatFrag>(frag # "_64") addr:$ptr, GR64:$val),
- (!cast<Instruction>(name # "64") addr:$ptr, GR64:$val)>;
-}
-
-// Atomic exchange, and, or, xor
-defm ATOMAND : PSEUDO_ATOMIC_LOAD_BINOP<"#ATOMAND">;
-defm ATOMOR : PSEUDO_ATOMIC_LOAD_BINOP<"#ATOMOR">;
-defm ATOMXOR : PSEUDO_ATOMIC_LOAD_BINOP<"#ATOMXOR">;
-defm ATOMNAND : PSEUDO_ATOMIC_LOAD_BINOP<"#ATOMNAND">;
-defm ATOMMAX : PSEUDO_ATOMIC_LOAD_BINOP<"#ATOMMAX">;
-defm ATOMMIN : PSEUDO_ATOMIC_LOAD_BINOP<"#ATOMMIN">;
-defm ATOMUMAX : PSEUDO_ATOMIC_LOAD_BINOP<"#ATOMUMAX">;
-defm ATOMUMIN : PSEUDO_ATOMIC_LOAD_BINOP<"#ATOMUMIN">;
-
-defm : PSEUDO_ATOMIC_LOAD_BINOP_PATS<"ATOMAND", "atomic_load_and">;
-defm : PSEUDO_ATOMIC_LOAD_BINOP_PATS<"ATOMOR", "atomic_load_or">;
-defm : PSEUDO_ATOMIC_LOAD_BINOP_PATS<"ATOMXOR", "atomic_load_xor">;
-defm : PSEUDO_ATOMIC_LOAD_BINOP_PATS<"ATOMNAND", "atomic_load_nand">;
-defm : PSEUDO_ATOMIC_LOAD_BINOP_PATS<"ATOMMAX", "atomic_load_max">;
-defm : PSEUDO_ATOMIC_LOAD_BINOP_PATS<"ATOMMIN", "atomic_load_min">;
-defm : PSEUDO_ATOMIC_LOAD_BINOP_PATS<"ATOMUMAX", "atomic_load_umax">;
-defm : PSEUDO_ATOMIC_LOAD_BINOP_PATS<"ATOMUMIN", "atomic_load_umin">;
-
-multiclass PSEUDO_ATOMIC_LOAD_BINOP6432<string mnemonic> {
- let usesCustomInserter = 1, Defs = [EFLAGS, EAX, EDX],
- mayLoad = 1, mayStore = 1, hasSideEffects = 0 in
- def NAME#6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2),
- (ins i64mem:$ptr, GR32:$val1, GR32:$val2),
- !strconcat(mnemonic, "6432 PSEUDO!"), []>;
-}
-
-defm ATOMAND : PSEUDO_ATOMIC_LOAD_BINOP6432<"#ATOMAND">;
-defm ATOMOR : PSEUDO_ATOMIC_LOAD_BINOP6432<"#ATOMOR">;
-defm ATOMXOR : PSEUDO_ATOMIC_LOAD_BINOP6432<"#ATOMXOR">;
-defm ATOMNAND : PSEUDO_ATOMIC_LOAD_BINOP6432<"#ATOMNAND">;
-defm ATOMADD : PSEUDO_ATOMIC_LOAD_BINOP6432<"#ATOMADD">;
-defm ATOMSUB : PSEUDO_ATOMIC_LOAD_BINOP6432<"#ATOMSUB">;
-defm ATOMMAX : PSEUDO_ATOMIC_LOAD_BINOP6432<"#ATOMMAX">;
-defm ATOMMIN : PSEUDO_ATOMIC_LOAD_BINOP6432<"#ATOMMIN">;
-defm ATOMUMAX : PSEUDO_ATOMIC_LOAD_BINOP6432<"#ATOMUMAX">;
-defm ATOMUMIN : PSEUDO_ATOMIC_LOAD_BINOP6432<"#ATOMUMIN">;
-defm ATOMSWAP : PSEUDO_ATOMIC_LOAD_BINOP6432<"#ATOMSWAP">;
-
-//===----------------------------------------------------------------------===//
// Normal-Instructions-With-Lock-Prefix Pseudo Instructions
//===----------------------------------------------------------------------===//
@@ -1696,20 +1639,34 @@ def : Pat<(mul (loadi64 addr:$src1), i64immSExt32:$src2),
(IMUL64rmi32 addr:$src1, i64immSExt32:$src2)>;
// Increment reg.
-def : Pat<(add GR8 :$src, 1), (INC8r GR8 :$src)>;
-def : Pat<(add GR16:$src, 1), (INC16r GR16:$src)>, Requires<[Not64BitMode]>;
-def : Pat<(add GR16:$src, 1), (INC64_16r GR16:$src)>, Requires<[In64BitMode]>;
-def : Pat<(add GR32:$src, 1), (INC32r GR32:$src)>, Requires<[Not64BitMode]>;
-def : Pat<(add GR32:$src, 1), (INC64_32r GR32:$src)>, Requires<[In64BitMode]>;
-def : Pat<(add GR64:$src, 1), (INC64r GR64:$src)>;
+// Do not select INC if it is slow on the target
+def : Pat<(add GR8:$src, 1),
+ (INC8r GR8:$src)>, Requires<[NotSlowIncDec]>;
+def : Pat<(add GR16:$src, 1),
+ (INC16r GR16:$src)>, Requires<[NotSlowIncDec, Not64BitMode]>;
+def : Pat<(add GR16:$src, 1),
+ (INC64_16r GR16:$src)>, Requires<[NotSlowIncDec, In64BitMode]>;
+def : Pat<(add GR32:$src, 1),
+ (INC32r GR32:$src)>, Requires<[NotSlowIncDec, Not64BitMode]>;
+def : Pat<(add GR32:$src, 1),
+ (INC64_32r GR32:$src)>, Requires<[NotSlowIncDec, In64BitMode]>;
+def : Pat<(add GR64:$src, 1),
+ (INC64r GR64:$src)>, Requires<[NotSlowIncDec]>;
// Decrement reg.
-def : Pat<(add GR8 :$src, -1), (DEC8r GR8 :$src)>;
-def : Pat<(add GR16:$src, -1), (DEC16r GR16:$src)>, Requires<[Not64BitMode]>;
-def : Pat<(add GR16:$src, -1), (DEC64_16r GR16:$src)>, Requires<[In64BitMode]>;
-def : Pat<(add GR32:$src, -1), (DEC32r GR32:$src)>, Requires<[Not64BitMode]>;
-def : Pat<(add GR32:$src, -1), (DEC64_32r GR32:$src)>, Requires<[In64BitMode]>;
-def : Pat<(add GR64:$src, -1), (DEC64r GR64:$src)>;
+// Do not select DEC if it is slow on the target
+def : Pat<(add GR8:$src, -1),
+ (DEC8r GR8:$src)>, Requires<[NotSlowIncDec]>;
+def : Pat<(add GR16:$src, -1),
+ (DEC16r GR16:$src)>, Requires<[NotSlowIncDec, Not64BitMode]>;
+def : Pat<(add GR16:$src, -1),
+ (DEC64_16r GR16:$src)>, Requires<[NotSlowIncDec, In64BitMode]>;
+def : Pat<(add GR32:$src, -1),
+ (DEC32r GR32:$src)>, Requires<[NotSlowIncDec, Not64BitMode]>;
+def : Pat<(add GR32:$src, -1),
+ (DEC64_32r GR32:$src)>, Requires<[NotSlowIncDec, In64BitMode]>;
+def : Pat<(add GR64:$src, -1),
+ (DEC64r GR64:$src)>, Requires<[NotSlowIncDec]>;
// or reg/reg.
def : Pat<(or GR8 :$src1, GR8 :$src2), (OR8rr GR8 :$src1, GR8 :$src2)>;
diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td
index 1582f43..6f0fa94 100644
--- a/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -224,6 +224,10 @@ def X86Movhlps : SDNode<"X86ISD::MOVHLPS", SDTShuff2Op>;
def X86Movlps : SDNode<"X86ISD::MOVLPS", SDTShuff2Op>;
def X86Movlpd : SDNode<"X86ISD::MOVLPD", SDTShuff2Op>;
+def SDTPack : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>, SDTCisSameAs<2, 1>]>;
+def X86Packss : SDNode<"X86ISD::PACKSS", SDTPack>;
+def X86Packus : SDNode<"X86ISD::PACKUS", SDTPack>;
+
def X86Unpckl : SDNode<"X86ISD::UNPCKL", SDTShuff2Op>;
def X86Unpckh : SDNode<"X86ISD::UNPCKH", SDTShuff2Op>;
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index 6993577..0d3afc4 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -28,6 +28,7 @@
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
@@ -97,14 +98,11 @@ struct X86OpTblEntry {
// Pin the vtable to this file.
void X86InstrInfo::anchor() {}
-X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
- : X86GenInstrInfo((tm.getSubtarget<X86Subtarget>().is64Bit()
- ? X86::ADJCALLSTACKDOWN64
- : X86::ADJCALLSTACKDOWN32),
- (tm.getSubtarget<X86Subtarget>().is64Bit()
- ? X86::ADJCALLSTACKUP64
- : X86::ADJCALLSTACKUP32)),
- TM(tm), RI(tm) {
+X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
+ : X86GenInstrInfo(
+ (STI.is64Bit() ? X86::ADJCALLSTACKDOWN64 : X86::ADJCALLSTACKDOWN32),
+ (STI.is64Bit() ? X86::ADJCALLSTACKUP64 : X86::ADJCALLSTACKUP32)),
+ Subtarget(STI), RI(STI) {
static const X86OpTblEntry OpTbl2Addr[] = {
{ X86::ADC32ri, X86::ADC32mi, 0 },
@@ -1472,7 +1470,7 @@ X86InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
case X86::MOVSX32rr8:
case X86::MOVZX32rr8:
case X86::MOVSX64rr8:
- if (!TM.getSubtarget<X86Subtarget>().is64Bit())
+ if (!Subtarget.is64Bit())
// It's not always legal to reference the low 8-bit of the larger
// register in 32-bit mode.
return false;
@@ -1950,7 +1948,7 @@ X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc,
MachineRegisterInfo &RegInfo = MFI->getParent()->getRegInfo();
unsigned leaOutReg = RegInfo.createVirtualRegister(&X86::GR32RegClass);
unsigned Opc, leaInReg;
- if (TM.getSubtarget<X86Subtarget>().is64Bit()) {
+ if (Subtarget.is64Bit()) {
Opc = X86::LEA64_32r;
leaInReg = RegInfo.createVirtualRegister(&X86::GR64_NOSPRegClass);
} else {
@@ -2006,7 +2004,7 @@ X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc,
// just a single insert_subreg.
addRegReg(MIB, leaInReg, true, leaInReg, false);
} else {
- if (TM.getSubtarget<X86Subtarget>().is64Bit())
+ if (Subtarget.is64Bit())
leaInReg2 = RegInfo.createVirtualRegister(&X86::GR64_NOSPRegClass);
else
leaInReg2 = RegInfo.createVirtualRegister(&X86::GR32_NOSPRegClass);
@@ -2076,13 +2074,13 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
// we have better subtarget support, enable the 16-bit LEA generation here.
// 16-bit LEA is also slow on Core2.
bool DisableLEA16 = true;
- bool is64Bit = TM.getSubtarget<X86Subtarget>().is64Bit();
+ bool is64Bit = Subtarget.is64Bit();
unsigned MIOpc = MI->getOpcode();
switch (MIOpc) {
case X86::SHUFPSrri: {
assert(MI->getNumOperands() == 4 && "Unknown shufps instruction!");
- if (!TM.getSubtarget<X86Subtarget>().hasSSE2()) return nullptr;
+ if (!Subtarget.hasSSE2()) return nullptr;
unsigned B = MI->getOperand(1).getReg();
unsigned C = MI->getOperand(2).getReg();
@@ -2094,7 +2092,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
}
case X86::SHUFPDrri: {
assert(MI->getNumOperands() == 4 && "Unknown shufpd instruction!");
- if (!TM.getSubtarget<X86Subtarget>().hasSSE2()) return nullptr;
+ if (!Subtarget.hasSSE2()) return nullptr;
unsigned B = MI->getOperand(1).getReg();
unsigned C = MI->getOperand(2).getReg();
@@ -2672,8 +2670,7 @@ static X86::CondCode getSwappedCondition(X86::CondCode CC) {
/// getSETFromCond - Return a set opcode for the given condition and
/// whether it has memory operand.
-static unsigned getSETFromCond(X86::CondCode CC,
- bool HasMemoryOperand) {
+unsigned X86::getSETFromCond(CondCode CC, bool HasMemoryOperand) {
static const uint16_t Opc[16][2] = {
{ X86::SETAr, X86::SETAm },
{ X86::SETAEr, X86::SETAEm },
@@ -2693,14 +2690,14 @@ static unsigned getSETFromCond(X86::CondCode CC,
{ X86::SETSr, X86::SETSm }
};
- assert(CC < 16 && "Can only handle standard cond codes");
+ assert(CC <= LAST_VALID_COND && "Can only handle standard cond codes");
return Opc[CC][HasMemoryOperand ? 1 : 0];
}
/// getCMovFromCond - Return a cmov opcode for the given condition,
/// register size in bytes, and operand type.
-static unsigned getCMovFromCond(X86::CondCode CC, unsigned RegBytes,
- bool HasMemoryOperand) {
+unsigned X86::getCMovFromCond(CondCode CC, unsigned RegBytes,
+ bool HasMemoryOperand) {
static const uint16_t Opc[32][3] = {
{ X86::CMOVA16rr, X86::CMOVA32rr, X86::CMOVA64rr },
{ X86::CMOVAE16rr, X86::CMOVAE32rr, X86::CMOVAE64rr },
@@ -2976,7 +2973,7 @@ canInsertSelect(const MachineBasicBlock &MBB,
unsigned TrueReg, unsigned FalseReg,
int &CondCycles, int &TrueCycles, int &FalseCycles) const {
// Not all subtargets have cmov instructions.
- if (!TM.getSubtarget<X86Subtarget>().hasCMov())
+ if (!Subtarget.hasCMov())
return false;
if (Cond.size() != 1)
return false;
@@ -3027,8 +3024,7 @@ static bool isHReg(unsigned Reg) {
// Try and copy between VR128/VR64 and GR64 registers.
static unsigned CopyToFromAsymmetricReg(unsigned DestReg, unsigned SrcReg,
- const X86Subtarget& Subtarget) {
-
+ const X86Subtarget &Subtarget) {
// SrcReg(VR128) -> DestReg(GR64)
// SrcReg(VR64) -> DestReg(GR64)
@@ -3107,8 +3103,8 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
unsigned DestReg, unsigned SrcReg,
bool KillSrc) const {
// First deal with the normal symmetric copies.
- bool HasAVX = TM.getSubtarget<X86Subtarget>().hasAVX();
- bool HasAVX512 = TM.getSubtarget<X86Subtarget>().hasAVX512();
+ bool HasAVX = Subtarget.hasAVX();
+ bool HasAVX512 = Subtarget.hasAVX512();
unsigned Opc = 0;
if (X86::GR64RegClass.contains(DestReg, SrcReg))
Opc = X86::MOV64rr;
@@ -3120,7 +3116,7 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
// Copying to or from a physical H register on x86-64 requires a NOREX
// move. Otherwise use a normal move.
if ((isHReg(DestReg) || isHReg(SrcReg)) &&
- TM.getSubtarget<X86Subtarget>().is64Bit()) {
+ Subtarget.is64Bit()) {
Opc = X86::MOV8rr_NOREX;
// Both operands must be encodable without an REX prefix.
assert(X86::GR8_NOREXRegClass.contains(SrcReg, DestReg) &&
@@ -3137,7 +3133,7 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
else if (X86::VR256RegClass.contains(DestReg, SrcReg))
Opc = X86::VMOVAPSYrr;
if (!Opc)
- Opc = CopyToFromAsymmetricReg(DestReg, SrcReg, TM.getSubtarget<X86Subtarget>());
+ Opc = CopyToFromAsymmetricReg(DestReg, SrcReg, Subtarget);
if (Opc) {
BuildMI(MBB, MI, DL, get(Opc), DestReg)
@@ -3183,9 +3179,9 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
static unsigned getLoadStoreRegOpcode(unsigned Reg,
const TargetRegisterClass *RC,
bool isStackAligned,
- const TargetMachine &TM,
+ const X86Subtarget &STI,
bool load) {
- if (TM.getSubtarget<X86Subtarget>().hasAVX512()) {
+ if (STI.hasAVX512()) {
if (X86::VK8RegClass.hasSubClassEq(RC) ||
X86::VK16RegClass.hasSubClassEq(RC))
return load ? X86::KMOVWkm : X86::KMOVWmk;
@@ -3197,13 +3193,13 @@ static unsigned getLoadStoreRegOpcode(unsigned Reg,
return load ? X86::VMOVUPSZrm : X86::VMOVUPSZmr;
}
- bool HasAVX = TM.getSubtarget<X86Subtarget>().hasAVX();
+ bool HasAVX = STI.hasAVX();
switch (RC->getSize()) {
default:
llvm_unreachable("Unknown spill size");
case 1:
assert(X86::GR8RegClass.hasSubClassEq(RC) && "Unknown 1-byte regclass");
- if (TM.getSubtarget<X86Subtarget>().is64Bit())
+ if (STI.is64Bit())
// Copying to or from a physical H register on x86-64 requires a NOREX
// move. Otherwise use a normal move.
if (isHReg(Reg) || X86::GR8_ABCD_HRegClass.hasSubClassEq(RC))
@@ -3270,16 +3266,16 @@ static unsigned getLoadStoreRegOpcode(unsigned Reg,
static unsigned getStoreRegOpcode(unsigned SrcReg,
const TargetRegisterClass *RC,
bool isStackAligned,
- TargetMachine &TM) {
- return getLoadStoreRegOpcode(SrcReg, RC, isStackAligned, TM, false);
+ const X86Subtarget &STI) {
+ return getLoadStoreRegOpcode(SrcReg, RC, isStackAligned, STI, false);
}
static unsigned getLoadRegOpcode(unsigned DestReg,
const TargetRegisterClass *RC,
bool isStackAligned,
- const TargetMachine &TM) {
- return getLoadStoreRegOpcode(DestReg, RC, isStackAligned, TM, true);
+ const X86Subtarget &STI) {
+ return getLoadStoreRegOpcode(DestReg, RC, isStackAligned, STI, true);
}
void X86InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
@@ -3291,9 +3287,10 @@ void X86InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
assert(MF.getFrameInfo()->getObjectSize(FrameIdx) >= RC->getSize() &&
"Stack slot too small for store");
unsigned Alignment = std::max<uint32_t>(RC->getSize(), 16);
- bool isAligned = (TM.getFrameLowering()->getStackAlignment() >= Alignment) ||
- RI.canRealignStack(MF);
- unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, TM);
+ bool isAligned =
+ (MF.getTarget().getFrameLowering()->getStackAlignment() >= Alignment) ||
+ RI.canRealignStack(MF);
+ unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, Subtarget);
DebugLoc DL = MBB.findDebugLoc(MI);
addFrameReference(BuildMI(MBB, MI, DL, get(Opc)), FrameIdx)
.addReg(SrcReg, getKillRegState(isKill));
@@ -3309,7 +3306,7 @@ void X86InstrInfo::storeRegToAddr(MachineFunction &MF, unsigned SrcReg,
unsigned Alignment = std::max<uint32_t>(RC->getSize(), 16);
bool isAligned = MMOBegin != MMOEnd &&
(*MMOBegin)->getAlignment() >= Alignment;
- unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, TM);
+ unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, Subtarget);
DebugLoc DL;
MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc));
for (unsigned i = 0, e = Addr.size(); i != e; ++i)
@@ -3327,9 +3324,10 @@ void X86InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
const TargetRegisterInfo *TRI) const {
const MachineFunction &MF = *MBB.getParent();
unsigned Alignment = std::max<uint32_t>(RC->getSize(), 16);
- bool isAligned = (TM.getFrameLowering()->getStackAlignment() >= Alignment) ||
- RI.canRealignStack(MF);
- unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, TM);
+ bool isAligned =
+ (MF.getTarget().getFrameLowering()->getStackAlignment() >= Alignment) ||
+ RI.canRealignStack(MF);
+ unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, Subtarget);
DebugLoc DL = MBB.findDebugLoc(MI);
addFrameReference(BuildMI(MBB, MI, DL, get(Opc), DestReg), FrameIdx);
}
@@ -3343,7 +3341,7 @@ void X86InstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
unsigned Alignment = std::max<uint32_t>(RC->getSize(), 16);
bool isAligned = MMOBegin != MMOEnd &&
(*MMOBegin)->getAlignment() >= Alignment;
- unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, TM);
+ unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, Subtarget);
DebugLoc DL;
MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc), DestReg);
for (unsigned i = 0, e = Addr.size(); i != e; ++i)
@@ -3741,7 +3739,7 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2,
continue;
// EFLAGS is used by this instruction.
- X86::CondCode OldCC;
+ X86::CondCode OldCC = X86::COND_INVALID;
bool OpcIsSET = false;
if (IsCmpZero || IsSwapped) {
// We decode the condition code from opcode.
@@ -3964,7 +3962,7 @@ static bool Expand2AddrUndef(MachineInstrBuilder &MIB,
}
bool X86InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
- bool HasAVX = TM.getSubtarget<X86Subtarget>().hasAVX();
+ bool HasAVX = Subtarget.hasAVX();
MachineInstrBuilder MIB(*MI->getParent()->getParent(), MI);
switch (MI->getOpcode()) {
case X86::MOV32r0:
@@ -4075,7 +4073,7 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
unsigned Size, unsigned Align) const {
const DenseMap<unsigned,
std::pair<unsigned,unsigned> > *OpcodeTablePtr = nullptr;
- bool isCallRegIndirect = TM.getSubtarget<X86Subtarget>().callRegIndirect();
+ bool isCallRegIndirect = Subtarget.callRegIndirect();
bool isTwoAddrFold = false;
// Atom favors register form of call. So, we do not fold loads into calls
@@ -4316,7 +4314,7 @@ breakPartialRegDependency(MachineBasicBlock::iterator MI, unsigned OpNum,
if (X86::VR128RegClass.contains(Reg)) {
// These instructions are all floating point domain, so xorps is the best
// choice.
- bool HasAVX = TM.getSubtarget<X86Subtarget>().hasAVX();
+ bool HasAVX = Subtarget.hasAVX();
unsigned Opc = HasAVX ? X86::VXORPSrr : X86::XORPSrr;
BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), get(Opc), Reg)
.addReg(Reg, RegState::Undef).addReg(Reg, RegState::Undef);
@@ -4352,7 +4350,8 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
// If the function stack isn't realigned we don't want to fold instructions
// that need increased alignment.
if (!RI.needsStackRealignment(MF))
- Alignment = std::min(Alignment, TM.getFrameLowering()->getStackAlignment());
+ Alignment = std::min(
+ Alignment, MF.getTarget().getFrameLowering()->getStackAlignment());
if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) {
unsigned NewOpc = 0;
unsigned RCSize = 0;
@@ -4453,14 +4452,14 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
// Create a constant-pool entry and operands to load from it.
// Medium and large mode can't fold loads this way.
- if (TM.getCodeModel() != CodeModel::Small &&
- TM.getCodeModel() != CodeModel::Kernel)
+ if (MF.getTarget().getCodeModel() != CodeModel::Small &&
+ MF.getTarget().getCodeModel() != CodeModel::Kernel)
return nullptr;
// x86-32 PIC requires a PIC base register for constant pools.
unsigned PICBase = 0;
- if (TM.getRelocationModel() == Reloc::PIC_) {
- if (TM.getSubtarget<X86Subtarget>().is64Bit())
+ if (MF.getTarget().getRelocationModel() == Reloc::PIC_) {
+ if (Subtarget.is64Bit())
PICBase = X86::RIP;
else
// FIXME: PICBase = getGlobalBaseReg(&MF);
@@ -4600,7 +4599,7 @@ bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
const TargetRegisterClass *RC = getRegClass(MCID, Index, &RI, MF);
if (!MI->hasOneMemOperand() &&
RC == &X86::VR128RegClass &&
- !TM.getSubtarget<X86Subtarget>().isUnalignedMemAccessFast())
+ !Subtarget.isUnalignedMemAccessFast())
// Without memoperands, loadRegFromAddr and storeRegToStackSlot will
// conservatively assume the address is unaligned. That's bad for
// performance.
@@ -4748,13 +4747,13 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
cast<MachineSDNode>(N)->memoperands_end());
if (!(*MMOs.first) &&
RC == &X86::VR128RegClass &&
- !TM.getSubtarget<X86Subtarget>().isUnalignedMemAccessFast())
+ !Subtarget.isUnalignedMemAccessFast())
// Do not introduce a slow unaligned load.
return false;
unsigned Alignment = RC->getSize() == 32 ? 32 : 16;
bool isAligned = (*MMOs.first) &&
(*MMOs.first)->getAlignment() >= Alignment;
- Load = DAG.getMachineNode(getLoadRegOpcode(0, RC, isAligned, TM), dl,
+ Load = DAG.getMachineNode(getLoadRegOpcode(0, RC, isAligned, Subtarget), dl,
VT, MVT::Other, AddrOps);
NewNodes.push_back(Load);
@@ -4791,15 +4790,15 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
cast<MachineSDNode>(N)->memoperands_end());
if (!(*MMOs.first) &&
RC == &X86::VR128RegClass &&
- !TM.getSubtarget<X86Subtarget>().isUnalignedMemAccessFast())
+ !Subtarget.isUnalignedMemAccessFast())
// Do not introduce a slow unaligned store.
return false;
unsigned Alignment = RC->getSize() == 32 ? 32 : 16;
bool isAligned = (*MMOs.first) &&
(*MMOs.first)->getAlignment() >= Alignment;
- SDNode *Store = DAG.getMachineNode(getStoreRegOpcode(0, DstRC,
- isAligned, TM),
- dl, MVT::Other, AddrOps);
+ SDNode *Store =
+ DAG.getMachineNode(getStoreRegOpcode(0, DstRC, isAligned, Subtarget),
+ dl, MVT::Other, AddrOps);
NewNodes.push_back(Store);
// Preserve memory reference information.
@@ -4960,7 +4959,7 @@ bool X86InstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
default:
// XMM registers. In 64-bit mode we can be a bit more aggressive since we
// have 16 of them to play with.
- if (TM.getSubtargetImpl()->is64Bit()) {
+ if (Subtarget.is64Bit()) {
if (NumLoads >= 3)
return false;
} else if (NumLoads) {
@@ -4986,7 +4985,7 @@ bool X86InstrInfo::shouldScheduleAdjacent(MachineInstr* First,
// Check if this processor supports macro-fusion. Since this is a minor
// heuristic, we haven't specifically reserved a feature. hasAVX is a decent
// proxy for SandyBridge+.
- if (!TM.getSubtarget<X86Subtarget>().hasAVX())
+ if (!Subtarget.hasAVX())
return false;
enum {
@@ -5038,6 +5037,7 @@ bool X86InstrInfo::shouldScheduleAdjacent(MachineInstr* First,
case X86::TEST16rm:
case X86::TEST32rm:
case X86::TEST64rm:
+ case X86::TEST8ri_NOREX:
case X86::AND16i16:
case X86::AND16ri:
case X86::AND16ri8:
@@ -5168,7 +5168,7 @@ isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const {
/// TODO: Eliminate this and move the code to X86MachineFunctionInfo.
///
unsigned X86InstrInfo::getGlobalBaseReg(MachineFunction *MF) const {
- assert(!TM.getSubtarget<X86Subtarget>().is64Bit() &&
+ assert(!Subtarget.is64Bit() &&
"X86-64 PIC uses RIP relative addressing");
X86MachineFunctionInfo *X86FI = MF->getInfo<X86MachineFunctionInfo>();
@@ -5271,7 +5271,7 @@ static const uint16_t *lookupAVX2(unsigned opcode, unsigned domain) {
std::pair<uint16_t, uint16_t>
X86InstrInfo::getExecutionDomain(const MachineInstr *MI) const {
uint16_t domain = (MI->getDesc().TSFlags >> X86II::SSEDomainShift) & 3;
- bool hasAVX2 = TM.getSubtarget<X86Subtarget>().hasAVX2();
+ bool hasAVX2 = Subtarget.hasAVX2();
uint16_t validDomains = 0;
if (domain && lookup(MI->getOpcode(), domain))
validDomains = 0xe;
@@ -5286,7 +5286,7 @@ void X86InstrInfo::setExecutionDomain(MachineInstr *MI, unsigned Domain) const {
assert(dom && "Not an SSE instruction");
const uint16_t *table = lookup(MI->getOpcode(), dom);
if (!table) { // try the other table
- assert((TM.getSubtarget<X86Subtarget>().hasAVX2() || Domain < 3) &&
+ assert((Subtarget.hasAVX2() || Domain < 3) &&
"256-bit vector operations only available in AVX2");
table = lookupAVX2(MI->getOpcode(), dom);
}
@@ -5299,6 +5299,16 @@ void X86InstrInfo::getNoopForMachoTarget(MCInst &NopInst) const {
NopInst.setOpcode(X86::NOOP);
}
+void X86InstrInfo::getUnconditionalBranch(
+ MCInst &Branch, const MCSymbolRefExpr *BranchTarget) const {
+ Branch.setOpcode(X86::JMP_4);
+ Branch.addOperand(MCOperand::CreateExpr(BranchTarget));
+}
+
+void X86InstrInfo::getTrap(MCInst &MI) const {
+ MI.setOpcode(X86::TRAP);
+}
+
bool X86InstrInfo::isHighLatencyDef(int opc) const {
switch (opc) {
default: return false;
diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h
index 5f34915..c177e3a 100644
--- a/lib/Target/X86/X86InstrInfo.h
+++ b/lib/Target/X86/X86InstrInfo.h
@@ -24,7 +24,7 @@
namespace llvm {
class X86RegisterInfo;
- class X86TargetMachine;
+ class X86Subtarget;
namespace X86 {
// X86 specific condition code. These correspond to X86_*_COND in
@@ -46,6 +46,7 @@ namespace X86 {
COND_O = 13,
COND_P = 14,
COND_S = 15,
+ LAST_VALID_COND = COND_S,
// Artificial condition codes. These are used by AnalyzeBranch
// to indicate a block terminated with two conditional branches to
@@ -61,12 +62,21 @@ namespace X86 {
// Turn condition code into conditional branch opcode.
unsigned GetCondBranchFromCond(CondCode CC);
+ /// \brief Return a set opcode for the given condition and whether it has
+ /// a memory operand.
+ unsigned getSETFromCond(CondCode CC, bool HasMemoryOperand = false);
+
+ /// \brief Return a cmov opcode for the given condition, register size in
+ /// bytes, and operand type.
+ unsigned getCMovFromCond(CondCode CC, unsigned RegBytes,
+ bool HasMemoryOperand = false);
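+
+  // For example (hypothetical caller, not part of this change):
+  //   unsigned SetOpc  = X86::getSETFromCond(X86::COND_E);     // SETEr
+  //   unsigned CMovOpc = X86::getCMovFromCond(X86::COND_B, 4); // CMOVB32rr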
+
// Turn CMov opcode into condition code.
CondCode getCondFromCMovOpc(unsigned Opc);
/// GetOppositeBranchCondition - Return the inverse of the specified cond,
/// e.g. turning COND_E to COND_NE.
- CondCode GetOppositeBranchCondition(X86::CondCode CC);
+ CondCode GetOppositeBranchCondition(CondCode CC);
} // end namespace X86;
@@ -129,7 +139,7 @@ inline static bool isMem(const MachineInstr *MI, unsigned Op) {
}
class X86InstrInfo final : public X86GenInstrInfo {
- X86TargetMachine &TM;
+ X86Subtarget &Subtarget;
const X86RegisterInfo RI;
/// RegOp2MemOpTable3Addr, RegOp2MemOpTable0, RegOp2MemOpTable1,
@@ -156,7 +166,7 @@ class X86InstrInfo final : public X86GenInstrInfo {
virtual void anchor();
public:
- explicit X86InstrInfo(X86TargetMachine &tm);
+ explicit X86InstrInfo(X86Subtarget &STI);
/// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
/// such, whenever a client has an instance of instruction info, it should
@@ -396,6 +406,12 @@ public:
const SmallVectorImpl<MachineOperand> &MOs,
unsigned Size, unsigned Alignment) const;
+ void
+ getUnconditionalBranch(MCInst &Branch,
+ const MCSymbolRefExpr *BranchTarget) const override;
+
+ void getTrap(MCInst &MI) const override;
+
bool isHighLatencyDef(int opc) const override;
bool hasHighOperandLatency(const InstrItineraryData *ItinData,
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index 0d97669..e7b532c 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -155,27 +155,6 @@ def X86cas16 : SDNode<"X86ISD::LCMPXCHG16_DAG", SDTX86caspair,
[SDNPHasChain, SDNPInGlue, SDNPOutGlue, SDNPMayStore,
SDNPMayLoad, SDNPMemOperand]>;
-def X86AtomAdd64 : SDNode<"X86ISD::ATOMADD64_DAG", SDTX86atomicBinary,
- [SDNPHasChain, SDNPMayStore,
- SDNPMayLoad, SDNPMemOperand]>;
-def X86AtomSub64 : SDNode<"X86ISD::ATOMSUB64_DAG", SDTX86atomicBinary,
- [SDNPHasChain, SDNPMayStore,
- SDNPMayLoad, SDNPMemOperand]>;
-def X86AtomOr64 : SDNode<"X86ISD::ATOMOR64_DAG", SDTX86atomicBinary,
- [SDNPHasChain, SDNPMayStore,
- SDNPMayLoad, SDNPMemOperand]>;
-def X86AtomXor64 : SDNode<"X86ISD::ATOMXOR64_DAG", SDTX86atomicBinary,
- [SDNPHasChain, SDNPMayStore,
- SDNPMayLoad, SDNPMemOperand]>;
-def X86AtomAnd64 : SDNode<"X86ISD::ATOMAND64_DAG", SDTX86atomicBinary,
- [SDNPHasChain, SDNPMayStore,
- SDNPMayLoad, SDNPMemOperand]>;
-def X86AtomNand64 : SDNode<"X86ISD::ATOMNAND64_DAG", SDTX86atomicBinary,
- [SDNPHasChain, SDNPMayStore,
- SDNPMayLoad, SDNPMemOperand]>;
-def X86AtomSwap64 : SDNode<"X86ISD::ATOMSWAP64_DAG", SDTX86atomicBinary,
- [SDNPHasChain, SDNPMayStore,
- SDNPMayLoad, SDNPMemOperand]>;
def X86retflag : SDNode<"X86ISD::RET_FLAG", SDTX86Ret,
[SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
@@ -208,6 +187,8 @@ def X86rdtsc : SDNode<"X86ISD::RDTSC_DAG", SDTX86Void,
[SDNPHasChain, SDNPOutGlue, SDNPSideEffect]>;
def X86rdtscp : SDNode<"X86ISD::RDTSCP_DAG", SDTX86Void,
[SDNPHasChain, SDNPOutGlue, SDNPSideEffect]>;
+def X86rdpmc : SDNode<"X86ISD::RDPMC_DAG", SDTX86Void,
+ [SDNPHasChain, SDNPOutGlue, SDNPSideEffect]>;
def X86Wrapper : SDNode<"X86ISD::Wrapper", SDTX86Wrapper>;
def X86WrapperRIP : SDNode<"X86ISD::WrapperRIP", SDTX86Wrapper>;
@@ -795,6 +776,7 @@ def OptForSpeed : Predicate<"!OptForSize">;
def FastBTMem : Predicate<"!Subtarget->isBTMemSlow()">;
def CallImmAddr : Predicate<"Subtarget->IsLegalToCallImmediateAddr(TM)">;
def FavorMemIndirectCall : Predicate<"!Subtarget->callRegIndirect()">;
+def NotSlowIncDec : Predicate<"!Subtarget->slowIncDec()">;
//===----------------------------------------------------------------------===//
// X86 Instruction Format Definitions.
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index 1eb0485..f9a5ae1 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -4337,20 +4337,6 @@ defm PCMPGTD : PDI_binop_all<0x66, "pcmpgtd", X86pcmpgt, v4i32, v8i32,
SSE_INTALU_ITINS_P, 0>;
//===---------------------------------------------------------------------===//
-// SSE2 - Packed Integer Pack Instructions
-//===---------------------------------------------------------------------===//
-
-defm PACKSSWB : PDI_binop_all_int<0x63, "packsswb", int_x86_sse2_packsswb_128,
- int_x86_avx2_packsswb,
- SSE_INTALU_ITINS_SHUFF_P, 0>;
-defm PACKSSDW : PDI_binop_all_int<0x6B, "packssdw", int_x86_sse2_packssdw_128,
- int_x86_avx2_packssdw,
- SSE_INTALU_ITINS_SHUFF_P, 0>;
-defm PACKUSWB : PDI_binop_all_int<0x67, "packuswb", int_x86_sse2_packuswb_128,
- int_x86_avx2_packuswb,
- SSE_INTALU_ITINS_SHUFF_P, 0>;
-
-//===---------------------------------------------------------------------===//
// SSE2 - Packed Integer Shuffle Instructions
//===---------------------------------------------------------------------===//
@@ -4432,6 +4418,136 @@ let Predicates = [UseSSE2] in {
}
//===---------------------------------------------------------------------===//
+// Packed Integer Pack Instructions (SSE & AVX)
+//===---------------------------------------------------------------------===//
+
+let ExeDomain = SSEPackedInt in {
+multiclass sse2_pack<bits<8> opc, string OpcodeStr, ValueType OutVT,
+ ValueType ArgVT, SDNode OpNode, PatFrag bc_frag,
+ bit Is2Addr = 1> {
+ def rr : PDI<opc, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set VR128:$dst,
+ (OutVT (OpNode (ArgVT VR128:$src1), VR128:$src2)))]>,
+ Sched<[WriteShuffle]>;
+ def rm : PDI<opc, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set VR128:$dst,
+ (OutVT (OpNode VR128:$src1,
+ (bc_frag (memopv2i64 addr:$src2)))))]>,
+ Sched<[WriteShuffleLd, ReadAfterLd]>;
+}
+
+multiclass sse2_pack_y<bits<8> opc, string OpcodeStr, ValueType OutVT,
+ ValueType ArgVT, SDNode OpNode, PatFrag bc_frag> {
+ def Yrr : PDI<opc, MRMSrcReg,
+ (outs VR256:$dst), (ins VR256:$src1, VR256:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR256:$dst,
+ (OutVT (OpNode (ArgVT VR256:$src1), VR256:$src2)))]>,
+ Sched<[WriteShuffle]>;
+ def Yrm : PDI<opc, MRMSrcMem,
+ (outs VR256:$dst), (ins VR256:$src1, i256mem:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR256:$dst,
+ (OutVT (OpNode VR256:$src1,
+ (bc_frag (memopv4i64 addr:$src2)))))]>,
+ Sched<[WriteShuffleLd, ReadAfterLd]>;
+}
+
+multiclass sse4_pack<bits<8> opc, string OpcodeStr, ValueType OutVT,
+ ValueType ArgVT, SDNode OpNode, PatFrag bc_frag,
+ bit Is2Addr = 1> {
+ def rr : SS48I<opc, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set VR128:$dst,
+ (OutVT (OpNode (ArgVT VR128:$src1), VR128:$src2)))]>,
+ Sched<[WriteShuffle]>;
+ def rm : SS48I<opc, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set VR128:$dst,
+ (OutVT (OpNode VR128:$src1,
+ (bc_frag (memopv2i64 addr:$src2)))))]>,
+ Sched<[WriteShuffleLd, ReadAfterLd]>;
+}
+
+multiclass sse4_pack_y<bits<8> opc, string OpcodeStr, ValueType OutVT,
+ ValueType ArgVT, SDNode OpNode, PatFrag bc_frag> {
+ def Yrr : SS48I<opc, MRMSrcReg,
+ (outs VR256:$dst), (ins VR256:$src1, VR256:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR256:$dst,
+ (OutVT (OpNode (ArgVT VR256:$src1), VR256:$src2)))]>,
+ Sched<[WriteShuffle]>;
+ def Yrm : SS48I<opc, MRMSrcMem,
+ (outs VR256:$dst), (ins VR256:$src1, i256mem:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR256:$dst,
+ (OutVT (OpNode VR256:$src1,
+ (bc_frag (memopv4i64 addr:$src2)))))]>,
+ Sched<[WriteShuffleLd, ReadAfterLd]>;
+}
+
+let Predicates = [HasAVX] in {
+ defm VPACKSSWB : sse2_pack<0x63, "vpacksswb", v16i8, v8i16, X86Packss,
+ bc_v8i16, 0>, VEX_4V;
+ defm VPACKSSDW : sse2_pack<0x6B, "vpackssdw", v8i16, v4i32, X86Packss,
+ bc_v4i32, 0>, VEX_4V;
+
+ defm VPACKUSWB : sse2_pack<0x67, "vpackuswb", v16i8, v8i16, X86Packus,
+ bc_v8i16, 0>, VEX_4V;
+ defm VPACKUSDW : sse4_pack<0x2B, "vpackusdw", v8i16, v4i32, X86Packus,
+ bc_v4i32, 0>, VEX_4V;
+}
+
+let Predicates = [HasAVX2] in {
+ defm VPACKSSWB : sse2_pack_y<0x63, "vpacksswb", v32i8, v16i16, X86Packss,
+ bc_v16i16>, VEX_4V, VEX_L;
+ defm VPACKSSDW : sse2_pack_y<0x6B, "vpackssdw", v16i16, v8i32, X86Packss,
+ bc_v8i32>, VEX_4V, VEX_L;
+
+ defm VPACKUSWB : sse2_pack_y<0x67, "vpackuswb", v32i8, v16i16, X86Packus,
+ bc_v16i16>, VEX_4V, VEX_L;
+ defm VPACKUSDW : sse4_pack_y<0x2B, "vpackusdw", v16i16, v8i32, X86Packus,
+ bc_v8i32>, VEX_4V, VEX_L;
+}
+
+let Constraints = "$src1 = $dst" in {
+ defm PACKSSWB : sse2_pack<0x63, "packsswb", v16i8, v8i16, X86Packss,
+ bc_v8i16>;
+ defm PACKSSDW : sse2_pack<0x6B, "packssdw", v8i16, v4i32, X86Packss,
+ bc_v4i32>;
+
+ defm PACKUSWB : sse2_pack<0x67, "packuswb", v16i8, v8i16, X86Packus,
+ bc_v8i16>;
+
+ let Predicates = [HasSSE41] in
+ defm PACKUSDW : sse4_pack<0x2B, "packusdw", v8i16, v4i32, X86Packus,
+ bc_v4i32>;
+}
+} // ExeDomain = SSEPackedInt
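+
+// Rough model of the pack operations defined above (C-style sketch, assuming
+// the documented saturating narrowing; the Y forms work per 128-bit lane):
+//   packsswb: dst.i8[i] = sat_s8(concat(src1, src2).i16[i]);
+//   packuswb: dst.u8[i] = sat_u8(concat(src1, src2).i16[i]);
+// packssdw/packusdw narrow i32 to i16 in the same way.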
+
+//===---------------------------------------------------------------------===//
// SSE2 - Packed Integer Unpack Instructions
//===---------------------------------------------------------------------===//
@@ -5239,6 +5355,60 @@ let Constraints = "$src1 = $dst", Predicates = [UseSSE3] in {
f128mem, SSE_ALU_F64P>, PD;
}
+// Patterns used to select 'addsub' instructions.
+let Predicates = [HasAVX] in {
+ // Constant 170 corresponds to the binary mask '10101010'.
+ // When used as a blend mask, it allows selecting eight elements from two
+  // input vectors as follows:
+ // - Even-numbered values in the destination are copied from
+ // the corresponding elements in the first input vector;
+ // - Odd-numbered values in the destination are copied from
+ // the corresponding elements in the second input vector.
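+  //
+  // A minimal sketch of that blend semantics (C-like, assuming the usual
+  // per-lane definition; not from this patch):
+  //   dst[i] = ((mask >> i) & 1) ? src2[i] : src1[i];
+  // With src1 = fsub and src2 = fadd, mask 170 therefore takes the
+  // difference in even lanes and the sum in odd lanes, which is exactly
+  // what vaddsubps computes in a single instruction.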
+
+ def : Pat<(v8f32 (X86Blendi (v8f32 (fsub VR256:$lhs, VR256:$rhs)),
+ (v8f32 (fadd VR256:$lhs, VR256:$rhs)), (i32 170))),
+ (VADDSUBPSYrr VR256:$lhs, VR256:$rhs)>;
+
+ // Constant 10 corresponds to the binary mask '1010'.
+  // In the two patterns below, constant 10 is used as a blend mask to select
+  //  - the 1st and 3rd elements from the first input vector (the 'fsub' node);
+  //  - the 2nd and 4th elements from the second input vector (the 'fadd' node).
+
+ def : Pat<(v4f64 (X86Blendi (v4f64 (fsub VR256:$lhs, VR256:$rhs)),
+ (v4f64 (fadd VR256:$lhs, VR256:$rhs)), (i32 10))),
+ (VADDSUBPDYrr VR256:$lhs, VR256:$rhs)>;
+ def : Pat<(v4f32 (X86Blendi (v4f32 (fsub VR128:$lhs, VR128:$rhs)),
+ (v4f32 (fadd VR128:$lhs, VR128:$rhs)), (i32 10))),
+ (VADDSUBPSrr VR128:$lhs, VR128:$rhs)>;
+ def : Pat<(v2f64 (X86Blendi (v2f64 (fsub VR128:$lhs, VR128:$rhs)),
+ (v2f64 (fadd VR128:$lhs, VR128:$rhs)), (i32 2))),
+ (VADDSUBPDrr VR128:$lhs, VR128:$rhs)>;
+ def : Pat<(v2f64 (X86Movsd (v2f64 (fadd VR128:$lhs, VR128:$rhs)),
+ (v2f64 (fsub VR128:$lhs, VR128:$rhs)))),
+ (VADDSUBPDrr VR128:$lhs, VR128:$rhs)>;
+}
+
+let Predicates = [UseSSE3] in {
+ // Constant 10 corresponds to the binary mask '1010'.
+ // In the pattern below, it is used as a blend mask to select:
+  //  - the 1st and 3rd elements from the first input vector (the fsub node);
+  //  - the 2nd and 4th elements from the second input vector (the fadd node).
+
+ def : Pat<(v4f32 (X86Blendi (v4f32 (fsub VR128:$lhs, VR128:$rhs)),
+ (v4f32 (fadd VR128:$lhs, VR128:$rhs)), (i32 10))),
+ (ADDSUBPSrr VR128:$lhs, VR128:$rhs)>;
+
+ def : Pat<(v2f64 (X86Blendi (v2f64 (fsub VR128:$lhs, VR128:$rhs)),
+ (v2f64 (fadd VR128:$lhs, VR128:$rhs)), (i32 2))),
+ (ADDSUBPDrr VR128:$lhs, VR128:$rhs)>;
+ def : Pat<(v2f64 (X86Movsd (v2f64 (fadd VR128:$lhs, VR128:$rhs)),
+ (v2f64 (fsub VR128:$lhs, VR128:$rhs)))),
+ (ADDSUBPDrr VR128:$lhs, VR128:$rhs)>;
+}
+
//===---------------------------------------------------------------------===//
// SSE3 Instructions
//===---------------------------------------------------------------------===//
@@ -7053,8 +7223,6 @@ multiclass SS48I_binop_rm2<bits<8> opc, string OpcodeStr, SDNode OpNode,
let Predicates = [HasAVX] in {
let isCommutable = 0 in
- defm VPACKUSDW : SS41I_binop_rm_int<0x2B, "vpackusdw", int_x86_sse41_packusdw,
- 0, DEFAULT_ITINS_SHUFFLESCHED>, VEX_4V;
defm VPMINSB : SS48I_binop_rm<0x38, "vpminsb", X86smin, v16i8, VR128,
loadv2i64, i128mem, 0, SSE_INTALU_ITINS_P>,
VEX_4V;
@@ -7086,9 +7254,6 @@ let Predicates = [HasAVX] in {
let Predicates = [HasAVX2] in {
let isCommutable = 0 in
- defm VPACKUSDW : SS41I_binop_rm_int_y<0x2B, "vpackusdw",
- int_x86_avx2_packusdw, WriteShuffle>,
- VEX_4V, VEX_L;
defm VPMINSBY : SS48I_binop_rm<0x38, "vpminsb", X86smin, v32i8, VR256,
loadv4i64, i256mem, 0, SSE_INTALU_ITINS_P>,
VEX_4V, VEX_L;
@@ -7120,8 +7285,6 @@ let Predicates = [HasAVX2] in {
let Constraints = "$src1 = $dst" in {
let isCommutable = 0 in
- defm PACKUSDW : SS41I_binop_rm_int<0x2B, "packusdw", int_x86_sse41_packusdw,
- 1, DEFAULT_ITINS_SHUFFLESCHED>;
defm PMINSB : SS48I_binop_rm<0x38, "pminsb", X86smin, v16i8, VR128,
memopv2i64, i128mem, 1, SSE_INTALU_ITINS_P>;
defm PMINSD : SS48I_binop_rm<0x39, "pminsd", X86smin, v4i32, VR128,
@@ -7969,6 +8132,16 @@ class avx_broadcast<bits<8> opc, string OpcodeStr, RegisterClass RC,
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set RC:$dst, (Int addr:$src))]>, Sched<[Sched]>, VEX;
+class avx_broadcast_no_int<bits<8> opc, string OpcodeStr, RegisterClass RC,
+ X86MemOperand x86memop, ValueType VT,
+ PatFrag ld_frag, SchedWrite Sched> :
+ AVX8I<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set RC:$dst, (VT (X86VBroadcast (ld_frag addr:$src))))]>,
+ Sched<[Sched]>, VEX {
+ let mayLoad = 1;
+}
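+
+// Expressing these as plain load + X86VBroadcast patterns (rather than
+// intrinsics) is what makes the standalone HasAVX broadcast-from-load
+// patterns removed further down redundant.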
+
// AVX2 adds register forms
class avx2_broadcast_reg<bits<8> opc, string OpcodeStr, RegisterClass RC,
Intrinsic Int, SchedWrite Sched> :
@@ -7977,16 +8150,15 @@ class avx2_broadcast_reg<bits<8> opc, string OpcodeStr, RegisterClass RC,
[(set RC:$dst, (Int VR128:$src))]>, Sched<[Sched]>, VEX;
let ExeDomain = SSEPackedSingle in {
- def VBROADCASTSSrm : avx_broadcast<0x18, "vbroadcastss", VR128, f32mem,
- int_x86_avx_vbroadcast_ss, WriteLoad>;
- def VBROADCASTSSYrm : avx_broadcast<0x18, "vbroadcastss", VR256, f32mem,
- int_x86_avx_vbroadcast_ss_256,
- WriteFShuffleLd>, VEX_L;
+ def VBROADCASTSSrm : avx_broadcast_no_int<0x18, "vbroadcastss", VR128,
+ f32mem, v4f32, loadf32, WriteLoad>;
+ def VBROADCASTSSYrm : avx_broadcast_no_int<0x18, "vbroadcastss", VR256,
+ f32mem, v8f32, loadf32,
+ WriteFShuffleLd>, VEX_L;
}
let ExeDomain = SSEPackedDouble in
-def VBROADCASTSDYrm : avx_broadcast<0x19, "vbroadcastsd", VR256, f64mem,
- int_x86_avx_vbroadcast_sd_256,
- WriteFShuffleLd>, VEX_L;
+def VBROADCASTSDYrm : avx_broadcast_no_int<0x19, "vbroadcastsd", VR256, f64mem,
+ v4f64, loadf64, WriteFShuffleLd>, VEX_L;
def VBROADCASTF128 : avx_broadcast<0x1A, "vbroadcastf128", VR256, f128mem,
int_x86_avx_vbroadcastf128_pd_256,
WriteFShuffleLd>, VEX_L;
@@ -8366,6 +8538,21 @@ let Predicates = [HasF16C] in {
(VCVTPH2PSrm addr:$src)>;
}
+// Patterns for matching conversions from float to half-float and vice versa.
+let Predicates = [HasF16C] in {
+ def : Pat<(f32_to_f16 FR32:$src),
+ (i16 (EXTRACT_SUBREG (VMOVPDI2DIrr (VCVTPS2PHrr
+ (COPY_TO_REGCLASS FR32:$src, VR128), 0)), sub_16bit))>;
+
+ def : Pat<(f16_to_f32 GR16:$src),
+ (f32 (COPY_TO_REGCLASS (VCVTPH2PSrr
+ (COPY_TO_REGCLASS (MOVSX32rr16 GR16:$src), VR128)), FR32)) >;
+
+ def : Pat<(f16_to_f32 (i16 (f32_to_f16 FR32:$src))),
+ (f32 (COPY_TO_REGCLASS (VCVTPH2PSrr
+ (VCVTPS2PHrr (COPY_TO_REGCLASS FR32:$src, VR128), 0)), FR32)) >;
+}
+
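The round-trip pattern above (f32_to_f16 feeding f16_to_f32) selects a vcvtps2ph/vcvtph2ps pair. A userland sketch of the same pair via compiler intrinsics — assumes an F16C-capable CPU and -mf16c; rounding immediate 0 selects round-to-nearest-even:

    #include <immintrin.h>
    #include <cstdio>

    int main() {
      __m128 S = _mm_set_ss(1.5f);
      __m128i H = _mm_cvtps_ph(S, 0);        // vcvtps2ph: f32 -> f16 in lane 0
      __m128 R = _mm_cvtph_ps(H);            // vcvtph2ps: f16 -> f32
      std::printf("%f\n", _mm_cvtss_f32(R)); // 1.500000 (exactly representable)
    }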
//===----------------------------------------------------------------------===//
// AVX2 Instructions
//===----------------------------------------------------------------------===//
@@ -8543,13 +8730,6 @@ def : Pat<(v4i32 (X86VBroadcast (loadi32 addr:$src))),
}
let Predicates = [HasAVX] in {
-def : Pat<(v8f32 (X86VBroadcast (loadf32 addr:$src))),
- (VBROADCASTSSYrm addr:$src)>;
-def : Pat<(v4f64 (X86VBroadcast (loadf64 addr:$src))),
- (VBROADCASTSDYrm addr:$src)>;
-def : Pat<(v4f32 (X86VBroadcast (loadf32 addr:$src))),
- (VBROADCASTSSrm addr:$src)>;
-
// Provide fallback in case the load node that is used in the patterns above
// is used by additional users, which prevents the pattern selection.
let AddedComplexity = 20 in {
diff --git a/lib/Target/X86/X86InstrSystem.td b/lib/Target/X86/X86InstrSystem.td
index b5595cb..5402780 100644
--- a/lib/Target/X86/X86InstrSystem.td
+++ b/lib/Target/X86/X86InstrSystem.td
@@ -439,7 +439,10 @@ def LLDT16m : I<0x00, MRM2m, (outs), (ins i16mem:$src),
let SchedRW = [WriteSystem] in {
def WRMSR : I<0x30, RawFrm, (outs), (ins), "wrmsr", [], IIC_WRMSR>, TB;
def RDMSR : I<0x32, RawFrm, (outs), (ins), "rdmsr", [], IIC_RDMSR>, TB;
-def RDPMC : I<0x33, RawFrm, (outs), (ins), "rdpmc", [], IIC_RDPMC>, TB;
+
+let Defs = [RAX, RDX], Uses = [ECX] in
+ def RDPMC : I<0x33, RawFrm, (outs), (ins), "rdpmc", [(X86rdpmc)], IIC_RDPMC>,
+ TB;
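With Defs = [RAX, RDX] and Uses = [ECX], the instruction is now modeled the way the hardware behaves: rdpmc reads the performance counter selected by ECX and returns it in EDX:EAX. A hedged inline-asm sketch (GCC/Clang syntax; executing it in user mode requires CR4.PCE to be set, otherwise it faults):

    #include <cstdint>

    static inline uint64_t rdpmc(uint32_t Counter) {
      uint32_t Lo, Hi;
      asm volatile("rdpmc" : "=a"(Lo), "=d"(Hi) : "c"(Counter));
      return (static_cast<uint64_t>(Hi) << 32) | Lo;
    }

    int main() { return 0; } // not invoked by default; see CR4.PCE caveat above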
def SMSW16r : I<0x01, MRM4r, (outs GR16:$dst), (ins),
"smsw{w}\t$dst", [], IIC_SMSW>, OpSize16, TB;
diff --git a/lib/Target/X86/X86JITInfo.cpp b/lib/Target/X86/X86JITInfo.cpp
index e969ef2..a082c4f 100644
--- a/lib/Target/X86/X86JITInfo.cpp
+++ b/lib/Target/X86/X86JITInfo.cpp
@@ -432,7 +432,7 @@ X86JITInfo::getLazyResolverFunction(JITCompilerFn F) {
// SSE Callback should be called for SSE-enabled LLVM.
return X86CompilationCallback_SSE;
#else
- if (Subtarget->hasSSE1())
+ if (useSSE)
return X86CompilationCallback_SSE;
#endif
#endif
@@ -440,8 +440,8 @@ X86JITInfo::getLazyResolverFunction(JITCompilerFn F) {
return X86CompilationCallback;
}
-X86JITInfo::X86JITInfo(X86TargetMachine &tm) : TM(tm) {
- Subtarget = &TM.getSubtarget<X86Subtarget>();
+X86JITInfo::X86JITInfo(bool UseSSE) {
+ useSSE = UseSSE;
useGOT = 0;
TLSOffset = nullptr;
}
diff --git a/lib/Target/X86/X86JITInfo.h b/lib/Target/X86/X86JITInfo.h
index 4d279de..564343f 100644
--- a/lib/Target/X86/X86JITInfo.h
+++ b/lib/Target/X86/X86JITInfo.h
@@ -19,16 +19,14 @@
#include "llvm/Target/TargetJITInfo.h"
namespace llvm {
- class X86TargetMachine;
class X86Subtarget;
class X86JITInfo : public TargetJITInfo {
- X86TargetMachine &TM;
- const X86Subtarget *Subtarget;
uintptr_t PICBase;
- char* TLSOffset;
+ char *TLSOffset;
+ bool useSSE;
public:
- explicit X86JITInfo(X86TargetMachine &tm);
+ explicit X86JITInfo(bool UseSSE);
/// replaceMachineCodeForFunction - Make it so that calling the function
/// whose machine code is at OLD turns into a call to NEW, perhaps by
diff --git a/lib/Target/X86/X86MCInstLower.cpp b/lib/Target/X86/X86MCInstLower.cpp
index 0190080..2bd70a9 100644
--- a/lib/Target/X86/X86MCInstLower.cpp
+++ b/lib/Target/X86/X86MCInstLower.cpp
@@ -13,6 +13,7 @@
//===----------------------------------------------------------------------===//
#include "X86AsmPrinter.h"
+#include "X86RegisterInfo.h"
#include "InstPrinter/X86ATTInstPrinter.h"
#include "MCTargetDesc/X86BaseInfo.h"
#include "llvm/ADT/SmallString.h"
@@ -779,6 +780,9 @@ static void LowerPATCHPOINT(MCStreamer &OS, StackMaps &SM,
void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
X86MCInstLower MCInstLowering(*MF, *this);
+ const X86RegisterInfo *RI =
+ static_cast<const X86RegisterInfo *>(TM.getRegisterInfo());
+
switch (MI->getOpcode()) {
case TargetOpcode::DBG_VALUE:
llvm_unreachable("Should be handled target independently");
@@ -883,6 +887,37 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
.addReg(X86::R10)
.addReg(X86::RAX));
return;
+
+ case X86::SEH_PushReg:
+ OutStreamer.EmitWinCFIPushReg(RI->getSEHRegNum(MI->getOperand(0).getImm()));
+ return;
+
+ case X86::SEH_SaveReg:
+ OutStreamer.EmitWinCFISaveReg(RI->getSEHRegNum(MI->getOperand(0).getImm()),
+ MI->getOperand(1).getImm());
+ return;
+
+ case X86::SEH_SaveXMM:
+ OutStreamer.EmitWinCFISaveXMM(RI->getSEHRegNum(MI->getOperand(0).getImm()),
+ MI->getOperand(1).getImm());
+ return;
+
+ case X86::SEH_StackAlloc:
+ OutStreamer.EmitWinCFIAllocStack(MI->getOperand(0).getImm());
+ return;
+
+ case X86::SEH_SetFrame:
+ OutStreamer.EmitWinCFISetFrame(RI->getSEHRegNum(MI->getOperand(0).getImm()),
+ MI->getOperand(1).getImm());
+ return;
+
+ case X86::SEH_PushFrame:
+ OutStreamer.EmitWinCFIPushFrame(MI->getOperand(0).getImm());
+ return;
+
+ case X86::SEH_EndPrologue:
+ OutStreamer.EmitWinCFIEndProlog();
+ return;
}
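Each SEH pseudo-opcode handled above emits one Win64 unwind directive on the streamer. The correspondence, restated as a runnable table — the .seh_* spellings are the usual directive names and are an assumption here, not quoted from MC:

    #include <cstdio>

    int main() {
      const char *Map[][2] = {
          {"SEH_PushReg",     ".seh_pushreg"},
          {"SEH_SaveReg",     ".seh_savereg"},
          {"SEH_SaveXMM",     ".seh_savexmm"},
          {"SEH_StackAlloc",  ".seh_stackalloc"},
          {"SEH_SetFrame",    ".seh_setframe"},
          {"SEH_PushFrame",   ".seh_pushframe"},
          {"SEH_EndPrologue", ".seh_endprologue"},
      };
      for (auto &Row : Map)
        std::printf("%-16s -> %s\n", Row[0], Row[1]);
    }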
MCInst TmpInst;
diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp
index a83e1e4..e8a7e84 100644
--- a/lib/Target/X86/X86RegisterInfo.cpp
+++ b/lib/Target/X86/X86RegisterInfo.cpp
@@ -53,20 +53,18 @@ static cl::opt<bool>
EnableBasePointer("x86-use-base-pointer", cl::Hidden, cl::init(true),
cl::desc("Enable use of a base pointer for complex stack frames"));
-X86RegisterInfo::X86RegisterInfo(X86TargetMachine &tm)
- : X86GenRegisterInfo((tm.getSubtarget<X86Subtarget>().is64Bit()
- ? X86::RIP : X86::EIP),
- X86_MC::getDwarfRegFlavour(tm.getTargetTriple(), false),
- X86_MC::getDwarfRegFlavour(tm.getTargetTriple(), true),
- (tm.getSubtarget<X86Subtarget>().is64Bit()
- ? X86::RIP : X86::EIP)),
- TM(tm) {
+X86RegisterInfo::X86RegisterInfo(const X86Subtarget &STI)
+ : X86GenRegisterInfo(
+ (STI.is64Bit() ? X86::RIP : X86::EIP),
+ X86_MC::getDwarfRegFlavour(STI.getTargetTriple(), false),
+ X86_MC::getDwarfRegFlavour(STI.getTargetTriple(), true),
+ (STI.is64Bit() ? X86::RIP : X86::EIP)),
+ Subtarget(STI) {
X86_MC::InitLLVM2SEHRegisterMapping(this);
// Cache some information.
- const X86Subtarget *Subtarget = &TM.getSubtarget<X86Subtarget>();
- Is64Bit = Subtarget->is64Bit();
- IsWin64 = Subtarget->isTargetWin64();
+ Is64Bit = Subtarget.is64Bit();
+ IsWin64 = Subtarget.isTargetWin64();
if (Is64Bit) {
SlotSize = 8;
@@ -83,21 +81,6 @@ X86RegisterInfo::X86RegisterInfo(X86TargetMachine &tm)
BasePtr = Is64Bit ? X86::RBX : X86::ESI;
}
-/// getCompactUnwindRegNum - This function maps the register to the number for
-/// compact unwind encoding. Return -1 if the register isn't valid.
-int X86RegisterInfo::getCompactUnwindRegNum(unsigned RegNum, bool isEH) const {
- switch (getLLVMRegNum(RegNum, isEH)) {
- case X86::EBX: case X86::RBX: return 1;
- case X86::ECX: case X86::R12: return 2;
- case X86::EDX: case X86::R13: return 3;
- case X86::EDI: case X86::R14: return 4;
- case X86::ESI: case X86::R15: return 5;
- case X86::EBP: case X86::RBP: return 6;
- }
-
- return -1;
-}
-
bool
X86RegisterInfo::trackLivenessAfterRegAlloc(const MachineFunction &MF) const {
// ExeDepsFixer and PostRAScheduler require liveness.
@@ -173,9 +156,8 @@ X86RegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC) const{
}
const TargetRegisterClass *
-X86RegisterInfo::getPointerRegClass(const MachineFunction &MF, unsigned Kind)
- const {
- const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
+X86RegisterInfo::getPointerRegClass(const MachineFunction &MF,
+ unsigned Kind) const {
switch (Kind) {
default: llvm_unreachable("Unexpected Kind in getPointerRegClass!");
case 0: // Normal GPRs.
@@ -225,7 +207,7 @@ X86RegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
case X86::GR64RegClassID:
return 12 - FPDiff;
case X86::VR128RegClassID:
- return TM.getSubtarget<X86Subtarget>().is64Bit() ? 10 : 4;
+ return Subtarget.is64Bit() ? 10 : 4;
case X86::VR64RegClassID:
return 4;
}
@@ -233,8 +215,8 @@ X86RegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
const MCPhysReg *
X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
- bool HasAVX = TM.getSubtarget<X86Subtarget>().hasAVX();
- bool HasAVX512 = TM.getSubtarget<X86Subtarget>().hasAVX512();
+ bool HasAVX = Subtarget.hasAVX();
+ bool HasAVX512 = Subtarget.hasAVX512();
assert(MF && "MachineFunction required");
switch (MF->getFunction()->getCallingConv()) {
@@ -287,8 +269,8 @@ X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
const uint32_t*
X86RegisterInfo::getCallPreservedMask(CallingConv::ID CC) const {
- bool HasAVX = TM.getSubtarget<X86Subtarget>().hasAVX();
- bool HasAVX512 = TM.getSubtarget<X86Subtarget>().hasAVX512();
+ bool HasAVX = Subtarget.hasAVX();
+ bool HasAVX512 = Subtarget.hasAVX512();
switch (CC) {
case CallingConv::GHC:
@@ -406,7 +388,7 @@ BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
Reserved.set(*AI);
}
}
- if (!Is64Bit || !TM.getSubtarget<X86Subtarget>().hasAVX512()) {
+ if (!Is64Bit || !Subtarget.hasAVX512()) {
for (unsigned n = 16; n != 32; ++n) {
for (MCRegAliasIterator AI(X86::XMM0 + n, this, true); AI.isValid(); ++AI)
Reserved.set(*AI);
@@ -459,7 +441,7 @@ bool X86RegisterInfo::canRealignStack(const MachineFunction &MF) const {
bool X86RegisterInfo::needsStackRealignment(const MachineFunction &MF) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
const Function *F = MF.getFunction();
- unsigned StackAlign = TM.getFrameLowering()->getStackAlignment();
+ unsigned StackAlign = MF.getTarget().getFrameLowering()->getStackAlignment();
bool requiresRealignment =
((MFI->getMaxAlignment() > StackAlign) ||
F->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
diff --git a/lib/Target/X86/X86RegisterInfo.h b/lib/Target/X86/X86RegisterInfo.h
index 2289d91..74efd1f 100644
--- a/lib/Target/X86/X86RegisterInfo.h
+++ b/lib/Target/X86/X86RegisterInfo.h
@@ -22,11 +22,11 @@
namespace llvm {
class Type;
class TargetInstrInfo;
- class X86TargetMachine;
+ class X86Subtarget;
class X86RegisterInfo final : public X86GenRegisterInfo {
public:
- X86TargetMachine &TM;
+ const X86Subtarget &Subtarget;
private:
/// Is64Bit - Is the target 64-bits.
@@ -55,15 +55,11 @@ private:
unsigned BasePtr;
public:
- X86RegisterInfo(X86TargetMachine &tm);
+ X86RegisterInfo(const X86Subtarget &STI);
// FIXME: This should be tablegen'd like getDwarfRegNum is
int getSEHRegNum(unsigned i) const;
- /// getCompactUnwindRegNum - This function maps the register to the number for
- /// compact unwind encoding. Return -1 if the register isn't valid.
- int getCompactUnwindRegNum(unsigned RegNum, bool isEH) const override;
-
/// Code Generation virtual methods...
///
bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const override;
diff --git a/lib/Target/X86/X86SelectionDAGInfo.cpp b/lib/Target/X86/X86SelectionDAGInfo.cpp
index 744890d..a83dd9b 100644
--- a/lib/Target/X86/X86SelectionDAGInfo.cpp
+++ b/lib/Target/X86/X86SelectionDAGInfo.cpp
@@ -11,21 +11,23 @@
//
//===----------------------------------------------------------------------===//
-#include "X86TargetMachine.h"
+#include "X86InstrInfo.h"
+#include "X86ISelLowering.h"
+#include "X86RegisterInfo.h"
+#include "X86Subtarget.h"
+#include "X86SelectionDAGInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/IR/DerivedTypes.h"
+#include "llvm/Target/TargetLowering.h"
+
using namespace llvm;
#define DEBUG_TYPE "x86-selectiondag-info"
-X86SelectionDAGInfo::X86SelectionDAGInfo(const X86TargetMachine &TM) :
- TargetSelectionDAGInfo(TM),
- Subtarget(&TM.getSubtarget<X86Subtarget>()),
- TLI(*TM.getTargetLowering()) {
-}
+X86SelectionDAGInfo::X86SelectionDAGInfo(const DataLayout &DL)
+ : TargetSelectionDAGInfo(&DL) {}
-X86SelectionDAGInfo::~X86SelectionDAGInfo() {
-}
+X86SelectionDAGInfo::~X86SelectionDAGInfo() {}
SDValue
X86SelectionDAGInfo::EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl,
@@ -35,6 +37,7 @@ X86SelectionDAGInfo::EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl,
bool isVolatile,
MachinePointerInfo DstPtrInfo) const {
ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
+ const X86Subtarget &Subtarget = DAG.getTarget().getSubtarget<X86Subtarget>();
// If to a segment-relative address space, use the default lowering.
if (DstPtrInfo.getAddrSpace() >= 256)
@@ -43,16 +46,14 @@ X86SelectionDAGInfo::EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl,
// If not DWORD aligned or size is more than the threshold, call the library.
// The libc version is likely to be faster for these cases. It can use the
// address value and run time information about the CPU.
- if ((Align & 3) != 0 ||
- !ConstantSize ||
- ConstantSize->getZExtValue() >
- Subtarget->getMaxInlineSizeThreshold()) {
+ if ((Align & 3) != 0 || !ConstantSize ||
+ ConstantSize->getZExtValue() > Subtarget.getMaxInlineSizeThreshold()) {
// Check to see if there is a specialized entry-point for memory zeroing.
ConstantSDNode *V = dyn_cast<ConstantSDNode>(Src);
if (const char *bzeroEntry = V &&
- V->isNullValue() ? Subtarget->getBZeroEntry() : nullptr) {
- EVT IntPtr = TLI.getPointerTy();
+ V->isNullValue() ? Subtarget.getBZeroEntry() : nullptr) {
+ EVT IntPtr = DAG.getTargetLoweringInfo().getPointerTy();
Type *IntPtrTy = getDataLayout()->getIntPtrType(*DAG.getContext());
TargetLowering::ArgListTy Args;
TargetLowering::ArgListEntry Entry;
@@ -65,10 +66,11 @@ X86SelectionDAGInfo::EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl,
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(dl).setChain(Chain)
.setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()),
- DAG.getExternalSymbol(bzeroEntry, IntPtr), &Args, 0)
+ DAG.getExternalSymbol(bzeroEntry, IntPtr), std::move(Args),
+ 0)
.setDiscardResult();
- std::pair<SDValue,SDValue> CallResult = TLI.LowerCallTo(CLI);
+ std::pair<SDValue, SDValue> CallResult =
+     DAG.getTargetLoweringInfo().LowerCallTo(CLI);

return CallResult.second;
}
@@ -99,7 +101,7 @@ X86SelectionDAGInfo::EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl,
ValReg = X86::EAX;
Val = (Val << 8) | Val;
Val = (Val << 16) | Val;
- if (Subtarget->is64Bit() && ((Align & 0x7) == 0)) { // QWORD aligned
+ if (Subtarget.is64Bit() && ((Align & 0x7) == 0)) { // QWORD aligned
AVT = MVT::i64;
ValReg = X86::RAX;
Val = (Val << 32) | Val;
@@ -128,13 +130,11 @@ X86SelectionDAGInfo::EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl,
InFlag = Chain.getValue(1);
}
- Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RCX :
- X86::ECX,
- Count, InFlag);
+ Chain = DAG.getCopyToReg(Chain, dl, Subtarget.is64Bit() ? X86::RCX : X86::ECX,
+ Count, InFlag);
InFlag = Chain.getValue(1);
- Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RDI :
- X86::EDI,
- Dst, InFlag);
+ Chain = DAG.getCopyToReg(Chain, dl, Subtarget.is64Bit() ? X86::RDI : X86::EDI,
+ Dst, InFlag);
InFlag = Chain.getValue(1);
SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
@@ -182,10 +182,11 @@ X86SelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl,
// This requires the copy size to be a constant, preferably
// within a subtarget-specific limit.
ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
+ const X86Subtarget &Subtarget = DAG.getTarget().getSubtarget<X86Subtarget>();
if (!ConstantSize)
return SDValue();
uint64_t SizeVal = ConstantSize->getZExtValue();
- if (!AlwaysInline && SizeVal > Subtarget->getMaxInlineSizeThreshold())
+ if (!AlwaysInline && SizeVal > Subtarget.getMaxInlineSizeThreshold())
return SDValue();
/// If not DWORD aligned, it is more efficient to call the library. However
@@ -218,7 +219,7 @@ X86SelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl,
AVT = MVT::i32;
else
// QWORD aligned
- AVT = Subtarget->is64Bit() ? MVT::i64 : MVT::i32;
+ AVT = Subtarget.is64Bit() ? MVT::i64 : MVT::i32;
unsigned UBytes = AVT.getSizeInBits() / 8;
unsigned CountVal = SizeVal / UBytes;
@@ -226,15 +227,15 @@ X86SelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl,
unsigned BytesLeft = SizeVal % UBytes;
SDValue InFlag;
- Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RCX :
+ Chain = DAG.getCopyToReg(Chain, dl, Subtarget.is64Bit() ? X86::RCX :
X86::ECX,
Count, InFlag);
InFlag = Chain.getValue(1);
- Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RDI :
+ Chain = DAG.getCopyToReg(Chain, dl, Subtarget.is64Bit() ? X86::RDI :
X86::EDI,
Dst, InFlag);
InFlag = Chain.getValue(1);
- Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RSI :
+ Chain = DAG.getCopyToReg(Chain, dl, Subtarget.is64Bit() ? X86::RSI :
X86::ESI,
Src, InFlag);
InFlag = Chain.getValue(1);
diff --git a/lib/Target/X86/X86SelectionDAGInfo.h b/lib/Target/X86/X86SelectionDAGInfo.h
index 0d5dc38..c12555a 100644
--- a/lib/Target/X86/X86SelectionDAGInfo.h
+++ b/lib/Target/X86/X86SelectionDAGInfo.h
@@ -23,14 +23,8 @@ class X86TargetMachine;
class X86Subtarget;
class X86SelectionDAGInfo : public TargetSelectionDAGInfo {
- /// Subtarget - Keep a pointer to the X86Subtarget around so that we can
- /// make the right decision when generating code for different targets.
- const X86Subtarget *Subtarget;
-
- const X86TargetLowering &TLI;
-
public:
- explicit X86SelectionDAGInfo(const X86TargetMachine &TM);
+ explicit X86SelectionDAGInfo(const DataLayout &DL);
~X86SelectionDAGInfo();
SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl,
diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp
index 989e0d6..79b7e68 100644
--- a/lib/Target/X86/X86Subtarget.cpp
+++ b/lib/Target/X86/X86Subtarget.cpp
@@ -291,13 +291,60 @@ void X86Subtarget::initializeEnvironment() {
CallRegIndirect = false;
LEAUsesAG = false;
SlowLEA = false;
+ SlowIncDec = false;
stackAlignment = 4;
// FIXME: this is a known good value for Yonah. How about others?
MaxInlineSizeThreshold = 128;
}
+static std::string computeDataLayout(const X86Subtarget &ST) {
+ // X86 is little endian
+ std::string Ret = "e";
+
+ Ret += DataLayout::getManglingComponent(ST.getTargetTriple());
+ // X86 and x32 have 32 bit pointers.
+ if (ST.isTarget64BitILP32() || !ST.is64Bit())
+ Ret += "-p:32:32";
+
+ // Some ABIs align 64 bit integers and doubles to 64 bits, others to 32.
+ if (ST.is64Bit() || ST.isOSWindows() || ST.isTargetNaCl())
+ Ret += "-i64:64";
+ else
+ Ret += "-f64:32:64";
+
+ // Some ABIs align long double to 128 bits, others to 32.
+ if (ST.isTargetNaCl())
+ ; // No f80
+ else if (ST.is64Bit() || ST.isTargetDarwin())
+ Ret += "-f80:128";
+ else
+ Ret += "-f80:32";
+
+ // The registers can hold 8, 16, 32 or, in x86-64, 64 bits.
+ if (ST.is64Bit())
+ Ret += "-n8:16:32:64";
+ else
+ Ret += "-n8:16:32";
+
+ // The stack is aligned to 32 bits on some ABIs and 128 bits on others.
+ if (!ST.is64Bit() && ST.isOSWindows())
+ Ret += "-S32";
+ else
+ Ret += "-S128";
+
+ return Ret;
+}
+
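Since the layout logic now lives with the subtarget, here is the same decision tree restated as a standalone function over plain booleans. The triple-mangling component is omitted and the names are invented for illustration:

    #include <cstdio>
    #include <string>

    static std::string computeDL(bool Is64Bit, bool ILP32, bool IsWindows,
                                 bool IsNaCl, bool IsDarwin) {
      std::string Ret = "e";                 // little endian
      if (ILP32 || !Is64Bit) Ret += "-p:32:32";
      Ret += (Is64Bit || IsWindows || IsNaCl) ? "-i64:64" : "-f64:32:64";
      if (!IsNaCl)                           // NaCl has no f80 at all
        Ret += (Is64Bit || IsDarwin) ? "-f80:128" : "-f80:32";
      Ret += Is64Bit ? "-n8:16:32:64" : "-n8:16:32";
      Ret += (!Is64Bit && IsWindows) ? "-S32" : "-S128";
      return Ret;
    }

    int main() {
      // x86-64 Linux (mangling component omitted):
      // e-i64:64-f80:128-n8:16:32:64-S128
      std::printf("%s\n", computeDL(true, false, false, false, false).c_str());
    }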
+X86Subtarget &X86Subtarget::initializeSubtargetDependencies(StringRef CPU,
+ StringRef FS) {
+ initializeEnvironment();
+ resetSubtargetFeatures(CPU, FS);
+ return *this;
+}
+
X86Subtarget::X86Subtarget(const std::string &TT, const std::string &CPU,
- const std::string &FS, unsigned StackAlignOverride)
+ const std::string &FS, X86TargetMachine &TM,
+ unsigned StackAlignOverride)
: X86GenSubtargetInfo(TT, CPU, FS), X86ProcFamily(Others),
PICStyle(PICStyles::None), TargetTriple(TT),
StackAlignOverride(StackAlignOverride),
@@ -305,10 +352,12 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &CPU,
In32BitMode(TargetTriple.getArch() == Triple::x86 &&
TargetTriple.getEnvironment() != Triple::CODE16),
In16BitMode(TargetTriple.getArch() == Triple::x86 &&
- TargetTriple.getEnvironment() == Triple::CODE16) {
- initializeEnvironment();
- resetSubtargetFeatures(CPU, FS);
-}
+ TargetTriple.getEnvironment() == Triple::CODE16),
+ DL(computeDataLayout(*this)), TSInfo(DL),
+ InstrInfo(initializeSubtargetDependencies(CPU, FS)), TLInfo(TM),
+ FrameLowering(TargetFrameLowering::StackGrowsDown, getStackAlignment(),
+ is64Bit() ? -8 : -4),
+ JITInfo(hasSSE1()) {}
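The constructor above depends on initializeSubtargetDependencies() returning *this, so members that appear later in the initializer list (InstrInfo, FrameLowering, JITInfo) are constructed from an already-configured subtarget. A minimal sketch of that idiom with invented types:

    #include <cstdio>

    struct SubtargetLike;

    struct InstrInfoLike {
      explicit InstrInfoLike(SubtargetLike &ST);
    };

    struct SubtargetLike {
      bool HasSSE1 = false;
      // Stands in for initializeEnvironment() + resetSubtargetFeatures().
      SubtargetLike &initializeDependencies() {
        HasSSE1 = true;
        return *this;
      }
      InstrInfoLike InstrInfo; // declared after the feature flags on purpose
      SubtargetLike() : InstrInfo(initializeDependencies()) {}
    };

    InstrInfoLike::InstrInfoLike(SubtargetLike &ST) {
      std::printf("InstrInfo construction sees HasSSE1=%d\n", ST.HasSSE1);
    }

    int main() { SubtargetLike ST; }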
bool
X86Subtarget::enablePostRAScheduler(CodeGenOpt::Level OptLevel,
diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h
index 703559a..09db0eb 100644
--- a/lib/Target/X86/X86Subtarget.h
+++ b/lib/Target/X86/X86Subtarget.h
@@ -14,6 +14,11 @@
#ifndef X86SUBTARGET_H
#define X86SUBTARGET_H
+#include "X86FrameLowering.h"
+#include "X86ISelLowering.h"
+#include "X86InstrInfo.h"
+#include "X86JITInfo.h"
+#include "X86SelectionDAGInfo.h"
#include "llvm/ADT/Triple.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/Target/TargetSubtargetInfo.h"
@@ -40,6 +45,7 @@ enum Style {
}
class X86Subtarget final : public X86GenSubtargetInfo {
+
protected:
enum X86SSEEnum {
NoMMXSSE, MMX, SSE1, SSE2, SSE3, SSSE3, SSE41, SSE42, AVX, AVX2, AVX512F
@@ -181,6 +187,9 @@ protected:
/// SlowLEA - True if the LEA instruction with certain arguments is slow
bool SlowLEA;
+ /// SlowIncDec - True if INC and DEC instructions are slow when writing to flags
+ bool SlowIncDec;
+
/// Processor has AVX-512 PreFetch Instructions
bool HasPFI;
@@ -217,14 +226,31 @@ private:
/// In16BitMode - True if compiling for 16-bit, false for 32-bit or 64-bit.
bool In16BitMode;
+ // Calculates type size & alignment
+ const DataLayout DL;
+ X86SelectionDAGInfo TSInfo;
+ // Ordering here is important. X86InstrInfo initializes X86RegisterInfo,
+ // which X86TargetLowering needs.
+ X86InstrInfo InstrInfo;
+ X86TargetLowering TLInfo;
+ X86FrameLowering FrameLowering;
+ X86JITInfo JITInfo;
+
public:
/// This constructor initializes the data members to match that
/// of the specified triple.
///
X86Subtarget(const std::string &TT, const std::string &CPU,
- const std::string &FS,
+ const std::string &FS, X86TargetMachine &TM,
unsigned StackAlignOverride);
+ const X86TargetLowering *getTargetLowering() const { return &TLInfo; }
+ const X86InstrInfo *getInstrInfo() const { return &InstrInfo; }
+ const DataLayout *getDataLayout() const { return &DL; }
+ const X86FrameLowering *getFrameLowering() const { return &FrameLowering; }
+ const X86SelectionDAGInfo *getSelectionDAGInfo() const { return &TSInfo; }
+ X86JITInfo *getJITInfo() { return &JITInfo; }
+
/// getStackAlignment - Returns the minimum alignment known to hold for the
/// stack frame on entry to the function, which must be maintained by every
/// function for this subtarget.
@@ -241,6 +267,9 @@ public:
/// \brief Reset the features for the X86 target.
void resetSubtargetFeatures(const MachineFunction *MF) override;
private:
+ /// \brief Initialize the full set of dependencies so we can use an initializer
+ /// list for X86Subtarget.
+ X86Subtarget &initializeSubtargetDependencies(StringRef CPU, StringRef FS);
void initializeEnvironment();
void resetSubtargetFeatures(StringRef CPU, StringRef FS);
public:
@@ -319,6 +348,7 @@ public:
bool callRegIndirect() const { return CallRegIndirect; }
bool LEAusesAG() const { return LEAUsesAG; }
bool slowLEA() const { return SlowLEA; }
+ bool slowIncDec() const { return SlowIncDec; }
bool hasCDI() const { return HasCDI; }
bool hasPFI() const { return HasPFI; }
bool hasERI() const { return HasERI; }
diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp
index 93760ef..f12140f 100644
--- a/lib/Target/X86/X86TargetMachine.cpp
+++ b/lib/Target/X86/X86TargetMachine.cpp
@@ -29,61 +29,14 @@ extern "C" void LLVMInitializeX86Target() {
void X86TargetMachine::anchor() { }
-static std::string computeDataLayout(const X86Subtarget &ST) {
- // X86 is little endian
- std::string Ret = "e";
-
- Ret += DataLayout::getManglingComponent(ST.getTargetTriple());
- // X86 and x32 have 32 bit pointers.
- if (ST.isTarget64BitILP32() || !ST.is64Bit())
- Ret += "-p:32:32";
-
- // Some ABIs align 64 bit integers and doubles to 64 bits, others to 32.
- if (ST.is64Bit() || ST.isTargetCygMing() || ST.isTargetKnownWindowsMSVC() ||
- ST.isTargetNaCl())
- Ret += "-i64:64";
- else
- Ret += "-f64:32:64";
-
- // Some ABIs align long double to 128 bits, others to 32.
- if (ST.isTargetNaCl())
- ; // No f80
- else if (ST.is64Bit() || ST.isTargetDarwin())
- Ret += "-f80:128";
- else
- Ret += "-f80:32";
-
- // The registers can hold 8, 16, 32 or, in x86-64, 64 bits.
- if (ST.is64Bit())
- Ret += "-n8:16:32:64";
- else
- Ret += "-n8:16:32";
-
- // The stack is aligned to 32 bits on some ABIs and 128 bits on others.
- if (!ST.is64Bit() && (ST.isTargetCygMing() || ST.isTargetKnownWindowsMSVC()))
- Ret += "-S32";
- else
- Ret += "-S128";
-
- return Ret;
-}
-
/// X86TargetMachine ctor - Create an X86 target.
///
-X86TargetMachine::X86TargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
- const TargetOptions &Options,
+X86TargetMachine::X86TargetMachine(const Target &T, StringRef TT, StringRef CPU,
+ StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL)
- : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
- Subtarget(TT, CPU, FS, Options.StackAlignmentOverride),
- FrameLowering(*this, Subtarget),
- InstrItins(Subtarget.getInstrItineraryData()),
- DL(computeDataLayout(*getSubtargetImpl())),
- InstrInfo(*this),
- TLInfo(*this),
- TSInfo(*this),
- JITInfo(*this) {
+ : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
+ Subtarget(TT, CPU, FS, *this, Options.StackAlignmentOverride) {
// Determine the PICStyle based on the target selected.
if (getRelocationModel() == Reloc::Static) {
// Unless we're in PIC or DynamicNoPIC mode, set the PIC style to None.
@@ -158,6 +111,7 @@ public:
return *getX86TargetMachine().getSubtargetImpl();
}
+ void addIRPasses() override;
bool addInstSelector() override;
bool addILPOpts() override;
bool addPreRegAlloc() override;
@@ -170,6 +124,12 @@ TargetPassConfig *X86TargetMachine::createPassConfig(PassManagerBase &PM) {
return new X86PassConfig(this, PM);
}
+void X86PassConfig::addIRPasses() {
+ addPass(createX86AtomicExpandPass(&getX86TargetMachine()));
+
+ TargetPassConfig::addIRPasses();
+}
+
bool X86PassConfig::addInstSelector() {
// Install an instruction selector.
addPass(createX86ISelDag(getX86TargetMachine(), getOptLevel()));
diff --git a/lib/Target/X86/X86TargetMachine.h b/lib/Target/X86/X86TargetMachine.h
index 57e6eda..41d5157 100644
--- a/lib/Target/X86/X86TargetMachine.h
+++ b/lib/Target/X86/X86TargetMachine.h
@@ -13,12 +13,7 @@
#ifndef X86TARGETMACHINE_H
#define X86TARGETMACHINE_H
-
-#include "X86FrameLowering.h"
-#include "X86ISelLowering.h"
#include "X86InstrInfo.h"
-#include "X86JITInfo.h"
-#include "X86SelectionDAGInfo.h"
#include "X86Subtarget.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/Target/TargetMachine.h"
@@ -30,13 +25,6 @@ class StringRef;
class X86TargetMachine final : public LLVMTargetMachine {
virtual void anchor();
X86Subtarget Subtarget;
- X86FrameLowering FrameLowering;
- InstrItineraryData InstrItins;
- const DataLayout DL; // Calculates type size & alignment
- X86InstrInfo InstrInfo;
- X86TargetLowering TLInfo;
- X86SelectionDAGInfo TSInfo;
- X86JITInfo JITInfo;
public:
X86TargetMachine(const Target &T, StringRef TT,
@@ -44,28 +32,28 @@ public:
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL);
- const DataLayout *getDataLayout() const override { return &DL; }
+ const DataLayout *getDataLayout() const override {
+ return getSubtargetImpl()->getDataLayout();
+ }
const X86InstrInfo *getInstrInfo() const override {
- return &InstrInfo;
+ return getSubtargetImpl()->getInstrInfo();
}
const TargetFrameLowering *getFrameLowering() const override {
- return &FrameLowering;
- }
- X86JITInfo *getJITInfo() override {
- return &JITInfo;
+ return getSubtargetImpl()->getFrameLowering();
}
+ X86JITInfo *getJITInfo() override { return Subtarget.getJITInfo(); }
const X86Subtarget *getSubtargetImpl() const override { return &Subtarget; }
const X86TargetLowering *getTargetLowering() const override {
- return &TLInfo;
+ return getSubtargetImpl()->getTargetLowering();
}
const X86SelectionDAGInfo *getSelectionDAGInfo() const override {
- return &TSInfo;
+ return getSubtargetImpl()->getSelectionDAGInfo();
}
const X86RegisterInfo *getRegisterInfo() const override {
return &getInstrInfo()->getRegisterInfo();
}
const InstrItineraryData *getInstrItineraryData() const override {
- return &InstrItins;
+ return &getSubtargetImpl()->getInstrItineraryData();
}
/// \brief Register X86 analysis passes with a pass manager.
diff --git a/lib/Target/X86/X86TargetTransformInfo.cpp b/lib/Target/X86/X86TargetTransformInfo.cpp
index 91b9d40..c961e2f 100644
--- a/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -102,6 +102,8 @@ public:
unsigned getReductionCost(unsigned Opcode, Type *Ty,
bool IsPairwiseForm) const override;
+ unsigned getIntImmCost(int64_t) const;
+
unsigned getIntImmCost(const APInt &Imm, Type *Ty) const override;
unsigned getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
@@ -142,13 +144,17 @@ unsigned X86TTI::getNumberOfRegisters(bool Vector) const {
if (Vector && !ST->hasSSE1())
return 0;
- if (ST->is64Bit())
+ if (ST->is64Bit()) {
+ if (Vector && ST->hasAVX512())
+ return 32;
return 16;
+ }
return 8;
}
unsigned X86TTI::getRegisterBitWidth(bool Vector) const {
if (Vector) {
+ if (ST->hasAVX512()) return 512;
if (ST->hasAVX()) return 256;
if (ST->hasSSE1()) return 128;
return 0;
@@ -400,17 +406,117 @@ unsigned X86TTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
unsigned X86TTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
Type *SubTp) const {
- // We only estimate the cost of reverse shuffles.
- if (Kind != SK_Reverse)
+ // We only estimate the cost of reverse and alternate shuffles.
+ if (Kind != SK_Reverse && Kind != SK_Alternate)
return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp);
- std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Tp);
- unsigned Cost = 1;
- if (LT.second.getSizeInBits() > 128)
- Cost = 3; // Extract + insert + copy.
+ if (Kind == SK_Reverse) {
+ std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Tp);
+ unsigned Cost = 1;
+ if (LT.second.getSizeInBits() > 128)
+ Cost = 3; // Extract + insert + copy.
+
+ // Multiply by the number of parts.
+ return Cost * LT.first;
+ }
+
+ if (Kind == SK_Alternate) {
+ // 64-bit packed float vectors (v2f32) are widened to type v4f32.
+ // 64-bit packed integer vectors (v2i32) are promoted to type v2i64.
+ std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Tp);
+
+ // The backend knows how to generate a single VEX.256 version of
+ // the VPBLENDW instruction if the target supports AVX2.
+ if (ST->hasAVX2() && LT.second == MVT::v16i16)
+ return LT.first;
+
+ static const CostTblEntry<MVT::SimpleValueType> AVXAltShuffleTbl[] = {
+ {ISD::VECTOR_SHUFFLE, MVT::v4i64, 1}, // vblendpd
+ {ISD::VECTOR_SHUFFLE, MVT::v4f64, 1}, // vblendpd
+
+ {ISD::VECTOR_SHUFFLE, MVT::v8i32, 1}, // vblendps
+ {ISD::VECTOR_SHUFFLE, MVT::v8f32, 1}, // vblendps
+
+ // This shuffle is custom lowered into a sequence of:
+ // 2x vextractf128, 2x vpblendw, 1x vinsertf128
+ {ISD::VECTOR_SHUFFLE, MVT::v16i16, 5},
+
+ // This shuffle is custom lowered into a long sequence of:
+ // 2x vextractf128, 4x vpshufb, 2x vpor, 1x vinsertf128
+ {ISD::VECTOR_SHUFFLE, MVT::v32i8, 9}
+ };
+
+ if (ST->hasAVX()) {
+ int Idx = CostTableLookup(AVXAltShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second);
+ if (Idx != -1)
+ return LT.first * AVXAltShuffleTbl[Idx].Cost;
+ }
+
+ static const CostTblEntry<MVT::SimpleValueType> SSE41AltShuffleTbl[] = {
+ // These are lowered into movsd.
+ {ISD::VECTOR_SHUFFLE, MVT::v2i64, 1},
+ {ISD::VECTOR_SHUFFLE, MVT::v2f64, 1},
+
+ // Packed float vectors with four elements are lowered into BLENDI dag
+ // nodes. A v4i32/v4f32 BLENDI generates a single 'blendps'/'blendpd'.
+ {ISD::VECTOR_SHUFFLE, MVT::v4i32, 1},
+ {ISD::VECTOR_SHUFFLE, MVT::v4f32, 1},
+
+ // This shuffle generates a single pshufw.
+ {ISD::VECTOR_SHUFFLE, MVT::v8i16, 1},
+
+ // There is no instruction that matches a v16i8 alternate shuffle.
+ // The backend will expand it into the sequence 'pshufb + pshufb + or'.
+ {ISD::VECTOR_SHUFFLE, MVT::v16i8, 3}
+ };
+
+ if (ST->hasSSE41()) {
+ int Idx = CostTableLookup(SSE41AltShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second);
+ if (Idx != -1)
+ return LT.first * SSE41AltShuffleTbl[Idx].Cost;
+ }
+
+ static const CostTblEntry<MVT::SimpleValueType> SSSE3AltShuffleTbl[] = {
+ {ISD::VECTOR_SHUFFLE, MVT::v2i64, 1}, // movsd
+ {ISD::VECTOR_SHUFFLE, MVT::v2f64, 1}, // movsd
+
+ // SSE3 doesn't have 'blendps'. The following shuffles are expanded into
+ // the sequence 'shufps + pshufd'.
+ {ISD::VECTOR_SHUFFLE, MVT::v4i32, 2},
+ {ISD::VECTOR_SHUFFLE, MVT::v4f32, 2},
- // Multiple by the number of parts.
- return Cost * LT.first;
+ {ISD::VECTOR_SHUFFLE, MVT::v8i16, 3}, // pshufb + pshufb + or
+ {ISD::VECTOR_SHUFFLE, MVT::v16i8, 3} // pshufb + pshufb + or
+ };
+
+ if (ST->hasSSSE3()) {
+ int Idx = CostTableLookup(SSSE3AltShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second);
+ if (Idx != -1)
+ return LT.first * SSSE3AltShuffleTbl[Idx].Cost;
+ }
+
+ static const CostTblEntry<MVT::SimpleValueType> SSEAltShuffleTbl[] = {
+ {ISD::VECTOR_SHUFFLE, MVT::v2i64, 1}, // movsd
+ {ISD::VECTOR_SHUFFLE, MVT::v2f64, 1}, // movsd
+
+ {ISD::VECTOR_SHUFFLE, MVT::v4i32, 2}, // shufps + pshufd
+ {ISD::VECTOR_SHUFFLE, MVT::v4f32, 2}, // shufps + pshufd
+
+ // This is expanded into a long sequence of four extract + four insert.
+ {ISD::VECTOR_SHUFFLE, MVT::v8i16, 8}, // 4 x pextrw + 4 pinsrw.
+
+ // 8 x (pinsrw + pextrw + and + movb + movzb + or)
+ {ISD::VECTOR_SHUFFLE, MVT::v16i8, 48}
+ };
+
+ // Fall-back (SSE3 and SSE2).
+ int Idx = CostTableLookup(SSEAltShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second);
+ if (Idx != -1)
+ return LT.first * SSEAltShuffleTbl[Idx].Cost;
+ return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp);
+ }
+
+ return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp);
}
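For reference, an "alternate" shuffle in the sense costed above takes even result lanes from the first input and odd lanes from the second, i.e. the mask <0, N+1, 2, N+3, ...>. Shown here on plain arrays:

    #include <cstdio>

    static void alternate(const int *A, const int *B, int *Out, int N) {
      for (int i = 0; i < N; ++i)
        Out[i] = (i & 1) ? B[i] : A[i];
    }

    int main() {
      int A[4] = {0, 1, 2, 3}, B[4] = {10, 11, 12, 13}, R[4];
      alternate(A, B, R, 4);
      std::printf("%d %d %d %d\n", R[0], R[1], R[2], R[3]); // 0 11 2 13
    }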
unsigned X86TTI::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const {
@@ -808,6 +914,19 @@ unsigned X86TTI::getReductionCost(unsigned Opcode, Type *ValTy,
return TargetTransformInfo::getReductionCost(Opcode, ValTy, IsPairwise);
}
+/// \brief Calculate the cost of materializing a 64-bit value. This helper
+/// method might only calculate a fraction of a larger immediate. Therefore it
+/// is valid to return a cost of ZERO.
+unsigned X86TTI::getIntImmCost(int64_t Val) const {
+ if (Val == 0)
+ return TCC_Free;
+
+ if (isInt<32>(Val))
+ return TCC_Basic;
+
+ return 2 * TCC_Basic;
+}
+
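Restated outside LLVM (assuming the usual TCC_Free = 0 and TCC_Basic = 1 values), the helper's policy is: zero is free, anything sign-extendable from 32 bits costs one instruction, a full 64-bit immediate costs two:

    #include <cstdint>
    #include <cstdio>

    static unsigned immCost(int64_t Val) {
      if (Val == 0) return 0;                             // e.g. xor reg,reg
      if (Val >= INT32_MIN && Val <= INT32_MAX) return 1; // one mov
      return 2;                                           // movabsq-class cost
    }

    int main() {
      std::printf("%u %u %u\n", immCost(0), immCost(-1), immCost(1LL << 40));
      // prints: 0 1 2
    }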
unsigned X86TTI::getIntImmCost(const APInt &Imm, Type *Ty) const {
assert(Ty->isIntegerTy());
@@ -825,11 +944,21 @@ unsigned X86TTI::getIntImmCost(const APInt &Imm, Type *Ty) const {
if (Imm == 0)
return TCC_Free;
- if (Imm.getBitWidth() <= 64 &&
- (isInt<32>(Imm.getSExtValue()) || isUInt<32>(Imm.getZExtValue())))
- return TCC_Basic;
- else
- return 2 * TCC_Basic;
+ // Sign-extend all constants to a multiple of 64 bits.
+ APInt ImmVal = Imm;
+ if (BitSize & 0x3f)
+ ImmVal = Imm.sext((BitSize + 63) & ~0x3fU);
+
+ // Split the constant into 64-bit chunks and calculate the cost for each
+ // chunk.
+ unsigned Cost = 0;
+ for (unsigned ShiftVal = 0; ShiftVal < BitSize; ShiftVal += 64) {
+ APInt Tmp = ImmVal.ashr(ShiftVal).sextOrTrunc(64);
+ int64_t Val = Tmp.getSExtValue();
+ Cost += getIntImmCost(Val);
+ }
+ // We need at least one instruction to materialize the constant.
+ return std::max(1U, Cost);
}
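The chunking loop above, restated for a 128-bit constant held as two signed 64-bit words: each word is costed independently and at least one instruction is always charged. The immCost policy mirrors the sketch earlier and is duplicated so this snippet stands alone:

    #include <algorithm>
    #include <cstdint>
    #include <cstdio>

    static unsigned immCost(int64_t Val) {
      if (Val == 0) return 0;
      return (Val >= INT32_MIN && Val <= INT32_MAX) ? 1 : 2;
    }

    static unsigned wideImmCost(int64_t Lo, int64_t Hi) {
      unsigned Cost = immCost(Lo) + immCost(Hi);
      return std::max(1u, Cost); // always at least one instruction
    }

    int main() {
      std::printf("%u\n", wideImmCost(0, 0));  // 1: still needs one xor
      std::printf("%u\n", wideImmCost(42, 0)); // 1: high word is free
      std::printf("%u\n", wideImmCost(42, 7)); // 2: both words materialized
    }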
unsigned X86TTI::getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
@@ -889,9 +1018,13 @@ unsigned X86TTI::getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
break;
}
- if ((Idx == ImmIdx) &&
- Imm.getBitWidth() <= 64 && isInt<32>(Imm.getSExtValue()))
- return TCC_Free;
+ if (Idx == ImmIdx) {
+ unsigned NumConstants = (BitSize + 63) / 64;
+ unsigned Cost = X86TTI::getIntImmCost(Imm, Ty);
+ return (Cost <= NumConstants * TCC_Basic)
+ ? static_cast<unsigned>(TCC_Free)
+ : Cost;
+ }
return X86TTI::getIntImmCost(Imm, Ty);
}
diff --git a/lib/Target/XCore/XCoreFrameLowering.cpp b/lib/Target/XCore/XCoreFrameLowering.cpp
index 5499aba..e694736 100644
--- a/lib/Target/XCore/XCoreFrameLowering.cpp
+++ b/lib/Target/XCore/XCoreFrameLowering.cpp
@@ -228,7 +228,9 @@ void XCoreFrameLowering::emitPrologue(MachineFunction &MF) const {
const XCoreInstrInfo &TII =
*static_cast<const XCoreInstrInfo*>(MF.getTarget().getInstrInfo());
XCoreFunctionInfo *XFI = MF.getInfo<XCoreFunctionInfo>();
- DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
+ // Debug location must be unknown since the first debug location is used
+ // to determine the end of the prologue.
+ DebugLoc dl;
if (MFI->getMaxAlignment() > getStackAlignment())
report_fatal_error("emitPrologue unsupported alignment: "
@@ -416,7 +418,7 @@ spillCalleeSavedRegisters(MachineBasicBlock &MBB,
bool emitFrameMoves = XCoreRegisterInfo::needsFrameMoves(*MF);
DebugLoc DL;
- if (MI != MBB.end())
+ if (MI != MBB.end() && !MI->isDebugValue())
DL = MI->getDebugLoc();
for (std::vector<CalleeSavedInfo>::const_iterator it = CSI.begin();
diff --git a/lib/Target/XCore/XCoreISelLowering.cpp b/lib/Target/XCore/XCoreISelLowering.cpp
index 9d78586..be7ef64 100644
--- a/lib/Target/XCore/XCoreISelLowering.cpp
+++ b/lib/Target/XCore/XCoreISelLowering.cpp
@@ -68,10 +68,9 @@ getTargetNodeName(unsigned Opcode) const
}
}
-XCoreTargetLowering::XCoreTargetLowering(XCoreTargetMachine &XTM)
- : TargetLowering(XTM, new XCoreTargetObjectFile()),
- TM(XTM),
- Subtarget(*XTM.getSubtargetImpl()) {
+XCoreTargetLowering::XCoreTargetLowering(const TargetMachine &TM)
+ : TargetLowering(TM, new XCoreTargetObjectFile()), TM(TM),
+ Subtarget(TM.getSubtarget<XCoreSubtarget>()) {
// Set up the register classes.
addRegisterClass(MVT::i32, &XCore::GRRegsRegClass);
@@ -92,15 +91,12 @@ XCoreTargetLowering::XCoreTargetLowering(XCoreTargetMachine &XTM)
// XCore does not have the NodeTypes below.
setOperationAction(ISD::BR_CC, MVT::i32, Expand);
- setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
+ setOperationAction(ISD::SELECT_CC, MVT::i32, Expand);
setOperationAction(ISD::ADDC, MVT::i32, Expand);
setOperationAction(ISD::ADDE, MVT::i32, Expand);
setOperationAction(ISD::SUBC, MVT::i32, Expand);
setOperationAction(ISD::SUBE, MVT::i32, Expand);
- // Stop the combiner recombining select and set_cc
- setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
-
// 64bit
setOperationAction(ISD::ADD, MVT::i64, Custom);
setOperationAction(ISD::SUB, MVT::i64, Custom);
@@ -217,7 +213,6 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::BR_JT: return LowerBR_JT(Op, DAG);
case ISD::LOAD: return LowerLOAD(Op, DAG);
case ISD::STORE: return LowerSTORE(Op, DAG);
- case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
case ISD::VAARG: return LowerVAARG(Op, DAG);
case ISD::VASTART: return LowerVASTART(Op, DAG);
case ISD::SMUL_LOHI: return LowerSMUL_LOHI(Op, DAG);
@@ -258,33 +253,21 @@ void XCoreTargetLowering::ReplaceNodeResults(SDNode *N,
// Misc Lower Operation implementation
//===----------------------------------------------------------------------===//
-SDValue XCoreTargetLowering::
-LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const
-{
- SDLoc dl(Op);
- SDValue Cond = DAG.getNode(ISD::SETCC, dl, MVT::i32, Op.getOperand(2),
- Op.getOperand(3), Op.getOperand(4));
- return DAG.getNode(ISD::SELECT, dl, MVT::i32, Cond, Op.getOperand(0),
- Op.getOperand(1));
-}
-
SDValue XCoreTargetLowering::getGlobalAddressWrapper(SDValue GA,
const GlobalValue *GV,
SelectionDAG &DAG) const {
// FIXME there is no actual debug info here
SDLoc dl(GA);
- const GlobalValue *UnderlyingGV = GV;
- // If GV is an alias then use the aliasee to determine the wrapper type
- if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
- UnderlyingGV = GA->getAliasee();
- if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(UnderlyingGV)) {
- if ((GVar->isConstant() && GV->hasLocalLinkage()) ||
- (GVar->hasSection() &&
- StringRef(GVar->getSection()).startswith(".cp.")))
- return DAG.getNode(XCoreISD::CPRelativeWrapper, dl, MVT::i32, GA);
- return DAG.getNode(XCoreISD::DPRelativeWrapper, dl, MVT::i32, GA);
- }
- return DAG.getNode(XCoreISD::PCRelativeWrapper, dl, MVT::i32, GA);
+
+ if (GV->getType()->getElementType()->isFunctionTy())
+ return DAG.getNode(XCoreISD::PCRelativeWrapper, dl, MVT::i32, GA);
+
+ const auto *GVar = dyn_cast<GlobalVariable>(GV);
+ if ((GV->hasSection() && StringRef(GV->getSection()).startswith(".cp.")) ||
+ (GVar && GVar->isConstant() && GV->hasLocalLinkage()))
+ return DAG.getNode(XCoreISD::CPRelativeWrapper, dl, MVT::i32, GA);
+
+ return DAG.getNode(XCoreISD::DPRelativeWrapper, dl, MVT::i32, GA);
}
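The rewritten wrapper selection above follows a fixed decision order: functions get the PC-relative wrapper, .cp.-section or local constant globals get the CP-relative one, everything else is DP-relative. Restated as a tiny classifier with booleans standing in for the IR queries:

    #include <cstdio>

    static const char *wrapperFor(bool IsFunction, bool InCPSection,
                                  bool IsLocalConstant) {
      if (IsFunction) return "PCRelativeWrapper";
      if (InCPSection || IsLocalConstant) return "CPRelativeWrapper";
      return "DPRelativeWrapper";
    }

    int main() {
      std::printf("%s\n", wrapperFor(true, false, false));  // code
      std::printf("%s\n", wrapperFor(false, true, false));  // constant pool
      std::printf("%s\n", wrapperFor(false, false, false)); // data
    }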
static bool IsSmallObject(const GlobalValue *GV, const XCoreTargetLowering &XTL) {
@@ -508,7 +491,7 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
CLI.setDebugLoc(DL).setChain(Chain)
.setCallee(CallingConv::C, IntPtrTy,
DAG.getExternalSymbol("__misaligned_load", getPointerTy()),
- &Args, 0);
+ std::move(Args), 0);
std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
SDValue Ops[] = { CallResult.first, CallResult.second };
@@ -568,7 +551,7 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG) const
CLI.setDebugLoc(dl).setChain(Chain)
.setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()),
DAG.getExternalSymbol("__misaligned_store", getPointerTy()),
- &Args, 0);
+ std::move(Args), 0);
std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
return CallResult.second;
diff --git a/lib/Target/XCore/XCoreISelLowering.h b/lib/Target/XCore/XCoreISelLowering.h
index d28715b..62b89c3 100644
--- a/lib/Target/XCore/XCoreISelLowering.h
+++ b/lib/Target/XCore/XCoreISelLowering.h
@@ -94,7 +94,7 @@ namespace llvm {
{
public:
- explicit XCoreTargetLowering(XCoreTargetMachine &TM);
+ explicit XCoreTargetLowering(const TargetMachine &TM);
using TargetLowering::isZExtFree;
bool isZExtFree(SDValue Val, EVT VT2) const override;
@@ -123,7 +123,7 @@ namespace llvm {
bool isLegalAddressingMode(const AddrMode &AM, Type *Ty) const override;
private:
- const XCoreTargetMachine &TM;
+ const TargetMachine &TM;
const XCoreSubtarget &Subtarget;
// Lower Operand helpers
@@ -157,7 +157,6 @@ namespace llvm {
SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerUMUL_LOHI(SDValue Op, SelectionDAG &DAG) const;
diff --git a/lib/Target/XCore/XCoreInstrInfo.cpp b/lib/Target/XCore/XCoreInstrInfo.cpp
index 984f0cd..36ea9a0 100644
--- a/lib/Target/XCore/XCoreInstrInfo.cpp
+++ b/lib/Target/XCore/XCoreInstrInfo.cpp
@@ -373,7 +373,8 @@ void XCoreInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
const TargetRegisterInfo *TRI) const
{
DebugLoc DL;
- if (I != MBB.end()) DL = I->getDebugLoc();
+ if (I != MBB.end() && !I->isDebugValue())
+ DL = I->getDebugLoc();
MachineFunction *MF = MBB.getParent();
const MachineFrameInfo &MFI = *MF->getFrameInfo();
MachineMemOperand *MMO =
@@ -395,7 +396,8 @@ void XCoreInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
const TargetRegisterInfo *TRI) const
{
DebugLoc DL;
- if (I != MBB.end()) DL = I->getDebugLoc();
+ if (I != MBB.end() && !I->isDebugValue())
+ DL = I->getDebugLoc();
MachineFunction *MF = MBB.getParent();
const MachineFrameInfo &MFI = *MF->getFrameInfo();
MachineMemOperand *MMO =
@@ -440,7 +442,8 @@ MachineBasicBlock::iterator XCoreInstrInfo::loadImmediate(
MachineBasicBlock::iterator MI,
unsigned Reg, uint64_t Value) const {
DebugLoc dl;
- if (MI != MBB.end()) dl = MI->getDebugLoc();
+ if (MI != MBB.end() && !MI->isDebugValue())
+ dl = MI->getDebugLoc();
if (isImmMskBitp(Value)) {
int N = Log2_32(Value) + 1;
return BuildMI(MBB, MI, dl, get(XCore::MKMSK_rus), Reg).addImm(N);
diff --git a/lib/Target/XCore/XCoreSelectionDAGInfo.cpp b/lib/Target/XCore/XCoreSelectionDAGInfo.cpp
index 5a6bbe7..91b33fd 100644
--- a/lib/Target/XCore/XCoreSelectionDAGInfo.cpp
+++ b/lib/Target/XCore/XCoreSelectionDAGInfo.cpp
@@ -16,9 +16,8 @@ using namespace llvm;
#define DEBUG_TYPE "xcore-selectiondag-info"
-XCoreSelectionDAGInfo::XCoreSelectionDAGInfo(const XCoreTargetMachine &TM)
- : TargetSelectionDAGInfo(TM) {
-}
+XCoreSelectionDAGInfo::XCoreSelectionDAGInfo(const DataLayout &DL)
+ : TargetSelectionDAGInfo(&DL) {}
XCoreSelectionDAGInfo::~XCoreSelectionDAGInfo() {
}
@@ -47,7 +46,7 @@ EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl, SDValue Chain,
.setCallee(TLI.getLibcallCallingConv(RTLIB::MEMCPY),
Type::getVoidTy(*DAG.getContext()),
DAG.getExternalSymbol("__memcpy_4", TLI.getPointerTy()),
- &Args, 0)
+ std::move(Args), 0)
.setDiscardResult();
std::pair<SDValue,SDValue> CallResult = TLI.LowerCallTo(CLI);
diff --git a/lib/Target/XCore/XCoreSelectionDAGInfo.h b/lib/Target/XCore/XCoreSelectionDAGInfo.h
index ea6af98..0079de1 100644
--- a/lib/Target/XCore/XCoreSelectionDAGInfo.h
+++ b/lib/Target/XCore/XCoreSelectionDAGInfo.h
@@ -22,7 +22,7 @@ class XCoreTargetMachine;
class XCoreSelectionDAGInfo : public TargetSelectionDAGInfo {
public:
- explicit XCoreSelectionDAGInfo(const XCoreTargetMachine &TM);
+ explicit XCoreSelectionDAGInfo(const DataLayout &DL);
~XCoreSelectionDAGInfo();
SDValue
diff --git a/lib/Target/XCore/XCoreSubtarget.cpp b/lib/Target/XCore/XCoreSubtarget.cpp
index 89ea03a..7227411 100644
--- a/lib/Target/XCore/XCoreSubtarget.cpp
+++ b/lib/Target/XCore/XCoreSubtarget.cpp
@@ -25,8 +25,8 @@ using namespace llvm;
void XCoreSubtarget::anchor() { }
-XCoreSubtarget::XCoreSubtarget(const std::string &TT,
- const std::string &CPU, const std::string &FS)
- : XCoreGenSubtargetInfo(TT, CPU, FS)
-{
-}
+XCoreSubtarget::XCoreSubtarget(const std::string &TT, const std::string &CPU,
+ const std::string &FS, const TargetMachine &TM)
+ : XCoreGenSubtargetInfo(TT, CPU, FS),
+ DL("e-m:e-p:32:32-i1:8:32-i8:8:32-i16:16:32-i64:32-f64:32-a:0:32-n32"),
+ InstrInfo(), FrameLowering(*this), TLInfo(TM), TSInfo(DL) {}
diff --git a/lib/Target/XCore/XCoreSubtarget.h b/lib/Target/XCore/XCoreSubtarget.h
index 5ac4dbc..1e9810b 100644
--- a/lib/Target/XCore/XCoreSubtarget.h
+++ b/lib/Target/XCore/XCoreSubtarget.h
@@ -14,6 +14,11 @@
#ifndef XCORESUBTARGET_H
#define XCORESUBTARGET_H
+#include "XCoreFrameLowering.h"
+#include "XCoreISelLowering.h"
+#include "XCoreInstrInfo.h"
+#include "XCoreSelectionDAGInfo.h"
+#include "llvm/IR/DataLayout.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include <string>
@@ -26,17 +31,31 @@ class StringRef;
class XCoreSubtarget : public XCoreGenSubtargetInfo {
virtual void anchor();
+ const DataLayout DL; // Calculates type size & alignment
+ XCoreInstrInfo InstrInfo;
+ XCoreFrameLowering FrameLowering;
+ XCoreTargetLowering TLInfo;
+ XCoreSelectionDAGInfo TSInfo;
public:
/// This constructor initializes the data members to match that
/// of the specified triple.
///
XCoreSubtarget(const std::string &TT, const std::string &CPU,
- const std::string &FS);
+ const std::string &FS, const TargetMachine &TM);
/// ParseSubtargetFeatures - Parses features string setting specified
/// subtarget options. Definition of function is auto generated by tblgen.
void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
+
+ const XCoreInstrInfo *getInstrInfo() const { return &InstrInfo; }
+ const XCoreFrameLowering *getFrameLowering() const { return &FrameLowering; }
+ const XCoreTargetLowering *getTargetLowering() const { return &TLInfo; }
+ const XCoreSelectionDAGInfo *getSelectionDAGInfo() const { return &TSInfo; }
+ const TargetRegisterInfo *getRegisterInfo() const {
+ return &InstrInfo.getRegisterInfo();
+ }
+ const DataLayout *getDataLayout() const { return &DL; }
};
} // End llvm namespace
diff --git a/lib/Target/XCore/XCoreTargetMachine.cpp b/lib/Target/XCore/XCoreTargetMachine.cpp
index 0fb21c5..8d8bb38 100644
--- a/lib/Target/XCore/XCoreTargetMachine.cpp
+++ b/lib/Target/XCore/XCoreTargetMachine.cpp
@@ -25,13 +25,8 @@ XCoreTargetMachine::XCoreTargetMachine(const Target &T, StringRef TT,
const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL)
- : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
- Subtarget(TT, CPU, FS),
- DL("e-m:e-p:32:32-i1:8:32-i8:8:32-i16:16:32-i64:32-f64:32-a:0:32-n32"),
- InstrInfo(),
- FrameLowering(Subtarget),
- TLInfo(*this),
- TSInfo(*this) {
+ : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
+ Subtarget(TT, CPU, FS, *this) {
initAsmInfo();
}
diff --git a/lib/Target/XCore/XCoreTargetMachine.h b/lib/Target/XCore/XCoreTargetMachine.h
index a57ca55..14c43bf 100644
--- a/lib/Target/XCore/XCoreTargetMachine.h
+++ b/lib/Target/XCore/XCoreTargetMachine.h
@@ -14,46 +14,38 @@
#ifndef XCORETARGETMACHINE_H
#define XCORETARGETMACHINE_H
-#include "XCoreFrameLowering.h"
-#include "XCoreISelLowering.h"
-#include "XCoreInstrInfo.h"
-#include "XCoreSelectionDAGInfo.h"
#include "XCoreSubtarget.h"
-#include "llvm/IR/DataLayout.h"
#include "llvm/Target/TargetMachine.h"
namespace llvm {
class XCoreTargetMachine : public LLVMTargetMachine {
XCoreSubtarget Subtarget;
- const DataLayout DL; // Calculates type size & alignment
- XCoreInstrInfo InstrInfo;
- XCoreFrameLowering FrameLowering;
- XCoreTargetLowering TLInfo;
- XCoreSelectionDAGInfo TSInfo;
public:
XCoreTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL);
- const XCoreInstrInfo *getInstrInfo() const override { return &InstrInfo; }
+ const XCoreInstrInfo *getInstrInfo() const override {
+ return getSubtargetImpl()->getInstrInfo();
+ }
const XCoreFrameLowering *getFrameLowering() const override {
- return &FrameLowering;
+ return getSubtargetImpl()->getFrameLowering();
}
const XCoreSubtarget *getSubtargetImpl() const override { return &Subtarget; }
const XCoreTargetLowering *getTargetLowering() const override {
- return &TLInfo;
+ return getSubtargetImpl()->getTargetLowering();
}
-
const XCoreSelectionDAGInfo* getSelectionDAGInfo() const override {
- return &TSInfo;
+ return getSubtargetImpl()->getSelectionDAGInfo();
}
-
const TargetRegisterInfo *getRegisterInfo() const override {
- return &InstrInfo.getRegisterInfo();
+ return getSubtargetImpl()->getRegisterInfo();
+ }
+ const DataLayout *getDataLayout() const override {
+ return getSubtargetImpl()->getDataLayout();
}
- const DataLayout *getDataLayout() const override { return &DL; }
// Pass Pipeline Configuration
TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
diff --git a/lib/Transforms/IPO/ArgumentPromotion.cpp b/lib/Transforms/IPO/ArgumentPromotion.cpp
index 377fa15..f9de54a 100644
--- a/lib/Transforms/IPO/ArgumentPromotion.cpp
+++ b/lib/Transforms/IPO/ArgumentPromotion.cpp
@@ -39,6 +39,8 @@
#include "llvm/IR/CFG.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
@@ -67,21 +69,24 @@ namespace {
bool runOnSCC(CallGraphSCC &SCC) override;
static char ID; // Pass identification, replacement for typeid
explicit ArgPromotion(unsigned maxElements = 3)
- : CallGraphSCCPass(ID), maxElements(maxElements) {
+ : CallGraphSCCPass(ID), DL(nullptr), maxElements(maxElements) {
initializeArgPromotionPass(*PassRegistry::getPassRegistry());
}
/// A vector used to hold the indices of a single GEP instruction
typedef std::vector<uint64_t> IndicesVector;
+ const DataLayout *DL;
private:
CallGraphNode *PromoteArguments(CallGraphNode *CGN);
bool isSafeToPromoteArgument(Argument *Arg, bool isByVal) const;
CallGraphNode *DoPromotion(Function *F,
SmallPtrSet<Argument*, 8> &ArgsToPromote,
SmallPtrSet<Argument*, 8> &ByValArgsToTransform);
+ bool doInitialization(CallGraph &CG) override;
/// The maximum number of elements to expand, or 0 for unlimited.
unsigned maxElements;
+ DenseMap<const Function *, DISubprogram> FunctionDIs;
};
}
@@ -100,6 +105,9 @@ Pass *llvm::createArgumentPromotionPass(unsigned maxElements) {
bool ArgPromotion::runOnSCC(CallGraphSCC &SCC) {
bool Changed = false, LocalChange;
+ DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
+ DL = DLP ? &DLP->getDataLayout() : nullptr;
+
do { // Iterate until we stop promoting from this SCC.
LocalChange = false;
// Attempt to promote arguments from all functions in this SCC.
@@ -215,7 +223,8 @@ CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) {
/// AllCallersPassInValidPointerForArgument - Return true if we can prove that
/// all callees pass in a valid pointer for the specified function argument.
-static bool AllCallersPassInValidPointerForArgument(Argument *Arg) {
+static bool AllCallersPassInValidPointerForArgument(Argument *Arg,
+ const DataLayout *DL) {
Function *Callee = Arg->getParent();
unsigned ArgNo = Arg->getArgNo();
@@ -226,7 +235,7 @@ static bool AllCallersPassInValidPointerForArgument(Argument *Arg) {
CallSite CS(U);
assert(CS && "Should only have direct calls!");
- if (!CS.getArgument(ArgNo)->isDereferenceablePointer())
+ if (!CS.getArgument(ArgNo)->isDereferenceablePointer(DL))
return false;
}
return true;
@@ -334,7 +343,7 @@ bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg,
GEPIndicesSet ToPromote;
// If the pointer is always valid, any load with first index 0 is valid.
- if (isByValOrInAlloca || AllCallersPassInValidPointerForArgument(Arg))
+ if (isByValOrInAlloca || AllCallersPassInValidPointerForArgument(Arg, DL))
SafeToUnconditionallyLoad.insert(IndicesVector(1, 0));
// First, iterate the entry block and mark loads of (geps of) arguments as
@@ -604,6 +613,10 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
Function *NF = Function::Create(NFTy, F->getLinkage(), F->getName());
NF->copyAttributesFrom(F);
+ // Patch the pointer to LLVM function in debug info descriptor.
+ auto DI = FunctionDIs.find(F);
+ if (DI != FunctionDIs.end())
+ DI->second.replaceFunction(NF);
DEBUG(dbgs() << "ARG PROMOTION: Promoting to:" << *NF << "\n"
<< "From: " << *F);
@@ -741,6 +754,7 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
if (cast<CallInst>(Call)->isTailCall())
cast<CallInst>(New)->setTailCall();
}
+ New->setDebugLoc(Call->getDebugLoc());
Args.clear();
AttributesVec.clear();
@@ -902,3 +916,8 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
return NF_CGN;
}
+
+bool ArgPromotion::doInitialization(CallGraph &CG) {
+ FunctionDIs = makeSubprogramMap(CG.getModule());
+ return CallGraphSCCPass::doInitialization(CG);
+}
diff --git a/lib/Transforms/IPO/DeadArgumentElimination.cpp b/lib/Transforms/IPO/DeadArgumentElimination.cpp
index 284b896..ac3853d 100644
--- a/lib/Transforms/IPO/DeadArgumentElimination.cpp
+++ b/lib/Transforms/IPO/DeadArgumentElimination.cpp
@@ -127,8 +127,7 @@ namespace {
// As code generation for the module is finished (and DIBuilder is
// finalized), we assume that subprogram descriptors won't be changed, and
// they are stored in the map for a short duration anyway.
- typedef DenseMap<Function*, DISubprogram> FunctionDIMap;
- FunctionDIMap FunctionDIs;
+ DenseMap<const Function *, DISubprogram> FunctionDIs;
protected:
// DAH uses this to specify a different ID.
@@ -150,7 +149,6 @@ namespace {
unsigned RetValNum = 0);
Liveness SurveyUses(const Value *V, UseVector &MaybeLiveUses);
- void CollectFunctionDIs(Module &M);
void SurveyFunction(const Function &F);
void MarkValue(const RetOrArg &RA, Liveness L,
const UseVector &MaybeLiveUses);
@@ -190,35 +188,6 @@ INITIALIZE_PASS(DAH, "deadarghaX0r",
ModulePass *llvm::createDeadArgEliminationPass() { return new DAE(); }
ModulePass *llvm::createDeadArgHackingPass() { return new DAH(); }
-/// CollectFunctionDIs - Map each function in the module to its debug info
-/// descriptor.
-void DAE::CollectFunctionDIs(Module &M) {
- FunctionDIs.clear();
-
- for (Module::named_metadata_iterator I = M.named_metadata_begin(),
- E = M.named_metadata_end(); I != E; ++I) {
- NamedMDNode &NMD = *I;
- for (unsigned MDIndex = 0, MDNum = NMD.getNumOperands();
- MDIndex < MDNum; ++MDIndex) {
- MDNode *Node = NMD.getOperand(MDIndex);
- if (!DIDescriptor(Node).isCompileUnit())
- continue;
- DICompileUnit CU(Node);
- const DIArray &SPs = CU.getSubprograms();
- for (unsigned SPIndex = 0, SPNum = SPs.getNumElements();
- SPIndex < SPNum; ++SPIndex) {
- DISubprogram SP(SPs.getElement(SPIndex));
- assert((!SP || SP.isSubprogram()) &&
- "A MDNode in subprograms of a CU should be null or a DISubprogram.");
- if (!SP)
- continue;
- if (Function *F = SP.getFunction())
- FunctionDIs[F] = SP;
- }
- }
- }
-}
-
/// DeleteDeadVarargs - If this is a function that takes a ... list, and if
/// llvm.vastart is never called, the varargs list is dead for the function.
bool DAE::DeleteDeadVarargs(Function &Fn) {
@@ -327,7 +296,7 @@ bool DAE::DeleteDeadVarargs(Function &Fn) {
}
// Patch the pointer to LLVM function in debug info descriptor.
- FunctionDIMap::iterator DI = FunctionDIs.find(&Fn);
+ auto DI = FunctionDIs.find(&Fn);
if (DI != FunctionDIs.end())
DI->second.replaceFunction(NF);
@@ -1087,7 +1056,7 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
}
// Patch the pointer to LLVM function in debug info descriptor.
- FunctionDIMap::iterator DI = FunctionDIs.find(F);
+ auto DI = FunctionDIs.find(F);
if (DI != FunctionDIs.end())
DI->second.replaceFunction(NF);
@@ -1101,7 +1070,7 @@ bool DAE::runOnModule(Module &M) {
bool Changed = false;
// Collect debug info descriptors for functions.
- CollectFunctionDIs(M);
+ FunctionDIs = makeSubprogramMap(M);
// First pass: Do a simple check to see if any functions can have their "..."
// removed. We can do this if they never call va_start. This loop cannot be
diff --git a/lib/Transforms/IPO/FunctionAttrs.cpp b/lib/Transforms/IPO/FunctionAttrs.cpp
index fed8839..8174df9 100644
--- a/lib/Transforms/IPO/FunctionAttrs.cpp
+++ b/lib/Transforms/IPO/FunctionAttrs.cpp
@@ -449,14 +449,29 @@ determinePointerReadAttrs(Argument *A,
case Instruction::Call:
case Instruction::Invoke: {
+ bool Captures = true;
+
+ if (I->getType()->isVoidTy())
+ Captures = false;
+
+ auto AddUsersToWorklistIfCapturing = [&] {
+ if (Captures)
+ for (Use &UU : I->uses())
+ if (Visited.insert(&UU))
+ Worklist.push_back(&UU);
+ };
+
CallSite CS(I);
- if (CS.doesNotAccessMemory())
+ if (CS.doesNotAccessMemory()) {
+ AddUsersToWorklistIfCapturing();
continue;
+ }
Function *F = CS.getCalledFunction();
if (!F) {
if (CS.onlyReadsMemory()) {
IsRead = true;
+ AddUsersToWorklistIfCapturing();
continue;
}
return Attribute::None;
@@ -471,6 +486,7 @@ determinePointerReadAttrs(Argument *A,
"More params than args in non-varargs call.");
return Attribute::None;
}
+ Captures &= !CS.doesNotCapture(A - B);
if (SCCNodes.count(AI))
continue;
if (!CS.onlyReadsMemory() && !CS.onlyReadsMemory(A - B))
@@ -479,6 +495,7 @@ determinePointerReadAttrs(Argument *A,
IsRead = true;
}
}
+ AddUsersToWorklistIfCapturing();
break;
}
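
The AddUsersToWorklistIfCapturing lambda above gates worklist growth on whether the call can still capture the pointer: a call that returns void, or whose callee does not capture the argument, cannot forward the pointer through its result, so the call's users need no further inspection. A standalone sketch of that guarded-propagation idiom (hypothetical names, STL containers standing in for LLVM's):

#include <set>
#include <vector>

struct Node { std::vector<Node *> Users; };

// Push a node's users onto the worklist only while the value may escape.
static void addUsersIfCapturing(bool Captures, Node *N,
                                std::set<Node *> &Visited,
                                std::vector<Node *> &Worklist) {
  if (!Captures)
    return; // the pointer cannot flow out through this node's result
  for (Node *U : N->Users)
    if (Visited.insert(U).second) // not seen before
      Worklist.push_back(U);
}
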
diff --git a/lib/Transforms/IPO/GlobalDCE.cpp b/lib/Transforms/IPO/GlobalDCE.cpp
index 9decddc..7e7a4c0 100644
--- a/lib/Transforms/IPO/GlobalDCE.cpp
+++ b/lib/Transforms/IPO/GlobalDCE.cpp
@@ -62,7 +62,7 @@ static bool isEmptyFunction(Function *F) {
if (Entry.size() != 1 || !isa<ReturnInst>(Entry.front()))
return false;
ReturnInst &RI = cast<ReturnInst>(Entry.front());
- return RI.getReturnValue() == NULL;
+ return RI.getReturnValue() == nullptr;
}
char GlobalDCE::ID = 0;
@@ -77,13 +77,19 @@ bool GlobalDCE::runOnModule(Module &M) {
// Remove empty functions from the global ctors list.
Changed |= optimizeGlobalCtorsList(M, isEmptyFunction);
+ typedef std::multimap<const Comdat *, GlobalValue *> ComdatGVPairsTy;
+ ComdatGVPairsTy ComdatGVPairs;
+
// Loop over the module, adding globals which are obviously necessary.
for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
Changed |= RemoveUnusedGlobalValue(*I);
// Functions with external linkage are needed if they have a body
- if (!I->isDiscardableIfUnused() &&
- !I->isDeclaration() && !I->hasAvailableExternallyLinkage())
- GlobalIsNeeded(I);
+ if (!I->isDeclaration() && !I->hasAvailableExternallyLinkage()) {
+ if (!I->isDiscardableIfUnused())
+ GlobalIsNeeded(I);
+ else if (const Comdat *C = I->getComdat())
+ ComdatGVPairs.insert(std::make_pair(C, I));
+ }
}
for (Module::global_iterator I = M.global_begin(), E = M.global_end();
@@ -91,17 +97,38 @@ bool GlobalDCE::runOnModule(Module &M) {
Changed |= RemoveUnusedGlobalValue(*I);
// Externally visible & appending globals are needed, if they have an
// initializer.
- if (!I->isDiscardableIfUnused() &&
- !I->isDeclaration() && !I->hasAvailableExternallyLinkage())
- GlobalIsNeeded(I);
+ if (!I->isDeclaration() && !I->hasAvailableExternallyLinkage()) {
+ if (!I->isDiscardableIfUnused())
+ GlobalIsNeeded(I);
+ else if (const Comdat *C = I->getComdat())
+ ComdatGVPairs.insert(std::make_pair(C, I));
+ }
}
for (Module::alias_iterator I = M.alias_begin(), E = M.alias_end();
I != E; ++I) {
Changed |= RemoveUnusedGlobalValue(*I);
// Externally visible aliases are needed.
- if (!I->isDiscardableIfUnused())
+ if (!I->isDiscardableIfUnused()) {
GlobalIsNeeded(I);
+ } else if (const Comdat *C = I->getComdat()) {
+ ComdatGVPairs.insert(std::make_pair(C, I));
+ }
+ }
+
+ for (ComdatGVPairsTy::iterator I = ComdatGVPairs.begin(),
+ E = ComdatGVPairs.end();
+ I != E;) {
+ ComdatGVPairsTy::iterator UB = ComdatGVPairs.upper_bound(I->first);
+ bool CanDiscard = std::all_of(I, UB, [](ComdatGVPairsTy::value_type Pair) {
+ return Pair.second->isDiscardableIfUnused();
+ });
+ if (!CanDiscard) {
+ std::for_each(I, UB, [this](ComdatGVPairsTy::value_type Pair) {
+ GlobalIsNeeded(Pair.second);
+ });
+ }
+ I = UB;
}
// Now that all globals which are needed are in the AliveGlobals set, we loop
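
The comdat handling above defers discardable members and then walks the multimap one key-group at a time: upper_bound finds the end of the current comdat's run, and the whole group is kept alive unless every member is discardable. A compact sketch of that grouping idiom with plain STL types (hypothetical data):

#include <algorithm>
#include <iostream>
#include <map>
#include <string>
#include <utility>

int main() {
  // comdat name -> whether that member is discardable if unused
  std::multimap<std::string, bool> Groups;
  Groups.insert({"c1", true});
  Groups.insert({"c1", false}); // one live member keeps the whole group
  Groups.insert({"c2", true});

  for (auto I = Groups.begin(), E = Groups.end(); I != E;) {
    auto UB = Groups.upper_bound(I->first); // end of this comdat's run
    bool CanDiscard =
        std::all_of(I, UB, [](const std::pair<const std::string, bool> &P) {
          return P.second;
        });
    std::cout << I->first << (CanDiscard ? ": discardable\n" : ": needed\n");
    I = UB; // jump to the next comdat group
  }
}
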
diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp
index ae80c43..c1d0d3b 100644
--- a/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/lib/Transforms/IPO/GlobalOpt.cpp
@@ -17,6 +17,7 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/ConstantFolding.h"
@@ -1699,9 +1700,6 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) {
/// possible. If we make a change, return true.
bool GlobalOpt::ProcessGlobal(GlobalVariable *GV,
Module::global_iterator &GVI) {
- if (!GV->isDiscardableIfUnused())
- return false;
-
// Do more involved optimizations if the global is internal.
GV->removeDeadConstantUsers();
@@ -1910,7 +1908,7 @@ bool GlobalOpt::OptimizeFunctions(Module &M) {
for (Module::iterator FI = M.begin(), E = M.end(); FI != E; ) {
Function *F = FI++;
// Functions without names cannot be referenced outside this module.
- if (!F->hasName() && !F->isDeclaration())
+ if (!F->hasName() && !F->isDeclaration() && !F->hasLocalLinkage())
F->setLinkage(GlobalValue::InternalLinkage);
F->removeDeadConstantUsers();
if (F->isDefTriviallyDead()) {
@@ -1944,11 +1942,18 @@ bool GlobalOpt::OptimizeFunctions(Module &M) {
bool GlobalOpt::OptimizeGlobalVars(Module &M) {
bool Changed = false;
+
+ SmallSet<const Comdat *, 8> NotDiscardableComdats;
+ for (const GlobalVariable &GV : M.globals())
+ if (const Comdat *C = GV.getComdat())
+ if (!GV.isDiscardableIfUnused())
+ NotDiscardableComdats.insert(C);
+
for (Module::global_iterator GVI = M.global_begin(), E = M.global_end();
GVI != E; ) {
GlobalVariable *GV = GVI++;
// Global variables without names cannot be referenced outside this module.
- if (!GV->hasName() && !GV->isDeclaration())
+ if (!GV->hasName() && !GV->isDeclaration() && !GV->hasLocalLinkage())
GV->setLinkage(GlobalValue::InternalLinkage);
// Simplify the initializer.
if (GV->hasInitializer())
@@ -1958,7 +1963,12 @@ bool GlobalOpt::OptimizeGlobalVars(Module &M) {
GV->setInitializer(New);
}
- Changed |= ProcessGlobal(GV, GVI);
+ if (GV->isDiscardableIfUnused()) {
+ if (const Comdat *C = GV->getComdat())
+ if (NotDiscardableComdats.count(C))
+ continue;
+ Changed |= ProcessGlobal(GV, GVI);
+ }
}
return Changed;
}
@@ -1980,10 +1990,13 @@ isSimpleEnoughValueToCommit(Constant *C,
static bool isSimpleEnoughValueToCommitHelper(Constant *C,
SmallPtrSet<Constant*, 8> &SimpleConstants,
const DataLayout *DL) {
- // Simple integer, undef, constant aggregate zero, global addresses, etc are
- // all supported.
- if (C->getNumOperands() == 0 || isa<BlockAddress>(C) ||
- isa<GlobalValue>(C))
+ // Simple global addresses are supported; do not allow dllimport or
+ // thread-local globals.
+ if (auto *GV = dyn_cast<GlobalValue>(C))
+ return !GV->hasDLLImportStorageClass() && !GV->isThreadLocal();
+
+ // Simple integer, undef, constant aggregate zero, etc. are all supported.
+ if (C->getNumOperands() == 0 || isa<BlockAddress>(C))
return true;
// Aggregate values are safe if all their elements are.
@@ -2054,8 +2067,7 @@ static bool isSimpleEnoughPointerToCommit(Constant *C) {
return false;
if (GlobalVariable *GV = dyn_cast<GlobalVariable>(C))
- // Do not allow weak/*_odr/linkonce/dllimport/dllexport linkage or
- // external globals.
+ // Do not allow weak/*_odr/linkonce linkage or external globals.
return GV->hasUniqueInitializer();
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
@@ -2846,14 +2858,19 @@ bool GlobalOpt::OptimizeGlobalAliases(Module &M) {
I != E;) {
Module::alias_iterator J = I++;
// Aliases without names cannot be referenced outside this module.
- if (!J->hasName() && !J->isDeclaration())
+ if (!J->hasName() && !J->isDeclaration() && !J->hasLocalLinkage())
J->setLinkage(GlobalValue::InternalLinkage);
// If the aliasee may change at link time, nothing can be done - bail out.
if (J->mayBeOverridden())
continue;
Constant *Aliasee = J->getAliasee();
- GlobalValue *Target = cast<GlobalValue>(Aliasee->stripPointerCasts());
+ GlobalValue *Target = dyn_cast<GlobalValue>(Aliasee->stripPointerCasts());
+ // We can't trivially replace the alias with the aliasee if the aliasee is
+ // non-trivial in some way.
+ // TODO: Try to handle non-zero GEPs of local aliasees.
+ if (!Target)
+ continue;
Target->removeDeadConstantUsers();
// Make all users of the alias use the aliasee instead.
diff --git a/lib/Transforms/IPO/MergeFunctions.cpp b/lib/Transforms/IPO/MergeFunctions.cpp
index c3a2b12..559ef0b 100644
--- a/lib/Transforms/IPO/MergeFunctions.cpp
+++ b/lib/Transforms/IPO/MergeFunctions.cpp
@@ -9,13 +9,24 @@
//
// This pass looks for equivalent functions that are mergable and folds them.
//
-// A hash is computed from the function, based on its type and number of
-// basic blocks.
+// An order relation is defined on the set of functions. It is computed by a
+// function comparison procedure that returns
+// 0 when the functions are equal,
+// -1 when the left function is less than the right function, and
+// 1 in the opposite case. We need a total ordering, so we must maintain
+// four properties on the function set:
+// a <= a (reflexivity)
+// if a <= b and b <= a then a = b (antisymmetry)
+// if a <= b and b <= c then a <= c (transitivity)
+// for all a and b: a <= b or b <= a (totality)
//
-// Once all hashes are computed, we perform an expensive equality comparison
-// on each function pair. This takes n^2/2 comparisons per bucket, so it's
-// important that the hash function be high quality. The equality comparison
-// iterates through each instruction in each basic block.
+// The comparison iterates through each instruction in each basic block.
+// Functions are kept in a binary tree, and for each new function F we
+// perform a lookup in that tree.
+// In practice it works the following way:
+// -- We define a Function* container class with a custom "operator<" (FunctionPtr).
+// -- "FunctionPtr" instances are stored in a std::set collection, so every
+//    std::set::insert operation completes in O(log N) time.
//
// When a match is found the functions are folded. If both functions are
// overridable, we move the functionality into a new internal function and
@@ -31,9 +42,6 @@
// the object they belong to. However, as long as it's only used for a lookup
// and call, this is irrelevant, and we'd like to fold such functions.
//
-// * switch from n^2 pair-wise comparisons to an n-way comparison for each
-// bucket.
-//
// * be smarter about bitcasts.
//
// In order to fold functions, we will sometimes add either bitcast instructions
@@ -41,6 +49,36 @@
// analysis since the two functions differ where one has a bitcast and the
// other doesn't. We should learn to look through bitcasts.
//
+// * Compare complex types with pointer types inside.
+// * Compare cross-reference cases.
+// * Compare complex expressions.
+//
+// All the three issues above could be described as ability to prove that
+// fA == fB == fC == fE == fF == fG in example below:
+//
+// void fA() {
+// fB();
+// }
+// void fB() {
+// fA();
+// }
+//
+// void fE() {
+// fF();
+// }
+// void fF() {
+// fG();
+// }
+// void fG() {
+// fE();
+// }
+//
+// The simplest cross-reference case (fA <--> fB) was implemented in previous
+// versions of MergeFunctions, though it appeared in only two function pairs
+// in the test-suite (which contains >50k functions).
+// The ability to detect complex cross-referencing (e.g.: A->B->C->D->A),
+// however, could cover many more cases.
+//
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/IPO.h"
@@ -60,6 +98,7 @@
#include "llvm/IR/Operator.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
@@ -73,89 +112,12 @@ STATISTIC(NumThunksWritten, "Number of thunks generated");
STATISTIC(NumAliasesWritten, "Number of aliases generated");
STATISTIC(NumDoubleWeak, "Number of new functions created");
-/// Returns the type id for a type to be hashed. We turn pointer types into
-/// integers here because the actual compare logic below considers pointers and
-/// integers of the same size as equal.
-static Type::TypeID getTypeIDForHash(Type *Ty) {
- if (Ty->isPointerTy())
- return Type::IntegerTyID;
- return Ty->getTypeID();
-}
-
-/// Creates a hash-code for the function which is the same for any two
-/// functions that will compare equal, without looking at the instructions
-/// inside the function.
-static unsigned profileFunction(const Function *F) {
- FunctionType *FTy = F->getFunctionType();
-
- FoldingSetNodeID ID;
- ID.AddInteger(F->size());
- ID.AddInteger(F->getCallingConv());
- ID.AddBoolean(F->hasGC());
- ID.AddBoolean(FTy->isVarArg());
- ID.AddInteger(getTypeIDForHash(FTy->getReturnType()));
- for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i)
- ID.AddInteger(getTypeIDForHash(FTy->getParamType(i)));
- return ID.ComputeHash();
-}
-
-namespace {
-
-/// ComparableFunction - A struct that pairs together functions with a
-/// DataLayout so that we can keep them together as elements in the DenseSet.
-class ComparableFunction {
-public:
- static const ComparableFunction EmptyKey;
- static const ComparableFunction TombstoneKey;
- static DataLayout * const LookupOnly;
-
- ComparableFunction(Function *Func, const DataLayout *DL)
- : Func(Func), Hash(profileFunction(Func)), DL(DL) {}
-
- Function *getFunc() const { return Func; }
- unsigned getHash() const { return Hash; }
- const DataLayout *getDataLayout() const { return DL; }
-
- // Drops AssertingVH reference to the function. Outside of debug mode, this
- // does nothing.
- void release() {
- assert(Func &&
- "Attempted to release function twice, or release empty/tombstone!");
- Func = nullptr;
- }
-
-private:
- explicit ComparableFunction(unsigned Hash)
- : Func(nullptr), Hash(Hash), DL(nullptr) {}
-
- AssertingVH<Function> Func;
- unsigned Hash;
- const DataLayout *DL;
-};
-
-const ComparableFunction ComparableFunction::EmptyKey = ComparableFunction(0);
-const ComparableFunction ComparableFunction::TombstoneKey =
- ComparableFunction(1);
-DataLayout *const ComparableFunction::LookupOnly = (DataLayout*)(-1);
-
-}
-
-namespace llvm {
- template <>
- struct DenseMapInfo<ComparableFunction> {
- static ComparableFunction getEmptyKey() {
- return ComparableFunction::EmptyKey;
- }
- static ComparableFunction getTombstoneKey() {
- return ComparableFunction::TombstoneKey;
- }
- static unsigned getHashValue(const ComparableFunction &CF) {
- return CF.getHash();
- }
- static bool isEqual(const ComparableFunction &LHS,
- const ComparableFunction &RHS);
- };
-}
+static cl::opt<unsigned> NumFunctionsForSanityCheck(
+ "mergefunc-sanity",
+ cl::desc("How many functions in module could be used for "
+ "MergeFunctions pass sanity check. "
+ "'0' disables this check. Works only with '-debug' key."),
+ cl::init(0), cl::Hidden);
namespace {
@@ -167,14 +129,14 @@ class FunctionComparator {
public:
FunctionComparator(const DataLayout *DL, const Function *F1,
const Function *F2)
- : F1(F1), F2(F2), DL(DL) {}
+ : FnL(F1), FnR(F2), DL(DL) {}
/// Test whether the two functions have equivalent behaviour.
- bool compare();
+ int compare();
private:
/// Test whether two basic blocks have equivalent behaviour.
- bool compare(const BasicBlock *BB1, const BasicBlock *BB2);
+ int compare(const BasicBlock *BBL, const BasicBlock *BBR);
/// Constants comparison.
/// It's analogous to lexicographical comparison between hypothetical numbers
@@ -300,10 +262,6 @@ private:
/// see comments for sn_mapL and sn_mapR.
int cmpValues(const Value *L, const Value *R);
- bool enumerate(const Value *V1, const Value *V2) {
- return cmpValues(V1, V2) == 0;
- }
-
/// Compare two Instructions for equivalence, similar to
/// Instruction::isSameOperationAs but with modifications to the type
/// comparison.
@@ -325,15 +283,11 @@ private:
/// 6.1.Load: volatile (as boolean flag)
/// 6.2.Load: alignment (as integer numbers)
/// 6.3.Load: synch-scope (as integer numbers)
+ /// 6.4.Load: range metadata (as integer numbers)
/// At this stage it's better to read the code, since it's no more than 10-15
/// lines per particular instruction, and may change from time to time.
int cmpOperation(const Instruction *L, const Instruction *R) const;
- bool isEquivalentOperation(const Instruction *I1,
- const Instruction *I2) const {
- return cmpOperation(I1, I2) == 0;
- }
-
/// Compare two GEPs for equivalent pointer arithmetic.
/// Parts to be compared for each comparison stage,
/// most significant stage first:
@@ -348,14 +302,6 @@ private:
return cmpGEP(cast<GEPOperator>(GEPL), cast<GEPOperator>(GEPR));
}
- bool isEquivalentGEP(const GEPOperator *GEP1, const GEPOperator *GEP2) {
- return cmpGEP(GEP1, GEP2) == 0;
- }
- bool isEquivalentGEP(const GetElementPtrInst *GEP1,
- const GetElementPtrInst *GEP2) {
- return isEquivalentGEP(cast<GEPOperator>(GEP1), cast<GEPOperator>(GEP2));
- }
-
/// cmpType - compares two types,
/// defines a total ordering among the set of types.
///
@@ -398,10 +344,6 @@ private:
/// 6. For all other cases put llvm_unreachable.
int cmpType(Type *TyL, Type *TyR) const;
- bool isEquivalentType(Type *Ty1, Type *Ty2) const {
- return cmpType(Ty1, Ty2) == 0;
- }
-
int cmpNumbers(uint64_t L, uint64_t R) const;
int cmpAPInt(const APInt &L, const APInt &R) const;
@@ -410,7 +352,7 @@ private:
int cmpAttrs(const AttributeSet L, const AttributeSet R) const;
// The two functions undergoing comparison.
- const Function *F1, *F2;
+ const Function *FnL, *FnR;
const DataLayout *DL;
@@ -450,6 +392,18 @@ private:
DenseMap<const Value*, int> sn_mapL, sn_mapR;
};
+class FunctionPtr {
+ AssertingVH<Function> F;
+ const DataLayout *DL;
+
+public:
+ FunctionPtr(Function *F, const DataLayout *DL) : F(F), DL(DL) {}
+ Function *getFunc() const { return F; }
+ void release() { F = 0; }
+ bool operator<(const FunctionPtr &RHS) const {
+ return (FunctionComparator(DL, F, RHS.getFunc()).compare()) == -1;
+ }
+};
}
int FunctionComparator::cmpNumbers(uint64_t L, uint64_t R) const {
@@ -788,7 +742,11 @@ int FunctionComparator::cmpOperation(const Instruction *L,
if (int Res =
cmpNumbers(LI->getOrdering(), cast<LoadInst>(R)->getOrdering()))
return Res;
- return cmpNumbers(LI->getSynchScope(), cast<LoadInst>(R)->getSynchScope());
+ if (int Res =
+ cmpNumbers(LI->getSynchScope(), cast<LoadInst>(R)->getSynchScope()))
+ return Res;
+ return cmpNumbers((uint64_t)LI->getMetadata(LLVMContext::MD_range),
+ (uint64_t)cast<LoadInst>(R)->getMetadata(LLVMContext::MD_range));
}
if (const StoreInst *SI = dyn_cast<StoreInst>(L)) {
if (int Res =
@@ -847,6 +805,9 @@ int FunctionComparator::cmpOperation(const Instruction *L,
if (int Res = cmpNumbers(CXI->isVolatile(),
cast<AtomicCmpXchgInst>(R)->isVolatile()))
return Res;
+ if (int Res = cmpNumbers(CXI->isWeak(),
+ cast<AtomicCmpXchgInst>(R)->isWeak()))
+ return Res;
if (int Res = cmpNumbers(CXI->getSuccessOrdering(),
cast<AtomicCmpXchgInst>(R)->getSuccessOrdering()))
return Res;
@@ -914,13 +875,13 @@ int FunctionComparator::cmpGEP(const GEPOperator *GEPL,
/// See comments in declaration for more details.
int FunctionComparator::cmpValues(const Value *L, const Value *R) {
// Catch self-reference case.
- if (L == F1) {
- if (R == F2)
+ if (L == FnL) {
+ if (R == FnR)
return 0;
return -1;
}
- if (R == F2) {
- if (L == F1)
+ if (R == FnR) {
+ if (L == FnL)
return 0;
return 1;
}
@@ -954,90 +915,102 @@ int FunctionComparator::cmpValues(const Value *L, const Value *R) {
return cmpNumbers(LeftSN.first->second, RightSN.first->second);
}
// Test whether two basic blocks have equivalent behaviour.
-bool FunctionComparator::compare(const BasicBlock *BB1, const BasicBlock *BB2) {
- BasicBlock::const_iterator F1I = BB1->begin(), F1E = BB1->end();
- BasicBlock::const_iterator F2I = BB2->begin(), F2E = BB2->end();
+int FunctionComparator::compare(const BasicBlock *BBL, const BasicBlock *BBR) {
+ BasicBlock::const_iterator InstL = BBL->begin(), InstLE = BBL->end();
+ BasicBlock::const_iterator InstR = BBR->begin(), InstRE = BBR->end();
do {
- if (!enumerate(F1I, F2I))
- return false;
+ if (int Res = cmpValues(InstL, InstR))
+ return Res;
- if (const GetElementPtrInst *GEP1 = dyn_cast<GetElementPtrInst>(F1I)) {
- const GetElementPtrInst *GEP2 = dyn_cast<GetElementPtrInst>(F2I);
- if (!GEP2)
- return false;
+ const GetElementPtrInst *GEPL = dyn_cast<GetElementPtrInst>(InstL);
+ const GetElementPtrInst *GEPR = dyn_cast<GetElementPtrInst>(InstR);
- if (!enumerate(GEP1->getPointerOperand(), GEP2->getPointerOperand()))
- return false;
+ if (GEPL && !GEPR)
+ return 1;
+ if (GEPR && !GEPL)
+ return -1;
- if (!isEquivalentGEP(GEP1, GEP2))
- return false;
+ if (GEPL && GEPR) {
+ if (int Res =
+ cmpValues(GEPL->getPointerOperand(), GEPR->getPointerOperand()))
+ return Res;
+ if (int Res = cmpGEP(GEPL, GEPR))
+ return Res;
} else {
- if (!isEquivalentOperation(F1I, F2I))
- return false;
-
- assert(F1I->getNumOperands() == F2I->getNumOperands());
- for (unsigned i = 0, e = F1I->getNumOperands(); i != e; ++i) {
- Value *OpF1 = F1I->getOperand(i);
- Value *OpF2 = F2I->getOperand(i);
-
- if (!enumerate(OpF1, OpF2))
- return false;
+ if (int Res = cmpOperation(InstL, InstR))
+ return Res;
+ assert(InstL->getNumOperands() == InstR->getNumOperands());
- if (OpF1->getValueID() != OpF2->getValueID() ||
- !isEquivalentType(OpF1->getType(), OpF2->getType()))
- return false;
+ for (unsigned i = 0, e = InstL->getNumOperands(); i != e; ++i) {
+ Value *OpL = InstL->getOperand(i);
+ Value *OpR = InstR->getOperand(i);
+ if (int Res = cmpValues(OpL, OpR))
+ return Res;
+ if (int Res = cmpNumbers(OpL->getValueID(), OpR->getValueID()))
+ return Res;
+ // TODO: Already checked in cmpOperation
+ if (int Res = cmpType(OpL->getType(), OpR->getType()))
+ return Res;
}
}
- ++F1I, ++F2I;
- } while (F1I != F1E && F2I != F2E);
+ ++InstL, ++InstR;
+ } while (InstL != InstLE && InstR != InstRE);
- return F1I == F1E && F2I == F2E;
+ if (InstL != InstLE && InstR == InstRE)
+ return 1;
+ if (InstL == InstLE && InstR != InstRE)
+ return -1;
+ return 0;
}
// Test whether the two functions have equivalent behaviour.
-bool FunctionComparator::compare() {
- // We need to recheck everything, but check the things that weren't included
- // in the hash first.
+int FunctionComparator::compare() {
sn_mapL.clear();
sn_mapR.clear();
- if (F1->getAttributes() != F2->getAttributes())
- return false;
+ if (int Res = cmpAttrs(FnL->getAttributes(), FnR->getAttributes()))
+ return Res;
- if (F1->hasGC() != F2->hasGC())
- return false;
+ if (int Res = cmpNumbers(FnL->hasGC(), FnR->hasGC()))
+ return Res;
- if (F1->hasGC() && F1->getGC() != F2->getGC())
- return false;
+ if (FnL->hasGC()) {
+ if (int Res = cmpNumbers((uint64_t)FnL->getGC(), (uint64_t)FnR->getGC()))
+ return Res;
+ }
- if (F1->hasSection() != F2->hasSection())
- return false;
+ if (int Res = cmpNumbers(FnL->hasSection(), FnR->hasSection()))
+ return Res;
- if (F1->hasSection() && F1->getSection() != F2->getSection())
- return false;
+ if (FnL->hasSection()) {
+ if (int Res = cmpStrings(FnL->getSection(), FnR->getSection()))
+ return Res;
+ }
- if (F1->isVarArg() != F2->isVarArg())
- return false;
+ if (int Res = cmpNumbers(FnL->isVarArg(), FnR->isVarArg()))
+ return Res;
// TODO: if it's internal and only used in direct calls, we could handle this
// case too.
- if (F1->getCallingConv() != F2->getCallingConv())
- return false;
+ if (int Res = cmpNumbers(FnL->getCallingConv(), FnR->getCallingConv()))
+ return Res;
- if (!isEquivalentType(F1->getFunctionType(), F2->getFunctionType()))
- return false;
+ if (int Res = cmpType(FnL->getFunctionType(), FnR->getFunctionType()))
+ return Res;
- assert(F1->arg_size() == F2->arg_size() &&
+ assert(FnL->arg_size() == FnR->arg_size() &&
"Identically typed functions have different numbers of args!");
// Visit the arguments so that they get enumerated in the order they're
// passed in.
- for (Function::const_arg_iterator f1i = F1->arg_begin(),
- f2i = F2->arg_begin(), f1e = F1->arg_end(); f1i != f1e; ++f1i, ++f2i) {
- if (!enumerate(f1i, f2i))
+ for (Function::const_arg_iterator ArgLI = FnL->arg_begin(),
+ ArgRI = FnR->arg_begin(),
+ ArgLE = FnL->arg_end();
+ ArgLI != ArgLE; ++ArgLI, ++ArgRI) {
+ if (cmpValues(ArgLI, ArgRI) != 0)
llvm_unreachable("Arguments repeat!");
}
@@ -1045,33 +1018,36 @@ bool FunctionComparator::compare() {
// linked list is immaterial. Our walk starts at the entry block for both
// functions, then takes each block from each terminator in order. As an
// artifact, this also means that unreachable blocks are ignored.
- SmallVector<const BasicBlock *, 8> F1BBs, F2BBs;
+ SmallVector<const BasicBlock *, 8> FnLBBs, FnRBBs;
SmallSet<const BasicBlock *, 128> VisitedBBs; // in terms of F1.
- F1BBs.push_back(&F1->getEntryBlock());
- F2BBs.push_back(&F2->getEntryBlock());
+ FnLBBs.push_back(&FnL->getEntryBlock());
+ FnRBBs.push_back(&FnR->getEntryBlock());
- VisitedBBs.insert(F1BBs[0]);
- while (!F1BBs.empty()) {
- const BasicBlock *F1BB = F1BBs.pop_back_val();
- const BasicBlock *F2BB = F2BBs.pop_back_val();
+ VisitedBBs.insert(FnLBBs[0]);
+ while (!FnLBBs.empty()) {
+ const BasicBlock *BBL = FnLBBs.pop_back_val();
+ const BasicBlock *BBR = FnRBBs.pop_back_val();
- if (!enumerate(F1BB, F2BB) || !compare(F1BB, F2BB))
- return false;
+ if (int Res = cmpValues(BBL, BBR))
+ return Res;
+
+ if (int Res = compare(BBL, BBR))
+ return Res;
- const TerminatorInst *F1TI = F1BB->getTerminator();
- const TerminatorInst *F2TI = F2BB->getTerminator();
+ const TerminatorInst *TermL = BBL->getTerminator();
+ const TerminatorInst *TermR = BBR->getTerminator();
- assert(F1TI->getNumSuccessors() == F2TI->getNumSuccessors());
- for (unsigned i = 0, e = F1TI->getNumSuccessors(); i != e; ++i) {
- if (!VisitedBBs.insert(F1TI->getSuccessor(i)))
+ assert(TermL->getNumSuccessors() == TermR->getNumSuccessors());
+ for (unsigned i = 0, e = TermL->getNumSuccessors(); i != e; ++i) {
+ if (!VisitedBBs.insert(TermL->getSuccessor(i)))
continue;
- F1BBs.push_back(F1TI->getSuccessor(i));
- F2BBs.push_back(F2TI->getSuccessor(i));
+ FnLBBs.push_back(TermL->getSuccessor(i));
+ FnRBBs.push_back(TermR->getSuccessor(i));
}
}
- return true;
+ return 0;
}
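
Both compare() routines above rely on the same chained three-way idiom: each stage returns as soon as it produces a nonzero result, so later stages only ever break ties. A tiny generic illustration of that lexicographic chaining (not LLVM code):

#include <cassert>
#include <utility>

static int cmp3(int L, int R) { return L < R ? -1 : (L > R ? 1 : 0); }

// Lexicographic chaining: the first significant stage decides the order.
static int comparePair(const std::pair<int, int> &L,
                       const std::pair<int, int> &R) {
  if (int Res = cmp3(L.first, R.first))
    return Res;                    // stage 1 already differs
  return cmp3(L.second, R.second); // stage 2 breaks the tie
}

int main() {
  assert(comparePair({1, 9}, {2, 0}) == -1);
  assert(comparePair({1, 9}, {1, 3}) == 1);
}
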
namespace {
@@ -1092,21 +1068,25 @@ public:
bool runOnModule(Module &M) override;
private:
- typedef DenseSet<ComparableFunction> FnSetType;
+ typedef std::set<FunctionPtr> FnTreeType;
/// A work queue of functions that may have been modified and should be
/// analyzed again.
std::vector<WeakVH> Deferred;
- /// Insert a ComparableFunction into the FnSet, or merge it away if it's
+ /// Checks the rules of the order relation introduced on the function set.
+ /// Returns true if the sanity check passed, and false otherwise.
+ bool doSanityCheck(std::vector<WeakVH> &Worklist);
+
+ /// Insert a Function into the FnTree, or merge it away if it's
/// equal to one that's already present.
- bool insert(ComparableFunction &NewF);
+ bool insert(Function *NewFunction);
- /// Remove a Function from the FnSet and queue it up for a second sweep of
+ /// Remove a Function from the FnTree and queue it up for a second sweep of
/// analysis.
void remove(Function *F);
- /// Find the functions that use this Value and remove them from FnSet and
+ /// Find the functions that use this Value and remove them from FnTree and
/// queue the functions.
void removeUsers(Value *V);
@@ -1131,7 +1111,7 @@ private:
/// The set of all distinct functions. Use the insert() and remove() methods
/// to modify it.
- FnSetType FnSet;
+ FnTreeType FnTree;
/// DataLayout for more accurate GEP comparisons. May be NULL.
const DataLayout *DL;
@@ -1149,6 +1129,78 @@ ModulePass *llvm::createMergeFunctionsPass() {
return new MergeFunctions();
}
+bool MergeFunctions::doSanityCheck(std::vector<WeakVH> &Worklist) {
+ if (const unsigned Max = NumFunctionsForSanityCheck) {
+ unsigned TripleNumber = 0;
+ bool Valid = true;
+
+ dbgs() << "MERGEFUNC-SANITY: Started for first " << Max << " functions.\n";
+
+ unsigned i = 0;
+ for (std::vector<WeakVH>::iterator I = Worklist.begin(), E = Worklist.end();
+ I != E && i < Max; ++I, ++i) {
+ unsigned j = i;
+ for (std::vector<WeakVH>::iterator J = I; J != E && j < Max; ++J, ++j) {
+ Function *F1 = cast<Function>(*I);
+ Function *F2 = cast<Function>(*J);
+ int Res1 = FunctionComparator(DL, F1, F2).compare();
+ int Res2 = FunctionComparator(DL, F2, F1).compare();
+
+ // If F1 <= F2, then F2 >= F1, otherwise report failure.
+ if (Res1 != -Res2) {
+ dbgs() << "MERGEFUNC-SANITY: Non-symmetric; triple: " << TripleNumber
+ << "\n";
+ F1->dump();
+ F2->dump();
+ Valid = false;
+ }
+
+ if (Res1 == 0)
+ continue;
+
+ unsigned k = j;
+ for (std::vector<WeakVH>::iterator K = J; K != E && k < Max;
+ ++k, ++K, ++TripleNumber) {
+ if (K == J)
+ continue;
+
+ Function *F3 = cast<Function>(*K);
+ int Res3 = FunctionComparator(DL, F1, F3).compare();
+ int Res4 = FunctionComparator(DL, F2, F3).compare();
+
+ bool Transitive = true;
+
+ if (Res1 != 0 && Res1 == Res4) {
+ // F1 > F2, F2 > F3 => F1 > F3
+ Transitive = Res3 == Res1;
+ } else if (Res3 != 0 && Res3 == -Res4) {
+ // F1 > F3, F3 > F2 => F1 > F2
+ Transitive = Res3 == Res1;
+ } else if (Res4 != 0 && -Res3 == Res4) {
+ // F2 > F3, F3 > F1 => F2 > F1
+ Transitive = Res4 == -Res1;
+ }
+
+ if (!Transitive) {
+ dbgs() << "MERGEFUNC-SANITY: Non-transitive; triple: "
+ << TripleNumber << "\n";
+ dbgs() << "Res1, Res3, Res4: " << Res1 << ", " << Res3 << ", "
+ << Res4 << "\n";
+ F1->dump();
+ F2->dump();
+ F3->dump();
+ Valid = false;
+ }
+ }
+ }
+ }
+
+ dbgs() << "MERGEFUNC-SANITY: " << (Valid ? "Passed." : "Failed.") << "\n";
+ return Valid;
+ }
+ return true;
+}
+
bool MergeFunctions::runOnModule(Module &M) {
bool Changed = false;
DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
@@ -1158,12 +1210,13 @@ bool MergeFunctions::runOnModule(Module &M) {
if (!I->isDeclaration() && !I->hasAvailableExternallyLinkage())
Deferred.push_back(WeakVH(I));
}
- FnSet.resize(Deferred.size());
do {
std::vector<WeakVH> Worklist;
Deferred.swap(Worklist);
+ DEBUG(doSanityCheck(Worklist));
+
DEBUG(dbgs() << "size of module: " << M.size() << '\n');
DEBUG(dbgs() << "size of worklist: " << Worklist.size() << '\n');
@@ -1175,8 +1228,7 @@ bool MergeFunctions::runOnModule(Module &M) {
Function *F = cast<Function>(*I);
if (!F->isDeclaration() && !F->hasAvailableExternallyLinkage() &&
!F->mayBeOverridden()) {
- ComparableFunction CF = ComparableFunction(F, DL);
- Changed |= insert(CF);
+ Changed |= insert(F);
}
}
@@ -1190,38 +1242,17 @@ bool MergeFunctions::runOnModule(Module &M) {
Function *F = cast<Function>(*I);
if (!F->isDeclaration() && !F->hasAvailableExternallyLinkage() &&
F->mayBeOverridden()) {
- ComparableFunction CF = ComparableFunction(F, DL);
- Changed |= insert(CF);
+ Changed |= insert(F);
}
}
- DEBUG(dbgs() << "size of FnSet: " << FnSet.size() << '\n');
+ DEBUG(dbgs() << "size of FnTree: " << FnTree.size() << '\n');
} while (!Deferred.empty());
- FnSet.clear();
+ FnTree.clear();
return Changed;
}
-bool DenseMapInfo<ComparableFunction>::isEqual(const ComparableFunction &LHS,
- const ComparableFunction &RHS) {
- if (LHS.getFunc() == RHS.getFunc() &&
- LHS.getHash() == RHS.getHash())
- return true;
- if (!LHS.getFunc() || !RHS.getFunc())
- return false;
-
- // One of these is a special "underlying pointer comparison only" object.
- if (LHS.getDataLayout() == ComparableFunction::LookupOnly ||
- RHS.getDataLayout() == ComparableFunction::LookupOnly)
- return false;
-
- assert(LHS.getDataLayout() == RHS.getDataLayout() &&
- "Comparing functions for different targets");
-
- return FunctionComparator(LHS.getDataLayout(), LHS.getFunc(),
- RHS.getFunc()).compare();
-}
-
// Replace direct callers of Old with New.
void MergeFunctions::replaceDirectCallers(Function *Old, Function *New) {
Constant *BitcastNew = ConstantExpr::getBitCast(New, Old->getType());
@@ -1376,54 +1407,57 @@ void MergeFunctions::mergeTwoFunctions(Function *F, Function *G) {
++NumFunctionsMerged;
}
-// Insert a ComparableFunction into the FnSet, or merge it away if equal to one
+// Insert a function into the FnTree, or merge it away if equal to one
// that was already inserted.
-bool MergeFunctions::insert(ComparableFunction &NewF) {
- std::pair<FnSetType::iterator, bool> Result = FnSet.insert(NewF);
+bool MergeFunctions::insert(Function *NewFunction) {
+ std::pair<FnTreeType::iterator, bool> Result =
+ FnTree.insert(FunctionPtr(NewFunction, DL));
+
if (Result.second) {
- DEBUG(dbgs() << "Inserting as unique: " << NewF.getFunc()->getName() << '\n');
+ DEBUG(dbgs() << "Inserting as unique: " << NewFunction->getName() << '\n');
return false;
}
- const ComparableFunction &OldF = *Result.first;
+ const FunctionPtr &OldF = *Result.first;
// Don't merge tiny functions, since it can just end up making the function
// larger.
// FIXME: Should still merge them if they are unnamed_addr and produce an
// alias.
- if (NewF.getFunc()->size() == 1) {
- if (NewF.getFunc()->front().size() <= 2) {
- DEBUG(dbgs() << NewF.getFunc()->getName()
- << " is to small to bother merging\n");
+ if (NewFunction->size() == 1) {
+ if (NewFunction->front().size() <= 2) {
+ DEBUG(dbgs() << NewFunction->getName()
+ << " is to small to bother merging\n");
return false;
}
}
// Never thunk a strong function to a weak function.
- assert(!OldF.getFunc()->mayBeOverridden() ||
- NewF.getFunc()->mayBeOverridden());
+ assert(!OldF.getFunc()->mayBeOverridden() || NewFunction->mayBeOverridden());
- DEBUG(dbgs() << " " << OldF.getFunc()->getName() << " == "
- << NewF.getFunc()->getName() << '\n');
+ DEBUG(dbgs() << " " << OldF.getFunc()->getName()
+ << " == " << NewFunction->getName() << '\n');
- Function *DeleteF = NewF.getFunc();
- NewF.release();
+ Function *DeleteF = NewFunction;
mergeTwoFunctions(OldF.getFunc(), DeleteF);
return true;
}
-// Remove a function from FnSet. If it was already in FnSet, add it to Deferred
-// so that we'll look at it in the next round.
+// Remove a function from FnTree. If it was already in FnTree, add
+// it to Deferred so that we'll look at it in the next round.
void MergeFunctions::remove(Function *F) {
// We need to make sure we remove F, not a function "equal" to F per the
// function equality comparator.
- //
- // The special "lookup only" ComparableFunction bypasses the expensive
- // function comparison in favour of a pointer comparison on the underlying
- // Function*'s.
- ComparableFunction CF = ComparableFunction(F, ComparableFunction::LookupOnly);
- if (FnSet.erase(CF)) {
- DEBUG(dbgs() << "Removed " << F->getName() << " from set and deferred it.\n");
+ FnTreeType::iterator found = FnTree.find(FunctionPtr(F, DL));
+ size_t Erased = 0;
+ if (found != FnTree.end() && found->getFunc() == F) {
+ Erased = 1;
+ FnTree.erase(found);
+ }
+
+ if (Erased) {
+ DEBUG(dbgs() << "Removed " << F->getName()
+ << " from set and deferred it.\n");
Deferred.push_back(F);
}
}
diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp
index 38e1b8e..46a3187 100644
--- a/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ b/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -53,6 +53,10 @@ static cl::opt<bool>
RunLoopRerolling("reroll-loops", cl::Hidden,
cl::desc("Run the loop rerolling pass"));
+static cl::opt<bool> RunLoadCombine("combine-loads", cl::init(false),
+ cl::Hidden,
+ cl::desc("Run the load combining pass"));
+
PassManagerBuilder::PassManagerBuilder() {
OptLevel = 2;
SizeLevel = 0;
@@ -65,6 +69,7 @@ PassManagerBuilder::PassManagerBuilder() {
SLPVectorize = RunSLPVectorization;
LoopVectorize = RunLoopVectorization;
RerollLoops = RunLoopRerolling;
+ LoadCombine = RunLoadCombine;
}
PassManagerBuilder::~PassManagerBuilder() {
@@ -151,9 +156,9 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) {
if (!DisableUnitAtATime) {
addExtensionsToPM(EP_ModuleOptimizerEarly, MPM);
+ MPM.add(createIPSCCPPass()); // IP SCCP
MPM.add(createGlobalOptimizerPass()); // Optimize out global vars
- MPM.add(createIPSCCPPass()); // IP SCCP
MPM.add(createDeadArgEliminationPass()); // Dead argument elimination
MPM.add(createInstructionCombiningPass());// Clean up after IPCP & DAE
@@ -236,6 +241,9 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) {
MPM.add(createLoopUnrollPass());
}
+ if (LoadCombine)
+ MPM.add(createLoadCombinePass());
+
MPM.add(createAggressiveDCEPass()); // Delete dead instructions
MPM.add(createCFGSimplificationPass()); // Merge & remove BBs
MPM.add(createInstructionCombiningPass()); // Clean up after everything.
@@ -352,6 +360,9 @@ void PassManagerBuilder::populateLTOPassManager(PassManagerBase &PM,
// More scalar chains could be vectorized due to more alias information
PM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains.
+ if (LoadCombine)
+ PM.add(createLoadCombinePass());
+
// Cleanup and simplify the code after the scalar optimizations.
PM.add(createInstructionCombiningPass());
addExtensionsToPM(EP_Peephole, PM);
diff --git a/lib/Transforms/InstCombine/InstCombine.h b/lib/Transforms/InstCombine/InstCombine.h
index e04b1be..ab4dc1c 100644
--- a/lib/Transforms/InstCombine/InstCombine.h
+++ b/lib/Transforms/InstCombine/InstCombine.h
@@ -37,8 +37,9 @@ enum SelectPatternFlavor {
SPF_SMIN,
SPF_UMIN,
SPF_SMAX,
- SPF_UMAX
- // SPF_ABS - TODO.
+ SPF_UMAX,
+ SPF_ABS,
+ SPF_NABS
};
/// getComplexity: Assign a complexity or rank value to LLVM Values...
@@ -246,6 +247,7 @@ private:
bool DoXform = true);
Instruction *transformSExtICmp(ICmpInst *ICI, Instruction &CI);
bool WillNotOverflowSignedAdd(Value *LHS, Value *RHS);
+ bool WillNotOverflowUnsignedAdd(Value *LHS, Value *RHS);
Value *EmitGEPOffset(User *GEP);
Instruction *scalarizePHI(ExtractElementInst &EI, PHINode *PN);
Value *EvaluateInDifferentElementOrder(Value *V, ArrayRef<int> Mask);
diff --git a/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index c37a9cf..99f0f1f 100644
--- a/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -865,69 +865,170 @@ Value *FAddCombine::createAddendVal
return createFMul(OpndVal, Coeff.getValue(Instr->getType()));
}
-// dyn_castFoldableMul - If this value is a multiply that can be folded into
-// other computations (because it has a constant operand), return the
-// non-constant operand of the multiply, and set CST to point to the multiplier.
-// Otherwise, return null.
-//
-static inline Value *dyn_castFoldableMul(Value *V, Constant *&CST) {
- if (!V->hasOneUse() || !V->getType()->isIntOrIntVectorTy())
- return nullptr;
-
- Instruction *I = dyn_cast<Instruction>(V);
- if (!I) return nullptr;
-
- if (I->getOpcode() == Instruction::Mul)
- if ((CST = dyn_cast<Constant>(I->getOperand(1))))
- return I->getOperand(0);
- if (I->getOpcode() == Instruction::Shl)
- if ((CST = dyn_cast<Constant>(I->getOperand(1)))) {
- // The multiplier is really 1 << CST.
- CST = ConstantExpr::getShl(ConstantInt::get(V->getType(), 1), CST);
- return I->getOperand(0);
- }
- return nullptr;
+// If one of the operands has only one non-zero bit, and if the other
+// operand has a known-zero bit in a more significant place than it (not
+// including the sign bit), the ripple may go up to and fill the zero, but
+// won't change the sign. For example, (X & ~4) + 1.
+static bool checkRippleForAdd(const APInt &Op0KnownZero,
+ const APInt &Op1KnownZero) {
+ APInt Op1MaybeOne = ~Op1KnownZero;
+ // Make sure that one of the operands has at most one bit set to 1.
+ if (Op1MaybeOne.countPopulation() != 1)
+ return false;
+
+ // Find the most significant known 0 other than the sign bit.
+ int BitWidth = Op0KnownZero.getBitWidth();
+ APInt Op0KnownZeroTemp(Op0KnownZero);
+ Op0KnownZeroTemp.clearBit(BitWidth - 1);
+ int Op0ZeroPosition = BitWidth - Op0KnownZeroTemp.countLeadingZeros() - 1;
+
+ int Op1OnePosition = BitWidth - Op1MaybeOne.countLeadingZeros() - 1;
+ assert(Op1OnePosition >= 0);
+
+ // This also covers the case of no known zero, since in that case
+ // Op0ZeroPosition is -1.
+ return Op0ZeroPosition >= Op1OnePosition;
}
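
The (X & ~4) + 1 example above can be checked exhaustively at 8 bits: the right operand's single one-bit (bit 0) sits below the left operand's known-zero bit 2, so any carry is absorbed before it can reach the sign bit. A quick self-check, offered as illustration only:

#include <cassert>
#include <cstdint>

int main() {
  for (int X = -128; X <= 127; ++X) {
    int Masked = int8_t(int8_t(X) & ~4); // bit 2 known zero
    int Sum = Masked + 1;                // exact arithmetic
    assert(Sum >= -128 && Sum <= 127);   // never signed-overflows in i8
  }
}
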
-
/// WillNotOverflowSignedAdd - Return true if we can prove that:
/// (sext (add LHS, RHS)) === (add (sext LHS), (sext RHS))
/// This basically requires proving that the add in the original type would not
/// overflow to change the sign bit or have a carry out.
+/// TODO: Handle this for Vectors.
bool InstCombiner::WillNotOverflowSignedAdd(Value *LHS, Value *RHS) {
// There are different heuristics we can use for this. Here are some simple
// ones.
- // Add has the property that adding any two 2's complement numbers can only
- // have one carry bit which can change a sign. As such, if LHS and RHS each
- // have at least two sign bits, we know that the addition of the two values
- // will sign extend fine.
+ // If LHS and RHS each have at least two sign bits, the addition will look
+ // like
+ //
+ // XX..... +
+ // YY.....
+ //
+ // If the carry into the most significant position is 0, X and Y can't both
+ // be 1 and therefore the carry out of the addition is also 0.
+ //
+ // If the carry into the most significant position is 1, X and Y can't both
+ // be 0 and therefore the carry out of the addition is also 1.
+ //
+ // Since the carry into the most significant position is always equal to
+ // the carry out of the addition, there is no signed overflow.
if (ComputeNumSignBits(LHS) > 1 && ComputeNumSignBits(RHS) > 1)
return true;
+ if (IntegerType *IT = dyn_cast<IntegerType>(LHS->getType())) {
+ int BitWidth = IT->getBitWidth();
+ APInt LHSKnownZero(BitWidth, 0);
+ APInt LHSKnownOne(BitWidth, 0);
+ computeKnownBits(LHS, LHSKnownZero, LHSKnownOne);
- // If one of the operands only has one non-zero bit, and if the other operand
- // has a known-zero bit in a more significant place than it (not including the
- // sign bit) the ripple may go up to and fill the zero, but won't change the
- // sign. For example, (X & ~4) + 1.
+ APInt RHSKnownZero(BitWidth, 0);
+ APInt RHSKnownOne(BitWidth, 0);
+ computeKnownBits(RHS, RHSKnownZero, RHSKnownOne);
+
+ // Addition of two 2's complement numbers having opposite signs will never
+ // overflow.
+ if ((LHSKnownOne[BitWidth - 1] && RHSKnownZero[BitWidth - 1]) ||
+ (LHSKnownZero[BitWidth - 1] && RHSKnownOne[BitWidth - 1]))
+ return true;
+
+ // Check if carry bit of addition will not cause overflow.
+ if (checkRippleForAdd(LHSKnownZero, RHSKnownZero))
+ return true;
+ if (checkRippleForAdd(RHSKnownZero, LHSKnownZero))
+ return true;
+ }
+ return false;
+}
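
The two-sign-bits case above also admits a brute-force check at 8 bits: a value with at least two sign bits lies in [-64, 63], so the sum of two such values stays within [-128, 126] and the sign bit is never corrupted. A minimal verification sketch:

#include <cassert>

int main() {
  // i8 values with >= 2 sign bits occupy [-64, 63].
  for (int A = -64; A <= 63; ++A)
    for (int B = -64; B <= 63; ++B) {
      int Sum = A + B;                   // exact arithmetic
      assert(Sum >= -128 && Sum <= 127); // always representable in i8
    }
}
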
- // TODO: Implement.
+/// WillNotOverflowUnsignedAdd - Return true if we can prove that:
+/// (zext (add LHS, RHS)) === (add (zext LHS), (zext RHS))
+bool InstCombiner::WillNotOverflowUnsignedAdd(Value *LHS, Value *RHS) {
+ // There are different heuristics we can use for this. Here is a simple one.
+ // If the sign bit of LHS and that of RHS are both zero, no unsigned wrap.
+ bool LHSKnownNonNegative, LHSKnownNegative;
+ bool RHSKnownNonNegative, RHSKnownNegative;
+ ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, DL, 0);
+ ComputeSignBit(RHS, RHSKnownNonNegative, RHSKnownNegative, DL, 0);
+ if (LHSKnownNonNegative && RHSKnownNonNegative)
+ return true;
return false;
}
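
For the unsigned variant the argument is even shorter: two 8-bit values with clear sign bits are each at most 127, so their sum is at most 254 and cannot wrap. The same style of check, again as illustration:

#include <cassert>

int main() {
  for (unsigned A = 0; A < 128; ++A)   // sign bit clear in u8
    for (unsigned B = 0; B < 128; ++B)
      assert(A + B <= 255);            // never wraps in 8 bits
}
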
-Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
- bool Changed = SimplifyAssociativeOrCommutative(I);
+// Checks if any operand is negative and we can convert add to sub.
+// This function checks for following negative patterns
+// ADD(XOR(OR(Z, NOT(C)), C)), 1) == NEG(AND(Z, C))
+// ADD(XOR(AND(Z, C), C), 1) == NEG(OR(Z, ~C))
+// XOR(AND(Z, C), (C + 1)) == NEG(OR(Z, ~C)) if C is even
+static Value *checkForNegativeOperand(BinaryOperator &I,
+ InstCombiner::BuilderTy *Builder) {
Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
- if (Value *V = SimplifyVectorOp(I))
- return ReplaceInstUsesWith(I, V);
+ // This function creates 2 instructions to replace ADD; we need at least one
+ // of LHS or RHS to have one use to ensure the transform is a benefit.
+ if (!LHS->hasOneUse() && !RHS->hasOneUse())
+ return nullptr;
- if (Value *V = SimplifyAddInst(LHS, RHS, I.hasNoSignedWrap(),
- I.hasNoUnsignedWrap(), DL))
- return ReplaceInstUsesWith(I, V);
+ Value *X = nullptr, *Y = nullptr, *Z = nullptr;
+ const APInt *C1 = nullptr, *C2 = nullptr;
+
+ // If the ONE is on the other side, swap
+ if (match(RHS, m_Add(m_Value(X), m_One())))
+ std::swap(LHS, RHS);
+
+ if (match(LHS, m_Add(m_Value(X), m_One()))) {
+ // If the XOR is on the other side, swap
+ if (match(RHS, m_Xor(m_Value(Y), m_APInt(C1))))
+ std::swap(X, RHS);
+
+ if (match(X, m_Xor(m_Value(Y), m_APInt(C1)))) {
+ // X = XOR(Y, C1), Y = OR(Z, C2), C2 = NOT(C1) ==> X == NOT(AND(Z, C1))
+ // ADD(ADD(X, 1), RHS) == ADD(X, ADD(RHS, 1)) == SUB(RHS, AND(Z, C1))
+ if (match(Y, m_Or(m_Value(Z), m_APInt(C2))) && (*C2 == ~(*C1))) {
+ Value *NewAnd = Builder->CreateAnd(Z, *C1);
+ return Builder->CreateSub(RHS, NewAnd, "sub");
+ } else if (match(Y, m_And(m_Value(Z), m_APInt(C2))) && (*C1 == *C2)) {
+ // X = XOR(Y, C1), Y = AND(Z, C2), C2 == C1 ==> X == NOT(OR(Z, ~C1))
+ // ADD(ADD(X, 1), RHS) == ADD(X, ADD(RHS, 1)) == SUB(RHS, OR(Z, ~C1))
+ Value *NewOr = Builder->CreateOr(Z, ~(*C1));
+ return Builder->CreateSub(RHS, NewOr, "sub");
+ }
+ }
+ }
+
+ // Restore LHS and RHS
+ LHS = I.getOperand(0);
+ RHS = I.getOperand(1);
+
+ // If the XOR is on the other side, swap
+ if (match(RHS, m_Xor(m_Value(Y), m_APInt(C1))))
+ std::swap(LHS, RHS);
+
+ // C2 is ODD
+ // LHS = XOR(Y, C1), Y = AND(Z, C2), C1 == (C2 + 1) => LHS == NEG(OR(Z, ~C2))
+ // ADD(LHS, RHS) == SUB(RHS, OR(Z, ~C2))
+ if (match(LHS, m_Xor(m_Value(Y), m_APInt(C1))))
+ if (C1->countTrailingZeros() == 0)
+ if (match(Y, m_And(m_Value(Z), m_APInt(C2))) && *C1 == (*C2 + 1)) {
+ Value *NewOr = Builder->CreateOr(Z, ~(*C2));
+ return Builder->CreateSub(RHS, NewOr, "sub");
+ }
+ return nullptr;
+}
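
The identities in the comment above are bitwise facts that can be validated exhaustively at 8 bits; for instance, ADD(XOR(AND(Z, C), C), 1) == NEG(OR(Z, NOT(C))) holds because (Z & C) ^ C equals C & ~Z, and two's-complement negation is bitwise-not plus one. A brute-force check of that identity (illustration only):

#include <cassert>
#include <cstdint>

int main() {
  for (unsigned Z = 0; Z < 256; ++Z)
    for (unsigned C = 0; C < 256; ++C) {
      uint8_t X = uint8_t((Z & C) ^ C); // XOR(AND(Z, C), C)
      uint8_t Lhs = uint8_t(X + 1);     // ADD(..., 1)
      uint8_t Rhs = uint8_t(-(Z | ~C)); // NEG(OR(Z, NOT(C)))
      assert(Lhs == Rhs);
    }
}
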
+
+Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
+ bool Changed = SimplifyAssociativeOrCommutative(I);
+ Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
+
+ if (Value *V = SimplifyVectorOp(I))
+ return ReplaceInstUsesWith(I, V);
- // (A*B)+(A*C) -> A*(B+C) etc
+ if (Value *V = SimplifyAddInst(LHS, RHS, I.hasNoSignedWrap(),
+ I.hasNoUnsignedWrap(), DL))
+ return ReplaceInstUsesWith(I, V);
+
+ // (A*B)+(A*C) -> A*(B+C) etc
if (Value *V = SimplifyUsingDistributiveLaws(I))
return ReplaceInstUsesWith(I, V);
@@ -1025,23 +1126,8 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
if (Value *V = dyn_castNegVal(RHS))
return BinaryOperator::CreateSub(LHS, V);
-
- {
- Constant *C2;
- if (Value *X = dyn_castFoldableMul(LHS, C2)) {
- if (X == RHS) // X*C + X --> X * (C+1)
- return BinaryOperator::CreateMul(RHS, AddOne(C2));
-
- // X*C1 + X*C2 --> X * (C1+C2)
- Constant *C1;
- if (X == dyn_castFoldableMul(RHS, C1))
- return BinaryOperator::CreateMul(X, ConstantExpr::getAdd(C1, C2));
- }
-
- // X + X*C --> X * (C+1)
- if (dyn_castFoldableMul(RHS, C2) == LHS)
- return BinaryOperator::CreateMul(LHS, AddOne(C2));
- }
+ if (Value *V = checkForNegativeOperand(I, Builder))
+ return ReplaceInstUsesWith(I, V);
// A+B --> A|B iff A and B have no bits set in common.
if (IntegerType *IT = dyn_cast<IntegerType>(I.getType())) {
@@ -1059,29 +1145,6 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
}
}
- // W*X + Y*Z --> W * (X+Z) iff W == Y
- {
- Value *W, *X, *Y, *Z;
- if (match(LHS, m_Mul(m_Value(W), m_Value(X))) &&
- match(RHS, m_Mul(m_Value(Y), m_Value(Z)))) {
- if (W != Y) {
- if (W == Z) {
- std::swap(Y, Z);
- } else if (Y == X) {
- std::swap(W, X);
- } else if (X == Z) {
- std::swap(Y, Z);
- std::swap(W, X);
- }
- }
-
- if (W == Y) {
- Value *NewAdd = Builder->CreateAdd(X, Z, LHS->getName());
- return BinaryOperator::CreateMul(W, NewAdd);
- }
- }
- }
-
if (Constant *CRHS = dyn_cast<Constant>(RHS)) {
Value *X;
if (match(LHS, m_Not(m_Value(X)))) // ~X + C --> (C-1) - X
@@ -1191,6 +1254,18 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
return BinaryOperator::CreateOr(A, B);
}
+ // TODO(jingyue): Consider WillNotOverflowSignedAdd and
+ // WillNotOverflowUnsignedAdd to reduce the number of invocations of
+ // computeKnownBits.
+ if (!I.hasNoSignedWrap() && WillNotOverflowSignedAdd(LHS, RHS)) {
+ Changed = true;
+ I.setHasNoSignedWrap(true);
+ }
+ if (!I.hasNoUnsignedWrap() && WillNotOverflowUnsignedAdd(LHS, RHS)) {
+ Changed = true;
+ I.setHasNoUnsignedWrap(true);
+ }
+
return Changed ? &I : nullptr;
}
@@ -1478,9 +1553,9 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
return BinaryOperator::CreateAnd(Op0,
Builder->CreateNot(Y, Y->getName() + ".not"));
- // 0 - (X sdiv C) -> (X sdiv -C)
- if (match(Op1, m_SDiv(m_Value(X), m_Constant(C))) &&
- match(Op0, m_Zero()))
+ // 0 - (X sdiv C) -> (X sdiv -C) provided the negation doesn't overflow.
+ if (match(Op1, m_SDiv(m_Value(X), m_Constant(C))) && match(Op0, m_Zero()) &&
+ !C->isMinSignedValue())
return BinaryOperator::CreateSDiv(X, ConstantExpr::getNeg(C));
// 0 - (X << Y) -> (-X << Y) when X is freely negatable.
@@ -1488,19 +1563,6 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
if (Value *XNeg = dyn_castNegVal(X))
return BinaryOperator::CreateShl(XNeg, Y);
- // X - X*C --> X * (1-C)
- if (match(Op1, m_Mul(m_Specific(Op0), m_Constant(CI)))) {
- Constant *CP1 = ConstantExpr::getSub(ConstantInt::get(I.getType(),1), CI);
- return BinaryOperator::CreateMul(Op0, CP1);
- }
-
- // X - X<<C --> X * (1-(1<<C))
- if (match(Op1, m_Shl(m_Specific(Op0), m_Constant(CI)))) {
- Constant *One = ConstantInt::get(I.getType(), 1);
- C = ConstantExpr::getSub(One, ConstantExpr::getShl(One, CI));
- return BinaryOperator::CreateMul(Op0, C);
- }
-
// X - A*-B -> X + A*B
// X - -A*B -> X + A*B
Value *A, *B;
@@ -1517,16 +1579,6 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
}
}
- Constant *C1;
- if (Value *X = dyn_castFoldableMul(Op0, C1)) {
- if (X == Op1) // X*C - X --> X * (C-1)
- return BinaryOperator::CreateMul(Op1, SubOne(C1));
-
- Constant *C2; // X*C1 - X*C2 -> X * (C1-C2)
- if (X == dyn_castFoldableMul(Op1, C2))
- return BinaryOperator::CreateMul(X, ConstantExpr::getSub(C1, C2));
- }
-
// Optimize pointer differences into the same array into a size. Consider:
// &A[10] - &A[0]: we should compile this to "10".
if (DL) {
@@ -1541,7 +1593,7 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
match(Op1, m_Trunc(m_PtrToInt(m_Value(RHSOp)))))
if (Value *Res = OptimizePointerDifference(LHSOp, RHSOp, I.getType()))
return ReplaceInstUsesWith(I, Res);
- }
+ }
return nullptr;
}
diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 4f5d65a..b23a606 100644
--- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -1996,29 +1996,6 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
C1 = dyn_cast<ConstantInt>(C);
C2 = dyn_cast<ConstantInt>(D);
if (C1 && C2) { // (A & C1)|(B & C2)
- // If we have: ((V + N) & C1) | (V & C2)
- // .. and C2 = ~C1 and C2 is 0+1+ and (N & C2) == 0
- // replace with V+N.
- if (C1->getValue() == ~C2->getValue()) {
- if ((C2->getValue() & (C2->getValue()+1)) == 0 && // C2 == 0+1+
- match(A, m_Add(m_Value(V1), m_Value(V2)))) {
- // Add commutes, try both ways.
- if (V1 == B && MaskedValueIsZero(V2, C2->getValue()))
- return ReplaceInstUsesWith(I, A);
- if (V2 == B && MaskedValueIsZero(V1, C2->getValue()))
- return ReplaceInstUsesWith(I, A);
- }
- // Or commutes, try both ways.
- if ((C1->getValue() & (C1->getValue()+1)) == 0 &&
- match(B, m_Add(m_Value(V1), m_Value(V2)))) {
- // Add commutes, try both ways.
- if (V1 == A && MaskedValueIsZero(V2, C1->getValue()))
- return ReplaceInstUsesWith(I, B);
- if (V2 == A && MaskedValueIsZero(V1, C1->getValue()))
- return ReplaceInstUsesWith(I, B);
- }
- }
-
if ((C1->getValue() & C2->getValue()) == 0) {
// ((V | N) & C1) | (V & C2) --> (V|N) & (C1|C2)
// iff (C1&C2) == 0 and (N&~C1) == 0
diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp
index d4b583b..658178d 100644
--- a/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -421,6 +421,21 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
return InsertValueInst::Create(Struct, II->getArgOperand(0), 0);
}
}
+
+ // We can strength-reduce this signed add into a regular add if we
+ // can prove that it will never overflow.
+ if (II->getIntrinsicID() == Intrinsic::sadd_with_overflow) {
+ Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1);
+ if (WillNotOverflowSignedAdd(LHS, RHS)) {
+ Value *Add = Builder->CreateNSWAdd(LHS, RHS);
+ Add->takeName(&CI);
+ Constant *V[] = {UndefValue::get(Add->getType()), Builder->getFalse()};
+ StructType *ST = cast<StructType>(II->getType());
+ Constant *Struct = ConstantStruct::get(ST, V);
+ return InsertValueInst::Create(Struct, Add, 0);
+ }
+ }
+
break;
case Intrinsic::usub_with_overflow:
case Intrinsic::ssub_with_overflow:
@@ -800,6 +815,11 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
case Intrinsic::ppc_altivec_vperm:
// Turn vperm(V1,V2,mask) -> shuffle(V1,V2,mask) if mask is a constant.
+ // Note that ppc_altivec_vperm has a big-endian bias, so when creating
+ // a vector shuffle for little endian, we must undo the transformation
+ // performed on vec_perm in altivec.h. That is, we must complement
+ // the permutation mask with respect to 31 and reverse the order of
+ // V1 and V2.
if (Constant *Mask = dyn_cast<Constant>(II->getArgOperand(2))) {
assert(Mask->getType()->getVectorNumElements() == 16 &&
"Bad type for intrinsic!");
@@ -832,10 +852,14 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
unsigned Idx =
cast<ConstantInt>(Mask->getAggregateElement(i))->getZExtValue();
Idx &= 31; // Match the hardware behavior.
+ if (DL && DL->isLittleEndian())
+ Idx = 31 - Idx;
if (!ExtractedElts[Idx]) {
+ Value *Op0ToUse = (DL && DL->isLittleEndian()) ? Op1 : Op0;
+ Value *Op1ToUse = (DL && DL->isLittleEndian()) ? Op0 : Op1;
ExtractedElts[Idx] =
- Builder->CreateExtractElement(Idx < 16 ? Op0 : Op1,
+ Builder->CreateExtractElement(Idx < 16 ? Op0ToUse : Op1ToUse,
Builder->getInt32(Idx&15));
}
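
The little-endian fix-up above is easiest to see with a concrete mask element: on big-endian, mask value 3 selects byte 3 of the first operand; complemented with respect to 31 it becomes 28, which, with the two inputs' roles swapped, addresses the byte the little-endian hardware actually delivers. A small worked example (hedged, names hypothetical):

#include <cassert>

int main() {
  bool LittleEndian = true;
  unsigned Idx = 3 & 31;      // big-endian view: byte 3 of the first input
  if (LittleEndian)
    Idx = 31 - Idx;           // complement w.r.t. 31 -> 28
  // Idx >= 16 selects the "second" extract source, which on little endian
  // is really Op0, because the two inputs also swap roles.
  bool FromSecondSource = Idx >= 16;
  unsigned Lane = Idx & 15;   // lane within that source
  assert(Idx == 28 && FromSecondSource && Lane == 12);
}
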
@@ -913,6 +937,20 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
break;
}
+ case Intrinsic::AMDGPU_rcp: {
+ if (const ConstantFP *C = dyn_cast<ConstantFP>(II->getArgOperand(0))) {
+ const APFloat &ArgVal = C->getValueAPF();
+ APFloat Val(ArgVal.getSemantics(), 1.0);
+ APFloat::opStatus Status = Val.divide(ArgVal,
+ APFloat::rmNearestTiesToEven);
+ // Only do this if it was exact and therefore not dependent on the
+ // rounding mode.
+ if (Status == APFloat::opOK)
+ return ReplaceInstUsesWith(CI, ConstantFP::get(II->getContext(), Val));
+ }
+
+ break;
+ }
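
The rcp fold above only fires when 1/C is exactly representable, which makes it independent of the runtime rounding mode. A sketch of the same exactness test using APFloat directly (assumes LLVM headers; illustrative, not the pass code):

#include "llvm/ADT/APFloat.h"

// Returns true when 1.0 / D divides exactly, i.e. the fold is safe.
static bool rcpFoldsExactly(double D) {
  llvm::APFloat Val(1.0), Arg(D);
  return Val.divide(Arg, llvm::APFloat::rmNearestTiesToEven) ==
         llvm::APFloat::opOK;
}
// rcpFoldsExactly(2.0) -> true: 0.5 is exact, so rcp(2.0) folds to 0.5.
// rcpFoldsExactly(3.0) -> false: 1/3 is inexact, so the call is kept.
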
case Intrinsic::stackrestore: {
// If the save is right next to the restore, remove the restore. This can
// happen when variable allocas are DCE'd.
diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp
index 356803a..ff083d7 100644
--- a/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -1434,7 +1434,12 @@ Instruction *InstCombiner::commonPointerCastTransforms(CastInst &CI) {
if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Src)) {
// If casting the result of a getelementptr instruction with no offset, turn
// this into a cast of the original pointer!
- if (GEP->hasAllZeroIndices()) {
+ if (GEP->hasAllZeroIndices() &&
+ // If CI is an addrspacecast and GEP changes the pointer type, merging
+ // GEP into CI would undo canonicalizing addrspacecast with different
+ // pointer types, causing infinite loops.
+ (!isa<AddrSpaceCastInst>(CI) ||
+ GEP->getType() == GEP->getPointerOperand()->getType())) {
// Changing the cast operand is usually not a good idea but it is safe
// here because the pointer operand is being replaced with another
// pointer operand so the opcode doesn't need to change.
@@ -1904,5 +1909,24 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
}
Instruction *InstCombiner::visitAddrSpaceCast(AddrSpaceCastInst &CI) {
+ // If the destination pointer element type is not the same as the source's,
+ // first do a bitcast to the destination element type within the source
+ // address space, and then the addrspacecast. This allows the cast to be
+ // exposed to other transforms.
+ Value *Src = CI.getOperand(0);
+ PointerType *SrcTy = cast<PointerType>(Src->getType()->getScalarType());
+ PointerType *DestTy = cast<PointerType>(CI.getType()->getScalarType());
+
+ Type *DestElemTy = DestTy->getElementType();
+ if (SrcTy->getElementType() != DestElemTy) {
+ Type *MidTy = PointerType::get(DestElemTy, SrcTy->getAddressSpace());
+ if (VectorType *VT = dyn_cast<VectorType>(CI.getType())) {
+ // Handle vectors of pointers.
+ MidTy = VectorType::get(MidTy, VT->getNumElements());
+ }
+
+ Value *NewBitCast = Builder->CreateBitCast(Src, MidTy);
+ return new AddrSpaceCastInst(NewBitCast, CI.getType());
+ }
+
return commonPointerCastTransforms(CI);
}
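A toy model of the decomposition performed above (plain C++; PtrTy is an illustrative stand-in for an LLVM pointer type):

    #include <cassert>
    #include <string>
    #include <utility>

    // A pointer type modeled as {element type, address space}.
    using PtrTy = std::pair<std::string, unsigned>;

    int main() {
      PtrTy Src{"i8", 0};  // i8*
      PtrTy Dst{"i32", 3}; // i32 addrspace(3)*

      assert(Src.first != Dst.first);   // element types differ: decompose
      PtrTy Mid{Dst.first, Src.second}; // bitcast:       i8*  -> i32*
      PtrTy Out{Mid.first, Dst.second}; // addrspacecast: i32* -> i32 addrspace(3)*
      assert(Out == Dst);
    }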
diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 02e8bf1..5e71c5c 100644
--- a/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -612,9 +612,10 @@ Instruction *InstCombiner::FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
if (ICmpInst::isSigned(Cond))
return nullptr;
- // Look through bitcasts.
- if (BitCastInst *BCI = dyn_cast<BitCastInst>(RHS))
- RHS = BCI->getOperand(0);
+ // Look through bitcasts and addrspacecasts. We do not, however, want to
+ // remove all-zero-index GEPs.
+ if (!isa<GetElementPtrInst>(RHS))
+ RHS = RHS->stripPointerCasts();
Value *PtrBase = GEPLHS->getOperand(0);
if (DL && PtrBase == RHS && GEPLHS->isInBounds()) {
@@ -655,9 +656,24 @@ Instruction *InstCombiner::FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
(GEPRHS->hasAllConstantIndices() || GEPRHS->hasOneUse()) &&
PtrBase->stripPointerCasts() ==
GEPRHS->getOperand(0)->stripPointerCasts()) {
+ Value *LOffset = EmitGEPOffset(GEPLHS);
+ Value *ROffset = EmitGEPOffset(GEPRHS);
+
+ // If we looked through an addrspacecast between different sized address
+ // spaces, the LHS and RHS pointers are different sized
+ // integers. Truncate to the smaller one.
+ Type *LHSIndexTy = LOffset->getType();
+ Type *RHSIndexTy = ROffset->getType();
+ if (LHSIndexTy != RHSIndexTy) {
+ if (LHSIndexTy->getPrimitiveSizeInBits() <
+ RHSIndexTy->getPrimitiveSizeInBits()) {
+ ROffset = Builder->CreateTrunc(ROffset, LHSIndexTy);
+ } else
+ LOffset = Builder->CreateTrunc(LOffset, RHSIndexTy);
+ }
+
Value *Cmp = Builder->CreateICmp(ICmpInst::getSignedPredicate(Cond),
- EmitGEPOffset(GEPLHS),
- EmitGEPOffset(GEPRHS));
+ LOffset, ROffset);
return ReplaceInstUsesWith(I, Cmp);
}
@@ -667,26 +683,12 @@ Instruction *InstCombiner::FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
}
// If one of the GEPs has all zero indices, recurse.
- bool AllZeros = true;
- for (unsigned i = 1, e = GEPLHS->getNumOperands(); i != e; ++i)
- if (!isa<Constant>(GEPLHS->getOperand(i)) ||
- !cast<Constant>(GEPLHS->getOperand(i))->isNullValue()) {
- AllZeros = false;
- break;
- }
- if (AllZeros)
+ if (GEPLHS->hasAllZeroIndices())
return FoldGEPICmp(GEPRHS, GEPLHS->getOperand(0),
ICmpInst::getSwappedPredicate(Cond), I);
// If the other GEP has all zero indices, recurse.
- AllZeros = true;
- for (unsigned i = 1, e = GEPRHS->getNumOperands(); i != e; ++i)
- if (!isa<Constant>(GEPRHS->getOperand(i)) ||
- !cast<Constant>(GEPRHS->getOperand(i))->isNullValue()) {
- AllZeros = false;
- break;
- }
- if (AllZeros)
+ if (GEPRHS->hasAllZeroIndices())
return FoldGEPICmp(GEPLHS, GEPRHS->getOperand(0), Cond, I);
bool GEPsInBounds = GEPLHS->isInBounds() && GEPRHS->isInBounds();
@@ -2026,9 +2028,13 @@ static Instruction *ProcessUAddIdiom(Instruction &I, Value *OrigAddV,
/// replacement required.
static Instruction *ProcessUMulZExtIdiom(ICmpInst &I, Value *MulVal,
Value *OtherVal, InstCombiner &IC) {
+ // Don't bother doing this transformation for pointers or vectors.
+ if (!isa<IntegerType>(MulVal->getType()))
+ return nullptr;
+
assert(I.getOperand(0) == MulVal || I.getOperand(1) == MulVal);
assert(I.getOperand(0) == OtherVal || I.getOperand(1) == OtherVal);
- assert(isa<IntegerType>(MulVal->getType()));
Instruction *MulInstr = cast<Instruction>(MulVal);
assert(MulInstr->getOpcode() == Instruction::Mul);
@@ -2523,7 +2529,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
// bit is set. If the comparison is against zero, then this is a check
// to see if *that* bit is set.
APInt Op0KnownZeroInverted = ~Op0KnownZero;
- if (~Op1KnownZero == 0 && Op0KnownZeroInverted.isPowerOf2()) {
+ if (~Op1KnownZero == 0) {
// If the LHS is an AND with the same constant, look through it.
Value *LHS = nullptr;
ConstantInt *LHSC = nullptr;
@@ -2533,11 +2539,19 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
// If the LHS is 1 << x, and we know the result is a power of 2 like 8,
// then turn "((1 << x)&8) == 0" into "x != 3".
+ // or turn "((1 << x)&7) == 0" into "x > 2".
Value *X = nullptr;
if (match(LHS, m_Shl(m_One(), m_Value(X)))) {
- unsigned CmpVal = Op0KnownZeroInverted.countTrailingZeros();
- return new ICmpInst(ICmpInst::ICMP_NE, X,
- ConstantInt::get(X->getType(), CmpVal));
+ APInt ValToCheck = Op0KnownZeroInverted;
+ if (ValToCheck.isPowerOf2()) {
+ unsigned CmpVal = ValToCheck.countTrailingZeros();
+ return new ICmpInst(ICmpInst::ICMP_NE, X,
+ ConstantInt::get(X->getType(), CmpVal));
+ } else if ((++ValToCheck).isPowerOf2()) {
+ unsigned CmpVal = ValToCheck.countTrailingZeros() - 1;
+ return new ICmpInst(ICmpInst::ICMP_UGT, X,
+ ConstantInt::get(X->getType(), CmpVal));
+ }
}
// If the LHS is 8 >>u x, and we know the result is a power of 2 like 1,
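Both mask shapes (a single bit like 8, and a low-bit mask like 7) and both predicates, including the `!= 0` variant in the next hunk, can be verified exhaustively in plain C++:

    #include <cassert>

    int main() {
      for (unsigned X = 0; X < 32; ++X) {
        // Power-of-two mask: ((1 << X) & 8) == 0  <=>  X != 3 (existing fold).
        assert((((1u << X) & 8u) == 0) == (X != 3));
        // Low-bit mask 2^k-1: ((1 << X) & 7) == 0  <=>  X >u 2 (new fold),
        // and the negated form ((1 << X) & 7) != 0  <=>  X <u 3.
        assert((((1u << X) & 7u) == 0) == (X > 2));
        assert((((1u << X) & 7u) != 0) == (X < 3));
      }
    }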
@@ -2560,7 +2574,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
// bit is set. If the comparison is against zero, then this is a check
// to see if *that* bit is set.
APInt Op0KnownZeroInverted = ~Op0KnownZero;
- if (~Op1KnownZero == 0 && Op0KnownZeroInverted.isPowerOf2()) {
+ if (~Op1KnownZero == 0) {
// If the LHS is an AND with the same constant, look through it.
Value *LHS = nullptr;
ConstantInt *LHSC = nullptr;
@@ -2570,11 +2584,19 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
// If the LHS is 1 << x, and we know the result is a power of 2 like 8,
// then turn "((1 << x)&8) != 0" into "x == 3".
+ // or turn "((1 << x)&7) != 0" into "x < 3".
Value *X = nullptr;
if (match(LHS, m_Shl(m_One(), m_Value(X)))) {
- unsigned CmpVal = Op0KnownZeroInverted.countTrailingZeros();
- return new ICmpInst(ICmpInst::ICMP_EQ, X,
- ConstantInt::get(X->getType(), CmpVal));
+ APInt ValToCheck = Op0KnownZeroInverted;
+ if (ValToCheck.isPowerOf2()) {
+ unsigned CmpVal = ValToCheck.countTrailingZeros();
+ return new ICmpInst(ICmpInst::ICMP_EQ, X,
+ ConstantInt::get(X->getType(), CmpVal));
+ } else if ((++ValToCheck).isPowerOf2()) {
+ unsigned CmpVal = ValToCheck.countTrailingZeros();
+ return new ICmpInst(ICmpInst::ICMP_ULT, X,
+ ConstantInt::get(X->getType(), CmpVal));
+ }
}
// If the LHS is 8 >>u x, and we know the result is a power of 2 like 1,
diff --git a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
index 66d0938..c10e92a 100644
--- a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -50,99 +50,102 @@ static bool pointsToConstantGlobal(Value *V) {
/// can optimize this.
static bool
isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy,
- SmallVectorImpl<Instruction *> &ToDelete,
- bool IsOffset = false) {
+ SmallVectorImpl<Instruction *> &ToDelete) {
// We track lifetime intrinsics as we encounter them. If we decide to go
// ahead and replace the value with the global, this lets the caller quickly
// eliminate the markers.
- for (Use &U : V->uses()) {
- Instruction *I = cast<Instruction>(U.getUser());
-
- if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
- // Ignore non-volatile loads, they are always ok.
- if (!LI->isSimple()) return false;
- continue;
- }
-
- if (isa<BitCastInst>(I) || isa<AddrSpaceCastInst>(I)) {
- // If uses of the bitcast are ok, we are ok.
- if (!isOnlyCopiedFromConstantGlobal(I, TheCopy, ToDelete, IsOffset))
- return false;
- continue;
- }
- if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) {
- // If the GEP has all zero indices, it doesn't offset the pointer. If it
- // doesn't, it does.
- if (!isOnlyCopiedFromConstantGlobal(
- GEP, TheCopy, ToDelete, IsOffset || !GEP->hasAllZeroIndices()))
- return false;
- continue;
- }
+ SmallVector<std::pair<Value *, bool>, 35> ValuesToInspect;
+ ValuesToInspect.push_back(std::make_pair(V, false));
+ while (!ValuesToInspect.empty()) {
+ auto ValuePair = ValuesToInspect.pop_back_val();
+ const bool IsOffset = ValuePair.second;
+ for (auto &U : ValuePair.first->uses()) {
+ Instruction *I = cast<Instruction>(U.getUser());
+
+ if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
+ // Ignore non-volatile loads, they are always ok.
+ if (!LI->isSimple()) return false;
+ continue;
+ }
- if (CallSite CS = I) {
- // If this is the function being called then we treat it like a load and
- // ignore it.
- if (CS.isCallee(&U))
+ if (isa<BitCastInst>(I) || isa<AddrSpaceCastInst>(I)) {
+ // If uses of the bitcast are ok, we are ok.
+ ValuesToInspect.push_back(std::make_pair(I, IsOffset));
continue;
+ }
+ if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) {
+ // If the GEP has all zero indices, it doesn't offset the pointer. If it
+ // doesn't, it does.
+ ValuesToInspect.push_back(
+ std::make_pair(I, IsOffset || !GEP->hasAllZeroIndices()));
+ continue;
+ }
- // Inalloca arguments are clobbered by the call.
- unsigned ArgNo = CS.getArgumentNo(&U);
- if (CS.isInAllocaArgument(ArgNo))
- return false;
+ if (CallSite CS = I) {
+ // If this is the function being called then we treat it like a load and
+ // ignore it.
+ if (CS.isCallee(&U))
+ continue;
- // If this is a readonly/readnone call site, then we know it is just a
- // load (but one that potentially returns the value itself), so we can
- // ignore it if we know that the value isn't captured.
- if (CS.onlyReadsMemory() &&
- (CS.getInstruction()->use_empty() || CS.doesNotCapture(ArgNo)))
- continue;
+ // Inalloca arguments are clobbered by the call.
+ unsigned ArgNo = CS.getArgumentNo(&U);
+ if (CS.isInAllocaArgument(ArgNo))
+ return false;
- // If this is being passed as a byval argument, the caller is making a
- // copy, so it is only a read of the alloca.
- if (CS.isByValArgument(ArgNo))
- continue;
- }
+ // If this is a readonly/readnone call site, then we know it is just a
+ // load (but one that potentially returns the value itself), so we can
+ // ignore it if we know that the value isn't captured.
+ if (CS.onlyReadsMemory() &&
+ (CS.getInstruction()->use_empty() || CS.doesNotCapture(ArgNo)))
+ continue;
+
+ // If this is being passed as a byval argument, the caller is making a
+ // copy, so it is only a read of the alloca.
+ if (CS.isByValArgument(ArgNo))
+ continue;
+ }
- // Lifetime intrinsics can be handled by the caller.
- if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
- if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
- II->getIntrinsicID() == Intrinsic::lifetime_end) {
- assert(II->use_empty() && "Lifetime markers have no result to use!");
- ToDelete.push_back(II);
- continue;
+ // Lifetime intrinsics can be handled by the caller.
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
+ if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
+ II->getIntrinsicID() == Intrinsic::lifetime_end) {
+ assert(II->use_empty() && "Lifetime markers have no result to use!");
+ ToDelete.push_back(II);
+ continue;
+ }
}
- }
- // If this is isn't our memcpy/memmove, reject it as something we can't
- // handle.
- MemTransferInst *MI = dyn_cast<MemTransferInst>(I);
- if (!MI)
- return false;
+ // If this isn't our memcpy/memmove, reject it as something we can't
+ // handle.
+ MemTransferInst *MI = dyn_cast<MemTransferInst>(I);
+ if (!MI)
+ return false;
- // If the transfer is using the alloca as a source of the transfer, then
- // ignore it since it is a load (unless the transfer is volatile).
- if (U.getOperandNo() == 1) {
- if (MI->isVolatile()) return false;
- continue;
- }
+ // If the transfer is using the alloca as a source of the transfer, then
+ // ignore it since it is a load (unless the transfer is volatile).
+ if (U.getOperandNo() == 1) {
+ if (MI->isVolatile()) return false;
+ continue;
+ }
- // If we already have seen a copy, reject the second one.
- if (TheCopy) return false;
+ // If we already have seen a copy, reject the second one.
+ if (TheCopy) return false;
- // If the pointer has been offset from the start of the alloca, we can't
- // safely handle this.
- if (IsOffset) return false;
+ // If the pointer has been offset from the start of the alloca, we can't
+ // safely handle this.
+ if (IsOffset) return false;
- // If the memintrinsic isn't using the alloca as the dest, reject it.
- if (U.getOperandNo() != 0) return false;
+ // If the memintrinsic isn't using the alloca as the dest, reject it.
+ if (U.getOperandNo() != 0) return false;
- // If the source of the memcpy/move is not a constant global, reject it.
- if (!pointsToConstantGlobal(MI->getSource()))
- return false;
+ // If the source of the memcpy/move is not a constant global, reject it.
+ if (!pointsToConstantGlobal(MI->getSource()))
+ return false;
- // Otherwise, the transform is safe. Remember the copy instruction.
- TheCopy = MI;
+ // Otherwise, the transform is safe. Remember the copy instruction.
+ TheCopy = MI;
+ }
}
return true;
}
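The shape of this rewrite, recursion over users replaced by an explicit worklist of (value, flag) pairs, in a minimal standalone C++ sketch (Node and Users are illustrative stand-ins for LLVM values and use lists; assumes an acyclic use graph, as for the alloca walk above):

    #include <utility>
    #include <vector>

    struct Node { std::vector<Node *> Users; };

    static bool walkUsers(Node *Root) {
      std::vector<std::pair<Node *, bool>> Worklist;
      Worklist.push_back({Root, false});
      while (!Worklist.empty()) {
        auto Item = Worklist.back();
        Worklist.pop_back();
        const bool IsOffset = Item.second;
        for (Node *U : Item.first->Users) {
          // Cases that previously recursed now push the user instead,
          // optionally updating the flag (e.g. a non-zero-index GEP).
          Worklist.push_back({U, IsOffset});
        }
      }
      return true; // no rejecting use was found
    }

    int main() {
      Node Leaf1, Leaf2, Root;
      Root.Users = {&Leaf1, &Leaf2};
      return walkUsers(&Root) ? 0 : 1;
    }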
diff --git a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
index 9996ebc..6c6e7d8 100644
--- a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -203,8 +203,11 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
Value *X;
Constant *C1;
if (match(Op0, m_OneUse(m_Add(m_Value(X), m_Constant(C1))))) {
- Value *Add = Builder->CreateMul(X, Op1);
- return BinaryOperator::CreateAdd(Add, Builder->CreateMul(C1, Op1));
+ Value *Mul = Builder->CreateMul(C1, Op1);
+ // Only go forward with the transform if C1*CI simplifies to a tidier
+ // constant; otherwise we would trade one multiply for two.
+ if (!match(Mul, m_Mul(m_Value(), m_Value())))
+ return BinaryOperator::CreateAdd(Builder->CreateMul(X, Op1), Mul);
}
}
}
@@ -990,6 +993,10 @@ Instruction *InstCombiner::visitSDiv(BinaryOperator &I) {
}
if (Constant *RHS = dyn_cast<Constant>(Op1)) {
+ // X/INT_MIN -> X == INT_MIN
+ if (RHS->isMinSignedValue())
+ return new ZExtInst(Builder->CreateICmpEQ(Op0, Op1), I.getType());
+
// -X/C --> X/-C provided the negation doesn't overflow.
if (SubOperator *Sub = dyn_cast<SubOperator>(Op0))
if (match(Sub->getOperand(0), m_Zero()) && Sub->hasNoSignedWrap())
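The new X/INT_MIN fold can be checked exhaustively at a narrower width (plain C++; the 16-bit operands promote to int, so the division is well defined even at the minimum value):

    #include <cassert>
    #include <cstdint>

    int main() {
      // X / INT_MIN is 1 exactly when X == INT_MIN and 0 otherwise, i.e.
      // zext(X == INT_MIN).
      const int32_t Min = INT16_MIN;
      for (int32_t X = Min; X <= INT16_MAX; ++X)
        assert(X / Min == (X == Min ? 1 : 0));
    }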
diff --git a/lib/Transforms/InstCombine/InstCombineSelect.cpp b/lib/Transforms/InstCombine/InstCombineSelect.cpp
index 9a41e4b..06c9e29 100644
--- a/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -31,13 +31,18 @@ MatchSelectPattern(Value *V, Value *&LHS, Value *&RHS) {
ICmpInst *ICI = dyn_cast<ICmpInst>(SI->getCondition());
if (!ICI) return SPF_UNKNOWN;
- LHS = ICI->getOperand(0);
- RHS = ICI->getOperand(1);
+ ICmpInst::Predicate Pred = ICI->getPredicate();
+ Value *CmpLHS = ICI->getOperand(0);
+ Value *CmpRHS = ICI->getOperand(1);
+ Value *TrueVal = SI->getTrueValue();
+ Value *FalseVal = SI->getFalseValue();
+
+ LHS = CmpLHS;
+ RHS = CmpRHS;
// (icmp X, Y) ? X : Y
- if (SI->getTrueValue() == ICI->getOperand(0) &&
- SI->getFalseValue() == ICI->getOperand(1)) {
- switch (ICI->getPredicate()) {
+ if (TrueVal == CmpLHS && FalseVal == CmpRHS) {
+ switch (Pred) {
default: return SPF_UNKNOWN; // Equality.
case ICmpInst::ICMP_UGT:
case ICmpInst::ICMP_UGE: return SPF_UMAX;
@@ -51,18 +56,35 @@ MatchSelectPattern(Value *V, Value *&LHS, Value *&RHS) {
}
// (icmp X, Y) ? Y : X
- if (SI->getTrueValue() == ICI->getOperand(1) &&
- SI->getFalseValue() == ICI->getOperand(0)) {
- switch (ICI->getPredicate()) {
- default: return SPF_UNKNOWN; // Equality.
- case ICmpInst::ICMP_UGT:
- case ICmpInst::ICMP_UGE: return SPF_UMIN;
- case ICmpInst::ICMP_SGT:
- case ICmpInst::ICMP_SGE: return SPF_SMIN;
- case ICmpInst::ICMP_ULT:
- case ICmpInst::ICMP_ULE: return SPF_UMAX;
- case ICmpInst::ICMP_SLT:
- case ICmpInst::ICMP_SLE: return SPF_SMAX;
+ if (TrueVal == CmpRHS && FalseVal == CmpLHS) {
+ switch (Pred) {
+ default: return SPF_UNKNOWN; // Equality.
+ case ICmpInst::ICMP_UGT:
+ case ICmpInst::ICMP_UGE: return SPF_UMIN;
+ case ICmpInst::ICMP_SGT:
+ case ICmpInst::ICMP_SGE: return SPF_SMIN;
+ case ICmpInst::ICMP_ULT:
+ case ICmpInst::ICMP_ULE: return SPF_UMAX;
+ case ICmpInst::ICMP_SLT:
+ case ICmpInst::ICMP_SLE: return SPF_SMAX;
+ }
+ }
+
+ if (ConstantInt *C1 = dyn_cast<ConstantInt>(CmpRHS)) {
+ if ((CmpLHS == TrueVal && match(FalseVal, m_Neg(m_Specific(CmpLHS)))) ||
+ (CmpLHS == FalseVal && match(TrueVal, m_Neg(m_Specific(CmpLHS))))) {
+
+ // ABS(X) ==> (X >s 0) ? X : -X and (X >s -1) ? X : -X
+ // NABS(X) ==> (X >s 0) ? -X : X and (X >s -1) ? -X : X
+ if (Pred == ICmpInst::ICMP_SGT && (C1->isZero() || C1->isMinusOne())) {
+ return (CmpLHS == TrueVal) ? SPF_ABS : SPF_NABS;
+ }
+
+ // ABS(X) ==> (X <s 0) ? -X : X and (X <s 1) ? -X : X
+ // NABS(X) ==> (X <s 0) ? X : -X and (X <s 1) ? X : -X
+ if (Pred == ICmpInst::ICMP_SLT && (C1->isZero() || C1->isOne())) {
+ return (CmpLHS == FalseVal) ? SPF_ABS : SPF_NABS;
+ }
}
}
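All four select shapes added to MatchSelectPattern compute the same absolute value, which is easy to confirm exhaustively at 16 bits (plain C++; the arithmetic is done in 32 bits so negating the minimum value is well defined):

    #include <cassert>
    #include <cstdint>

    int main() {
      for (int32_t X = INT16_MIN; X <= INT16_MAX; ++X) {
        int32_t Abs = X < 0 ? -X : X;
        assert((X > 0 ? X : -X) == Abs);  // (X >s 0)  ? X : -X
        assert((X > -1 ? X : -X) == Abs); // (X >s -1) ? X : -X
        assert((X < 0 ? -X : X) == Abs);  // (X <s 0)  ? -X : X
        assert((X < 1 ? -X : X) == Abs);  // (X <s 1)  ? -X : X
      }
    }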
@@ -365,7 +387,15 @@ static Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp,
/// 1. The icmp predicate is inverted
/// 2. The select operands are reversed
/// 3. The magnitude of C2 and C1 are flipped
-static Value *foldSelectICmpAndOr(const SelectInst &SI, Value *TrueVal,
+///
+/// This also tries to turn
+/// --- Single bit tests:
+/// if ((x & C) == 0) x |= C to x |= C
+/// if ((x & C) != 0) x ^= C to x &= ~C
+/// if ((x & C) == 0) x ^= C to x |= C
+/// if ((x & C) != 0) x &= ~C to x &= ~C
+/// if ((x & C) == 0) x &= ~C to nothing
+static Value *foldSelectICmpAndOr(SelectInst &SI, Value *TrueVal,
Value *FalseVal,
InstCombiner::BuilderTy *Builder) {
const ICmpInst *IC = dyn_cast<ICmpInst>(SI.getCondition());
@@ -384,6 +414,25 @@ static Value *foldSelectICmpAndOr(const SelectInst &SI, Value *TrueVal,
return nullptr;
const APInt *C2;
+ if (match(TrueVal, m_Specific(X))) {
+ // if ((X & C) != 0) X ^= C becomes X &= ~C
+ if (match(FalseVal, m_Xor(m_Specific(X), m_APInt(C2))) && C1 == C2)
+ return Builder->CreateAnd(X, ~(*C1));
+ // if ((X & C) != 0) X &= ~C becomes X &= ~C
+ if (match(FalseVal, m_And(m_Specific(X), m_APInt(C2))) && *C1 == ~(*C2))
+ return FalseVal;
+ } else if (match(FalseVal, m_Specific(X))) {
+ // if ((X & C) == 0) X ^= C becomes X |= C
+ if (match(TrueVal, m_Xor(m_Specific(X), m_APInt(C2))) && C1 == C2)
+ return Builder->CreateOr(X, *C1);
+ // if ((X & C) == 0) X &= ~C becomes nothing
+ if (match(TrueVal, m_And(m_Specific(X), m_APInt(C2))) && *C1 == ~(*C2))
+ return X;
+ // if ((X & C) == 0) X |= C becomes X |= C
+ if (match(TrueVal, m_Or(m_Specific(X), m_APInt(C2))) && C1 == C2)
+ return TrueVal;
+ }
+
bool OrOnTrueVal = false;
bool OrOnFalseVal = match(FalseVal, m_Or(m_Specific(TrueVal), m_Power2(C2)));
if (!OrOnFalseVal)
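The five single-bit rewrites listed in the new doc comment hold for every 8-bit value and every one-bit constant, which a plain C++ loop can confirm:

    #include <cassert>

    int main() {
      for (unsigned X = 0; X < 256; ++X) {
        for (unsigned Bit = 0; Bit < 8; ++Bit) {
          const unsigned C = 1u << Bit;
          const bool Set = (X & C) != 0;
          assert((Set ? (X ^ C) : X) == (X & ~C));  // !=0: X^C  ==> X & ~C
          assert((Set ? (X & ~C) : X) == (X & ~C)); // !=0: X&~C ==> X & ~C
          assert((!Set ? (X ^ C) : X) == (X | C));  // ==0: X^C  ==> X | C
          assert((!Set ? (X | C) : X) == (X | C));  // ==0: X|C  ==> X | C
          assert((!Set ? (X & ~C) : X) == X);       // ==0: X&~C ==> X
        }
      }
    }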
@@ -677,6 +726,22 @@ Instruction *InstCombiner::FoldSPFofSPF(Instruction *Inner,
}
}
}
+
+ // ABS(ABS(X)) -> ABS(X)
+ // NABS(NABS(X)) -> NABS(X)
+ if (SPF1 == SPF2 && (SPF1 == SPF_ABS || SPF1 == SPF_NABS)) {
+ return ReplaceInstUsesWith(Outer, Inner);
+ }
+
+ // ABS(NABS(X)) -> ABS(X)
+ // NABS(ABS(X)) -> NABS(X)
+ if ((SPF1 == SPF_ABS && SPF2 == SPF_NABS) ||
+ (SPF1 == SPF_NABS && SPF2 == SPF_ABS)) {
+ SelectInst *SI = cast<SelectInst>(Inner);
+ Value *NewSI = Builder->CreateSelect(
+ SI->getCondition(), SI->getFalseValue(), SI->getTrueValue());
+ return ReplaceInstUsesWith(Outer, NewSI);
+ }
return nullptr;
}
@@ -981,7 +1046,6 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
// TODO.
// ABS(-X) -> ABS(X)
- // ABS(ABS(X)) -> ABS(X)
}
// See if we can fold the select into a phi node if the condition is a select.
diff --git a/lib/Transforms/InstCombine/InstCombineShifts.cpp b/lib/Transforms/InstCombine/InstCombineShifts.cpp
index cc6665c..2495747 100644
--- a/lib/Transforms/InstCombine/InstCombineShifts.cpp
+++ b/lib/Transforms/InstCombine/InstCombineShifts.cpp
@@ -789,11 +789,6 @@ Instruction *InstCombiner::visitAShr(BinaryOperator &I) {
// have a sign-extend idiom.
Value *X;
if (match(Op0, m_Shl(m_Value(X), m_Specific(Op1)))) {
- // If the left shift is just shifting out partial signbits, delete the
- // extension.
- if (cast<OverflowingBinaryOperator>(Op0)->hasNoSignedWrap())
- return ReplaceInstUsesWith(I, X);
-
// If the input is an extension from the shifted amount value, e.g.
// %x = zext i8 %A to i32
// %y = shl i32 %x, 24
diff --git a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index 8c5e202..cb16584 100644
--- a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -144,7 +144,7 @@ Instruction *InstCombiner::scalarizePHI(ExtractElementInst &EI, PHINode *PN) {
// If the operand is the PHI induction variable:
if (PHIInVal == PHIUser) {
// Scalarize the binary operation. Its first operand is the
- // scalar PHI and the second operand is extracted from the other
+ // scalar PHI, and the second operand is extracted from the other
// vector operand.
BinaryOperator *B0 = cast<BinaryOperator>(PHIUser);
unsigned opId = (B0->getOperand(0) == PN) ? 1 : 0;
@@ -361,7 +361,7 @@ static bool CollectSingleShuffleElements(Value *V, Value *LHS, Value *RHS,
unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue();
if (isa<UndefValue>(ScalarOp)) { // inserting undef into vector.
- // Okay, we can handle this if the vector we are insertinting into is
+ // We can handle this if the vector we are inserting into is
// transitively ok.
if (CollectSingleShuffleElements(VecOp, LHS, RHS, Mask)) {
// If so, update the mask to reflect the inserted undef.
@@ -376,7 +376,7 @@ static bool CollectSingleShuffleElements(Value *V, Value *LHS, Value *RHS,
// This must be extracting from either LHS or RHS.
if (EI->getOperand(0) == LHS || EI->getOperand(0) == RHS) {
- // Okay, we can handle this if the vector we are insertinting into is
+ // We can handle this if the vector we are inserting into is
// transitively ok.
if (CollectSingleShuffleElements(VecOp, LHS, RHS, Mask)) {
// If so, update the mask to reflect the inserted value.
@@ -403,7 +403,7 @@ static bool CollectSingleShuffleElements(Value *V, Value *LHS, Value *RHS,
/// We are building a shuffle to create V, which is a sequence of insertelement,
/// extractelement pairs. If PermittedRHS is set, then we must either use it or
-/// not rely on the second vector source. Return an std::pair containing the
+/// not rely on the second vector source. Return a std::pair containing the
/// left and right vectors of the proposed shuffle (or 0), and set the Mask
/// parameter as required.
///
diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp
index 4c36887..08e2446 100644
--- a/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -42,6 +42,7 @@
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
@@ -395,6 +396,127 @@ static bool RightDistributesOverLeft(Instruction::BinaryOps LOp,
return false;
}
+/// This function returns the identity value for the given opcode, which can
+/// be used to factor patterns like (X * 2) + X ==> (X * 2) + (X * 1)
+/// ==> X * (2 + 1).
+static Value *getIdentityValue(Instruction::BinaryOps OpCode, Value *V) {
+ if (isa<Constant>(V))
+ return nullptr;
+
+ if (OpCode == Instruction::Mul)
+ return ConstantInt::get(V->getType(), 1);
+
+ // TODO: We can handle other cases e.g. Instruction::And, Instruction::Or etc.
+
+ return nullptr;
+}
+
+/// This function extracts the operands and opcode of a binary op that can be
+/// factored using distributive laws. It also treats SHL as MUL,
+/// e.g. SHL(X, 2) ==> MUL(X, 4).
+static Instruction::BinaryOps
+getBinOpsForFactorization(BinaryOperator *Op, Value *&LHS, Value *&RHS) {
+ if (!Op)
+ return Instruction::BinaryOpsEnd;
+
+ if (Op->getOpcode() == Instruction::Shl) {
+ if (Constant *CST = dyn_cast<Constant>(Op->getOperand(1))) {
+ // The multiplier is really 1 << CST.
+ RHS = ConstantExpr::getShl(ConstantInt::get(Op->getType(), 1), CST);
+ LHS = Op->getOperand(0);
+ return Instruction::Mul;
+ }
+ }
+
+ // TODO: We can add other conversions e.g. shr => div etc.
+
+ LHS = Op->getOperand(0);
+ RHS = Op->getOperand(1);
+ return Op->getOpcode();
+}
+
+/// This tries to simplify binary operations by factorizing out common terms
+/// (e. g. "(A*B)+(A*C)" -> "A*(B+C)").
+static Value *tryFactorization(InstCombiner::BuilderTy *Builder,
+ const DataLayout *DL, BinaryOperator &I,
+ Instruction::BinaryOps InnerOpcode, Value *A,
+ Value *B, Value *C, Value *D) {
+
+ // If any of A, B, C or D are null, we cannot factor I; return early.
+ if (!A || !C || !B || !D)
+ return nullptr;
+
+ Value *SimplifiedInst = nullptr;
+ Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
+ Instruction::BinaryOps TopLevelOpcode = I.getOpcode();
+
+ // Does "X op' Y" always equal "Y op' X"?
+ bool InnerCommutative = Instruction::isCommutative(InnerOpcode);
+
+ // Does "X op' (Y op Z)" always equal "(X op' Y) op (X op' Z)"?
+ if (LeftDistributesOverRight(InnerOpcode, TopLevelOpcode))
+ // Does the instruction have the form "(A op' B) op (A op' D)" or, in the
+ // commutative case, "(A op' B) op (C op' A)"?
+ if (A == C || (InnerCommutative && A == D)) {
+ if (A != C)
+ std::swap(C, D);
+ // Consider forming "A op' (B op D)".
+ // If "B op D" simplifies then it can be formed with no cost.
+ Value *V = SimplifyBinOp(TopLevelOpcode, B, D, DL);
+ // If "B op D" doesn't simplify then only go on if both of the existing
+ // operations "A op' B" and "C op' D" will be zapped as no longer used.
+ if (!V && LHS->hasOneUse() && RHS->hasOneUse())
+ V = Builder->CreateBinOp(TopLevelOpcode, B, D, RHS->getName());
+ if (V) {
+ SimplifiedInst = Builder->CreateBinOp(InnerOpcode, A, V);
+ }
+ }
+
+ // Does "(X op Y) op' Z" always equal "(X op' Z) op (Y op' Z)"?
+ if (!SimplifiedInst && RightDistributesOverLeft(TopLevelOpcode, InnerOpcode))
+ // Does the instruction have the form "(A op' B) op (C op' B)" or, in the
+ // commutative case, "(A op' B) op (B op' D)"?
+ if (B == D || (InnerCommutative && B == C)) {
+ if (B != D)
+ std::swap(C, D);
+ // Consider forming "(A op C) op' B".
+ // If "A op C" simplifies then it can be formed with no cost.
+ Value *V = SimplifyBinOp(TopLevelOpcode, A, C, DL);
+
+ // If "A op C" doesn't simplify then only go on if both of the existing
+ // operations "A op' B" and "C op' D" will be zapped as no longer used.
+ if (!V && LHS->hasOneUse() && RHS->hasOneUse())
+ V = Builder->CreateBinOp(TopLevelOpcode, A, C, LHS->getName());
+ if (V) {
+ SimplifiedInst = Builder->CreateBinOp(InnerOpcode, V, B);
+ }
+ }
+
+ if (SimplifiedInst) {
+ ++NumFactor;
+ SimplifiedInst->takeName(&I);
+
+ // Check if we can add NSW flag to SimplifiedInst. If so, set NSW flag.
+ // TODO: Check for NUW.
+ if (BinaryOperator *BO = dyn_cast<BinaryOperator>(SimplifiedInst)) {
+ if (isa<OverflowingBinaryOperator>(SimplifiedInst)) {
+ bool HasNSW = false;
+ if (isa<OverflowingBinaryOperator>(&I))
+ HasNSW = I.hasNoSignedWrap();
+
+ if (BinaryOperator *Op0 = dyn_cast<BinaryOperator>(LHS))
+ if (isa<OverflowingBinaryOperator>(Op0))
+ HasNSW &= Op0->hasNoSignedWrap();
+
+ if (BinaryOperator *Op1 = dyn_cast<BinaryOperator>(RHS))
+ if (isa<OverflowingBinaryOperator>(Op1))
+ HasNSW &= Op1->hasNoSignedWrap();
+ BO->setHasNoSignedWrap(HasNSW);
+ }
+ }
+ }
+ return SimplifiedInst;
+}
+
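A quick standalone check (plain C++) of the two new factorization enablers: getIdentityValue lets (X * 2) + X be treated as (X * 2) + (X * 1), and getBinOpsForFactorization views SHL as MUL.

    #include <cassert>
    #include <cstdint>

    int main() {
      // Unsigned arithmetic keeps the shift well defined for all inputs.
      for (uint32_t X = 0; X <= 0xFFFF; ++X) {
        assert(X * 2 + X == X * 3);    // identity value: X == X * 1
        assert((X << 2) + X == X * 5); // SHL(X, 2) viewed as MUL(X, 4)
      }
    }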
/// SimplifyUsingDistributiveLaws - This tries to simplify binary operations
/// which some other binary operation distributes over either by factorizing
/// out common terms (eg "(A*B)+(A*C)" -> "A*(B+C)") or expanding out if this
@@ -404,65 +526,33 @@ Value *InstCombiner::SimplifyUsingDistributiveLaws(BinaryOperator &I) {
Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
BinaryOperator *Op0 = dyn_cast<BinaryOperator>(LHS);
BinaryOperator *Op1 = dyn_cast<BinaryOperator>(RHS);
- Instruction::BinaryOps TopLevelOpcode = I.getOpcode(); // op
// Factorization.
- if (Op0 && Op1 && Op0->getOpcode() == Op1->getOpcode()) {
- // The instruction has the form "(A op' B) op (C op' D)". Try to factorize
- // a common term.
- Value *A = Op0->getOperand(0), *B = Op0->getOperand(1);
- Value *C = Op1->getOperand(0), *D = Op1->getOperand(1);
- Instruction::BinaryOps InnerOpcode = Op0->getOpcode(); // op'
+ Value *A = nullptr, *B = nullptr, *C = nullptr, *D = nullptr;
+ Instruction::BinaryOps LHSOpcode = getBinOpsForFactorization(Op0, A, B);
+ Instruction::BinaryOps RHSOpcode = getBinOpsForFactorization(Op1, C, D);
+
+ // The instruction has the form "(A op' B) op (C op' D)". Try to factorize
+ // a common term.
+ if (LHSOpcode == RHSOpcode) {
+ if (Value *V = tryFactorization(Builder, DL, I, LHSOpcode, A, B, C, D))
+ return V;
+ }
- // Does "X op' Y" always equal "Y op' X"?
- bool InnerCommutative = Instruction::isCommutative(InnerOpcode);
-
- // Does "X op' (Y op Z)" always equal "(X op' Y) op (X op' Z)"?
- if (LeftDistributesOverRight(InnerOpcode, TopLevelOpcode))
- // Does the instruction have the form "(A op' B) op (A op' D)" or, in the
- // commutative case, "(A op' B) op (C op' A)"?
- if (A == C || (InnerCommutative && A == D)) {
- if (A != C)
- std::swap(C, D);
- // Consider forming "A op' (B op D)".
- // If "B op D" simplifies then it can be formed with no cost.
- Value *V = SimplifyBinOp(TopLevelOpcode, B, D, DL);
- // If "B op D" doesn't simplify then only go on if both of the existing
- // operations "A op' B" and "C op' D" will be zapped as no longer used.
- if (!V && Op0->hasOneUse() && Op1->hasOneUse())
- V = Builder->CreateBinOp(TopLevelOpcode, B, D, Op1->getName());
- if (V) {
- ++NumFactor;
- V = Builder->CreateBinOp(InnerOpcode, A, V);
- V->takeName(&I);
- return V;
- }
- }
+ // The instruction has the form "(A op' B) op (C)". Try to factorize a
+ // common term.
+ if (Value *V = tryFactorization(Builder, DL, I, LHSOpcode, A, B, RHS,
+ getIdentityValue(LHSOpcode, RHS)))
+ return V;
- // Does "(X op Y) op' Z" always equal "(X op' Z) op (Y op' Z)"?
- if (RightDistributesOverLeft(TopLevelOpcode, InnerOpcode))
- // Does the instruction have the form "(A op' B) op (C op' B)" or, in the
- // commutative case, "(A op' B) op (B op' D)"?
- if (B == D || (InnerCommutative && B == C)) {
- if (B != D)
- std::swap(C, D);
- // Consider forming "(A op C) op' B".
- // If "A op C" simplifies then it can be formed with no cost.
- Value *V = SimplifyBinOp(TopLevelOpcode, A, C, DL);
- // If "A op C" doesn't simplify then only go on if both of the existing
- // operations "A op' B" and "C op' D" will be zapped as no longer used.
- if (!V && Op0->hasOneUse() && Op1->hasOneUse())
- V = Builder->CreateBinOp(TopLevelOpcode, A, C, Op0->getName());
- if (V) {
- ++NumFactor;
- V = Builder->CreateBinOp(InnerOpcode, V, B);
- V->takeName(&I);
- return V;
- }
- }
- }
+ // The instruction has the form "(B) op (C op' D)". Try to factorize a
+ // common term.
+ if (Value *V = tryFactorization(Builder, DL, I, RHSOpcode, LHS,
+ getIdentityValue(RHSOpcode, LHS), C, D))
+ return V;
// Expansion.
+ Instruction::BinaryOps TopLevelOpcode = I.getOpcode();
if (Op0 && RightDistributesOverLeft(Op0->getOpcode(), TopLevelOpcode)) {
// The instruction has the form "(A op' B) op C". See if expanding it out
// to "(A op C) op' (B op C)" results in simplifications.
@@ -1030,6 +1120,12 @@ Value *InstCombiner::Descale(Value *Val, APInt Scale, bool &NoSignedWrap) {
return nullptr;
}
+ // If Op is zero then Val = Op * Scale.
+ if (match(Op, m_Zero())) {
+ NoSignedWrap = true;
+ return Op;
+ }
+
// We know that we can successfully descale, so from here on we can safely
// modify the IR. Op holds the descaled version of the deepest term in the
// expression. NoSignedWrap is 'true' if multiplying Op by Scale is known
@@ -1106,6 +1202,11 @@ static Value *CreateBinOpAsGiven(BinaryOperator &Inst, Value *LHS, Value *RHS,
Value *InstCombiner::SimplifyVectorOp(BinaryOperator &Inst) {
if (!Inst.getType()->isVectorTy()) return nullptr;
+ // It may not be safe to reorder shuffles and things like div, urem, etc.
+ // because we may trap when executing those ops on unknown vector elements.
+ // See PR20059.
+ if (!isSafeToSpeculativelyExecute(&Inst, DL)) return nullptr;
+
unsigned VWidth = cast<VectorType>(Inst.getType())->getNumElements();
Value *LHS = Inst.getOperand(0), *RHS = Inst.getOperand(1);
assert(cast<VectorType>(LHS->getType())->getNumElements() == VWidth);
@@ -1138,7 +1239,9 @@ Value *InstCombiner::SimplifyVectorOp(BinaryOperator &Inst) {
if (isa<ShuffleVectorInst>(RHS)) Shuffle = cast<ShuffleVectorInst>(RHS);
if (isa<Constant>(LHS)) C1 = cast<Constant>(LHS);
if (isa<Constant>(RHS)) C1 = cast<Constant>(RHS);
- if (Shuffle && C1 && isa<UndefValue>(Shuffle->getOperand(1)) &&
+ if (Shuffle && C1 &&
+ (isa<ConstantVector>(C1) || isa<ConstantDataVector>(C1)) &&
+ isa<UndefValue>(Shuffle->getOperand(1)) &&
Shuffle->getType() == Shuffle->getOperand(0)->getType()) {
SmallVector<int, 16> ShMask = Shuffle->getShuffleMask();
// Find constant C2 that has property:
@@ -1220,6 +1323,91 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
if (MadeChange) return &GEP;
}
+ // Check to see if the inputs to the PHI node are getelementptr instructions.
+ if (PHINode *PN = dyn_cast<PHINode>(PtrOp)) {
+ GetElementPtrInst *Op1 = dyn_cast<GetElementPtrInst>(PN->getOperand(0));
+ if (!Op1)
+ return nullptr;
+
+ signed DI = -1;
+
+ for (auto I = PN->op_begin() + 1, E = PN->op_end(); I != E; ++I) {
+ GetElementPtrInst *Op2 = dyn_cast<GetElementPtrInst>(*I);
+ if (!Op2 || Op1->getNumOperands() != Op2->getNumOperands())
+ return nullptr;
+
+ // Keep track of the type as we walk the GEP.
+ Type *CurTy = Op1->getOperand(0)->getType()->getScalarType();
+
+ for (unsigned J = 0, F = Op1->getNumOperands(); J != F; ++J) {
+ if (Op1->getOperand(J)->getType() != Op2->getOperand(J)->getType())
+ return nullptr;
+
+ if (Op1->getOperand(J) != Op2->getOperand(J)) {
+ if (DI == -1) {
+ // We have not yet seen any differences in the GEPs feeding the
+ // PHI, so we record this one if it is allowed to be a variable.
+
+ // The first two arguments can vary for any GEP; the rest have to be
+ // static for struct slots.
+ if (J > 1 && CurTy->isStructTy())
+ return nullptr;
+
+ DI = J;
+ } else {
+ // The GEPs differ in more than one operand. This could be extended to
+ // support GEPs that vary in more than one variable, but it isn't worth
+ // it: it greatly increases the complexity, and the result would be an
+ // R+R+R addressing mode, which no backend supports directly and which
+ // would have to be broken into several simpler instructions anyway.
+ return nullptr;
+ }
+ }
+
+ // Sink down a layer of the type for the next iteration.
+ if (J > 0) {
+ if (CompositeType *CT = dyn_cast<CompositeType>(CurTy)) {
+ CurTy = CT->getTypeAtIndex(Op1->getOperand(J));
+ } else {
+ CurTy = nullptr;
+ }
+ }
+ }
+ }
+
+ GetElementPtrInst *NewGEP = cast<GetElementPtrInst>(Op1->clone());
+
+ if (DI == -1) {
+ // All the GEPs feeding the PHI are identical. Clone one down into our
+ // BB so that it can be merged with the current GEP.
+ GEP.getParent()->getInstList().insert(GEP.getParent()->getFirstNonPHI(),
+ NewGEP);
+ } else {
+ // All the GEPs feeding the PHI differ at a single offset. Clone a GEP
+ // into the current block so it can be merged, and create a new PHI to
+ // set that index.
+ Instruction *InsertPt = Builder->GetInsertPoint();
+ Builder->SetInsertPoint(PN);
+ PHINode *NewPN = Builder->CreatePHI(Op1->getOperand(DI)->getType(),
+ PN->getNumOperands());
+ Builder->SetInsertPoint(InsertPt);
+
+ for (auto &I : PN->operands())
+ NewPN->addIncoming(cast<GEPOperator>(I)->getOperand(DI),
+ PN->getIncomingBlock(I));
+
+ NewGEP->setOperand(DI, NewPN);
+ GEP.getParent()->getInstList().insert(GEP.getParent()->getFirstNonPHI(),
+ NewGEP);
+ }
+
+ GEP.setOperand(0, NewGEP);
+ PtrOp = NewGEP;
+ }
+
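The operand scan above reduces to finding at most one position where the GEPs feeding the PHI disagree. A minimal standalone sketch (plain C++; singleDifference and the integer operands are illustrative):

    #include <cassert>
    #include <vector>

    // Return -1 if the operand lists are identical, the position if they
    // differ in exactly one operand, and -2 (reject) otherwise.
    static int singleDifference(const std::vector<int> &A,
                                const std::vector<int> &B) {
      if (A.size() != B.size())
        return -2;
      int DI = -1;
      for (size_t J = 0; J < A.size(); ++J) {
        if (A[J] != B[J]) {
          if (DI != -1)
            return -2; // more than one varying operand: would need R+R+R
          DI = int(J);
        }
      }
      return DI;
    }

    int main() {
      assert(singleDifference({0, 1, 2}, {0, 1, 2}) == -1); // clone one GEP
      assert(singleDifference({0, 1, 2}, {0, 9, 2}) == 1);  // PHI over index 1
      assert(singleDifference({0, 1, 2}, {3, 9, 2}) == -2); // reject
    }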
// Combine Indices - If the source pointer to this getelementptr instruction
// is a getelementptr instruction, combine the indices of the two
// getelementptr instructions into a single instruction.
@@ -2014,7 +2202,7 @@ Instruction *InstCombiner::visitLandingPadInst(LandingPadInst &LI) {
// Simplify the list of clauses, eg by removing repeated catch clauses
// (these are often created by inlining).
bool MakeNewInstruction = false; // If true, recreate using the following:
- SmallVector<Value *, 16> NewClauses; // - Clauses for the new instruction;
+ SmallVector<Constant *, 16> NewClauses; // - Clauses for the new instruction;
bool CleanupFlag = LI.isCleanup(); // - The new instruction is a cleanup.
SmallPtrSet<Value *, 16> AlreadyCaught; // Typeinfos known caught already.
@@ -2022,8 +2210,8 @@ Instruction *InstCombiner::visitLandingPadInst(LandingPadInst &LI) {
bool isLastClause = i + 1 == e;
if (LI.isCatch(i)) {
// A catch clause.
- Value *CatchClause = LI.getClause(i);
- Constant *TypeInfo = cast<Constant>(CatchClause->stripPointerCasts());
+ Constant *CatchClause = LI.getClause(i);
+ Constant *TypeInfo = CatchClause->stripPointerCasts();
// If we already saw this clause, there is no point in having a second
// copy of it.
@@ -2052,7 +2240,7 @@ Instruction *InstCombiner::visitLandingPadInst(LandingPadInst &LI) {
// equal (for example if one represents a C++ class, and the other some
// class derived from it).
assert(LI.isFilter(i) && "Unsupported landingpad clause!");
- Value *FilterClause = LI.getClause(i);
+ Constant *FilterClause = LI.getClause(i);
ArrayType *FilterType = cast<ArrayType>(FilterClause->getType());
unsigned NumTypeInfos = FilterType->getNumElements();
@@ -2096,8 +2284,8 @@ Instruction *InstCombiner::visitLandingPadInst(LandingPadInst &LI) {
// catch-alls. If so, the filter can be discarded.
bool SawCatchAll = false;
for (unsigned j = 0; j != NumTypeInfos; ++j) {
- Value *Elt = Filter->getOperand(j);
- Constant *TypeInfo = cast<Constant>(Elt->stripPointerCasts());
+ Constant *Elt = Filter->getOperand(j);
+ Constant *TypeInfo = Elt->stripPointerCasts();
if (isCatchAll(Personality, TypeInfo)) {
// This element is a catch-all. Bail out, noting this fact.
SawCatchAll = true;
@@ -2202,7 +2390,7 @@ Instruction *InstCombiner::visitLandingPadInst(LandingPadInst &LI) {
continue;
// If Filter is a subset of LFilter, i.e. every element of Filter is also
// an element of LFilter, then discard LFilter.
- SmallVectorImpl<Value *>::iterator J = NewClauses.begin() + j;
+ SmallVectorImpl<Constant *>::iterator J = NewClauses.begin() + j;
// If Filter is empty then it is a subset of LFilter.
if (!FElts) {
// Discard LFilter.
diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp
index 95fca75..5e5ddc1 100644
--- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -16,6 +16,7 @@
#include "llvm/Transforms/Instrumentation.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallString.h"
@@ -39,15 +40,14 @@
#include "llvm/Support/DataTypes.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Endian.h"
-#include "llvm/Support/system_error.h"
#include "llvm/Transforms/Utils/ASanStackFrameLayout.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
-#include "llvm/Transforms/Utils/SpecialCaseList.h"
#include <algorithm>
#include <string>
+#include <system_error>
using namespace llvm;
@@ -70,7 +70,7 @@ static const uintptr_t kRetiredStackFrameMagic = 0x45E0360E;
static const char *const kAsanModuleCtorName = "asan.module_ctor";
static const char *const kAsanModuleDtorName = "asan.module_dtor";
-static const int kAsanCtorAndCtorPriority = 1;
+static const int kAsanCtorAndDtorPriority = 1;
static const char *const kAsanReportErrorTemplate = "__asan_report_";
static const char *const kAsanReportLoadN = "__asan_report_load_n";
static const char *const kAsanReportStoreN = "__asan_report_store_n";
@@ -79,7 +79,7 @@ static const char *const kAsanUnregisterGlobalsName =
"__asan_unregister_globals";
static const char *const kAsanPoisonGlobalsName = "__asan_before_dynamic_init";
static const char *const kAsanUnpoisonGlobalsName = "__asan_after_dynamic_init";
-static const char *const kAsanInitName = "__asan_init_v3";
+static const char *const kAsanInitName = "__asan_init_v4";
static const char *const kAsanCovModuleInitName = "__sanitizer_cov_module_init";
static const char *const kAsanCovName = "__sanitizer_cov";
static const char *const kAsanPtrCmp = "__sanitizer_ptr_cmp";
@@ -128,9 +128,8 @@ static cl::opt<int> ClMaxInsnsToInstrumentPerBB("asan-max-ins-per-bb",
// This flag may need to be replaced with -f[no]asan-stack.
static cl::opt<bool> ClStack("asan-stack",
cl::desc("Handle stack memory"), cl::Hidden, cl::init(true));
-// This flag may need to be replaced with -f[no]asan-use-after-return.
static cl::opt<bool> ClUseAfterReturn("asan-use-after-return",
- cl::desc("Check return-after-free"), cl::Hidden, cl::init(false));
+ cl::desc("Check return-after-free"), cl::Hidden, cl::init(true));
// This flag may need to be replaced with -f[no]asan-globals.
static cl::opt<bool> ClGlobals("asan-globals",
cl::desc("Handle global objects"), cl::Hidden, cl::init(true));
@@ -142,16 +141,13 @@ static cl::opt<int> ClCoverageBlockThreshold("asan-coverage-block-threshold",
"are more than this number of blocks."),
cl::Hidden, cl::init(1500));
static cl::opt<bool> ClInitializers("asan-initialization-order",
- cl::desc("Handle C++ initializer order"), cl::Hidden, cl::init(false));
+ cl::desc("Handle C++ initializer order"), cl::Hidden, cl::init(true));
static cl::opt<bool> ClInvalidPointerPairs("asan-detect-invalid-pointer-pair",
cl::desc("Instrument <, <=, >, >=, - with pointer operands"),
cl::Hidden, cl::init(false));
static cl::opt<unsigned> ClRealignStack("asan-realign-stack",
cl::desc("Realign stack to the value of this flag (power of two)"),
cl::Hidden, cl::init(32));
-static cl::opt<std::string> ClBlacklistFile("asan-blacklist",
- cl::desc("File containing the list of objects to ignore "
- "during instrumentation"), cl::Hidden);
static cl::opt<int> ClInstrumentationWithCallsThreshold(
"asan-instrumentation-with-call-threshold",
cl::desc("If the function being instrumented contains more than "
@@ -216,29 +212,87 @@ STATISTIC(NumOptimizedAccessesToGlobalVar,
"Number of optimized accesses to global vars");
namespace {
-/// A set of dynamically initialized globals extracted from metadata.
-class SetOfDynamicallyInitializedGlobals {
+/// Frontend-provided metadata for global variables.
+class GlobalsMetadata {
public:
- void Init(Module& M) {
- // Clang generates metadata identifying all dynamically initialized globals.
- NamedMDNode *DynamicGlobals =
- M.getNamedMetadata("llvm.asan.dynamically_initialized_globals");
- if (!DynamicGlobals)
+ GlobalsMetadata() : inited_(false) {}
+ void init(Module& M) {
+ assert(!inited_);
+ inited_ = true;
+ NamedMDNode *Globals = M.getNamedMetadata("llvm.asan.globals");
+ if (!Globals)
return;
- for (int i = 0, n = DynamicGlobals->getNumOperands(); i < n; ++i) {
- MDNode *MDN = DynamicGlobals->getOperand(i);
- assert(MDN->getNumOperands() == 1);
- Value *VG = MDN->getOperand(0);
- // The optimizer may optimize away a global entirely, in which case we
- // cannot instrument access to it.
- if (!VG)
+ for (auto MDN : Globals->operands()) {
+ // Format of the metadata node for the global:
+ // {
+ // global,
+ // source_location,
+ // i1 is_dynamically_initialized,
+ // i1 is_blacklisted
+ // }
+ assert(MDN->getNumOperands() == 4);
+ Value *V = MDN->getOperand(0);
+ // The optimizer may optimize away a global entirely.
+ if (!V)
continue;
- DynInitGlobals.insert(cast<GlobalVariable>(VG));
+ GlobalVariable *GV = cast<GlobalVariable>(V);
+ if (Value *Loc = MDN->getOperand(1)) {
+ GlobalVariable *GVLoc = cast<GlobalVariable>(Loc);
+ // We may already know the source location for GV, if it was merged
+ // with another global.
+ if (SourceLocation.insert(std::make_pair(GV, GVLoc)).second)
+ addSourceLocationGlobal(GVLoc);
+ }
+ ConstantInt *IsDynInit = cast<ConstantInt>(MDN->getOperand(2));
+ if (IsDynInit->isOne())
+ DynInitGlobals.insert(GV);
+ ConstantInt *IsBlacklisted = cast<ConstantInt>(MDN->getOperand(3));
+ if (IsBlacklisted->isOne())
+ BlacklistedGlobals.insert(GV);
}
}
- bool Contains(GlobalVariable *G) { return DynInitGlobals.count(G) != 0; }
+
+ GlobalVariable *getSourceLocation(GlobalVariable *G) const {
+ auto Pos = SourceLocation.find(G);
+ return (Pos != SourceLocation.end()) ? Pos->second : nullptr;
+ }
+
+ /// Check if the global is dynamically initialized.
+ bool isDynInit(GlobalVariable *G) const {
+ return DynInitGlobals.count(G);
+ }
+
+ /// Check if the global was blacklisted.
+ bool isBlacklisted(GlobalVariable *G) const {
+ return BlacklistedGlobals.count(G);
+ }
+
+ /// Check if the global was generated to describe source location of another
+ /// global (we don't want to instrument them).
+ bool isSourceLocationGlobal(GlobalVariable *G) const {
+ return LocationGlobals.count(G);
+ }
+
private:
- SmallSet<GlobalValue*, 32> DynInitGlobals;
+ bool inited_;
+ DenseMap<GlobalVariable*, GlobalVariable*> SourceLocation;
+ DenseSet<GlobalVariable*> DynInitGlobals;
+ DenseSet<GlobalVariable*> BlacklistedGlobals;
+ DenseSet<GlobalVariable*> LocationGlobals;
+
+ void addSourceLocationGlobal(GlobalVariable *SourceLocGV) {
+ // Source location global is a struct with layout:
+ // {
+ // filename,
+ // i32 line_number,
+ // i32 column_number,
+ // }
+ LocationGlobals.insert(SourceLocGV);
+ ConstantStruct *Contents =
+ cast<ConstantStruct>(SourceLocGV->getInitializer());
+ GlobalVariable *FilenameGV = cast<GlobalVariable>(Contents->getOperand(0));
+ LocationGlobals.insert(FilenameGV);
+ }
};
/// This struct defines the shadow mapping using the rule:
@@ -306,16 +360,7 @@ static size_t RedzoneSizeForScale(int MappingScale) {
/// AddressSanitizer: instrument the code in module to find memory bugs.
struct AddressSanitizer : public FunctionPass {
- AddressSanitizer(bool CheckInitOrder = true,
- bool CheckUseAfterReturn = false,
- bool CheckLifetime = false,
- StringRef BlacklistFile = StringRef())
- : FunctionPass(ID),
- CheckInitOrder(CheckInitOrder || ClInitializers),
- CheckUseAfterReturn(CheckUseAfterReturn || ClUseAfterReturn),
- CheckLifetime(CheckLifetime || ClCheckLifetime),
- BlacklistFile(BlacklistFile.empty() ? ClBlacklistFile
- : BlacklistFile) {}
+ AddressSanitizer() : FunctionPass(ID) {}
const char *getPassName() const override {
return "AddressSanitizerFunctionPass";
}
@@ -344,11 +389,6 @@ struct AddressSanitizer : public FunctionPass {
bool InjectCoverage(Function &F, const ArrayRef<BasicBlock*> AllBlocks);
void InjectCoverageAtBlock(Function &F, BasicBlock &BB);
- bool CheckInitOrder;
- bool CheckUseAfterReturn;
- bool CheckLifetime;
- SmallString<64> BlacklistFile;
-
LLVMContext *C;
const DataLayout *DL;
int LongSize;
@@ -359,7 +399,6 @@ struct AddressSanitizer : public FunctionPass {
Function *AsanHandleNoReturnFunc;
Function *AsanCovFunction;
Function *AsanPtrCmpFunction, *AsanPtrSubFunction;
- std::unique_ptr<SpecialCaseList> BL;
// This array is indexed by AccessIsWrite and log2(AccessSize).
Function *AsanErrorCallback[2][kNumberOfAccessSizes];
Function *AsanMemoryAccessCallback[2][kNumberOfAccessSizes];
@@ -368,19 +407,14 @@ struct AddressSanitizer : public FunctionPass {
*AsanMemoryAccessCallbackSized[2];
Function *AsanMemmove, *AsanMemcpy, *AsanMemset;
InlineAsm *EmptyAsm;
- SetOfDynamicallyInitializedGlobals DynamicallyInitializedGlobals;
+ GlobalsMetadata GlobalsMD;
friend struct FunctionStackPoisoner;
};
class AddressSanitizerModule : public ModulePass {
public:
- AddressSanitizerModule(bool CheckInitOrder = true,
- StringRef BlacklistFile = StringRef())
- : ModulePass(ID),
- CheckInitOrder(CheckInitOrder || ClInitializers),
- BlacklistFile(BlacklistFile.empty() ? ClBlacklistFile
- : BlacklistFile) {}
+ AddressSanitizerModule() : ModulePass(ID) {}
bool runOnModule(Module &M) override;
static char ID; // Pass identification, replacement for typeid
const char *getPassName() const override {
@@ -390,17 +424,15 @@ class AddressSanitizerModule : public ModulePass {
private:
void initializeCallbacks(Module &M);
+ bool InstrumentGlobals(IRBuilder<> &IRB, Module &M);
bool ShouldInstrumentGlobal(GlobalVariable *G);
+ void poisonOneInitializer(Function &GlobalInit, GlobalValue *ModuleName);
void createInitializerPoisonCalls(Module &M, GlobalValue *ModuleName);
size_t MinRedzoneSizeForGlobal() const {
return RedzoneSizeForScale(Mapping.Scale);
}
- bool CheckInitOrder;
- SmallString<64> BlacklistFile;
-
- std::unique_ptr<SpecialCaseList> BL;
- SetOfDynamicallyInitializedGlobals DynamicallyInitializedGlobals;
+ GlobalsMetadata GlobalsMD;
Type *IntptrTy;
LLVMContext *C;
const DataLayout *DL;
@@ -497,7 +529,7 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> {
/// \brief Collect lifetime intrinsic calls to check for use-after-scope
/// errors.
void visitIntrinsicInst(IntrinsicInst &II) {
- if (!ASan.CheckLifetime) return;
+ if (!ClCheckLifetime) return;
Intrinsic::ID ID = II.getIntrinsicID();
if (ID != Intrinsic::lifetime_start &&
ID != Intrinsic::lifetime_end)
@@ -552,20 +584,16 @@ char AddressSanitizer::ID = 0;
INITIALIZE_PASS(AddressSanitizer, "asan",
"AddressSanitizer: detects use-after-free and out-of-bounds bugs.",
false, false)
-FunctionPass *llvm::createAddressSanitizerFunctionPass(
- bool CheckInitOrder, bool CheckUseAfterReturn, bool CheckLifetime,
- StringRef BlacklistFile) {
- return new AddressSanitizer(CheckInitOrder, CheckUseAfterReturn,
- CheckLifetime, BlacklistFile);
+FunctionPass *llvm::createAddressSanitizerFunctionPass() {
+ return new AddressSanitizer();
}
char AddressSanitizerModule::ID = 0;
INITIALIZE_PASS(AddressSanitizerModule, "asan-module",
"AddressSanitizer: detects use-after-free and out-of-bounds bugs."
"ModulePass", false, false)
-ModulePass *llvm::createAddressSanitizerModulePass(
- bool CheckInitOrder, StringRef BlacklistFile) {
- return new AddressSanitizerModule(CheckInitOrder, BlacklistFile);
+ModulePass *llvm::createAddressSanitizerModulePass() {
+ return new AddressSanitizerModule();
}
static size_t TypeSizeToSizeIndex(uint32_t TypeSize) {
@@ -682,7 +710,7 @@ bool AddressSanitizer::GlobalIsLinkerInitialized(GlobalVariable *G) {
// If a global variable does not have dynamic initialization we don't
// have to instrument it. However, if a global does not have initializer
// at all, we assume it has dynamic initializer (in other TU).
- return G->hasInitializer() && !DynamicallyInitializedGlobals.Contains(G);
+ return G->hasInitializer() && !GlobalsMD.isDynInit(G);
}
void
@@ -706,7 +734,7 @@ void AddressSanitizer::instrumentMop(Instruction *I, bool UseCalls) {
if (GlobalVariable *G = dyn_cast<GlobalVariable>(Addr)) {
// If initialization order checking is disabled, a simple access to a
// dynamically initialized global is always valid.
- if (!CheckInitOrder || GlobalIsLinkerInitialized(G)) {
+ if (!ClInitializers || GlobalIsLinkerInitialized(G)) {
NumOptimizedAccessesToGlobalVar++;
return;
}
@@ -851,48 +879,36 @@ void AddressSanitizer::instrumentAddress(Instruction *OrigIns,
Crash->setDebugLoc(OrigIns->getDebugLoc());
}
-void AddressSanitizerModule::createInitializerPoisonCalls(
- Module &M, GlobalValue *ModuleName) {
- // We do all of our poisoning and unpoisoning within a global constructor.
- // These are called _GLOBAL__(sub_)?I_.*.
- // TODO: Consider looking through the functions in
- // M.getGlobalVariable("llvm.global_ctors") instead of using this stringly
- // typed approach.
- Function *GlobalInit = nullptr;
- for (auto &F : M.getFunctionList()) {
- StringRef FName = F.getName();
-
- const char kGlobalPrefix[] = "_GLOBAL__";
- if (!FName.startswith(kGlobalPrefix))
- continue;
- FName = FName.substr(strlen(kGlobalPrefix));
-
- const char kOptionalSub[] = "sub_";
- if (FName.startswith(kOptionalSub))
- FName = FName.substr(strlen(kOptionalSub));
-
- if (FName.startswith("I_")) {
- GlobalInit = &F;
- break;
- }
- }
- // If that function is not present, this TU contains no globals, or they have
- // all been optimized away
- if (!GlobalInit)
- return;
-
+void AddressSanitizerModule::poisonOneInitializer(Function &GlobalInit,
+ GlobalValue *ModuleName) {
// Set up the arguments to our poison/unpoison functions.
- IRBuilder<> IRB(GlobalInit->begin()->getFirstInsertionPt());
+ IRBuilder<> IRB(GlobalInit.begin()->getFirstInsertionPt());
// Add a call to poison all external globals before the given function starts.
Value *ModuleNameAddr = ConstantExpr::getPointerCast(ModuleName, IntptrTy);
IRB.CreateCall(AsanPoisonGlobals, ModuleNameAddr);
// Add calls to unpoison all globals before each return instruction.
- for (Function::iterator I = GlobalInit->begin(), E = GlobalInit->end();
- I != E; ++I) {
- if (ReturnInst *RI = dyn_cast<ReturnInst>(I->getTerminator())) {
+ for (auto &BB : GlobalInit.getBasicBlockList())
+ if (ReturnInst *RI = dyn_cast<ReturnInst>(BB.getTerminator()))
CallInst::Create(AsanUnpoisonGlobals, "", RI);
+}
+
+void AddressSanitizerModule::createInitializerPoisonCalls(
+ Module &M, GlobalValue *ModuleName) {
+ GlobalVariable *GV = M.getGlobalVariable("llvm.global_ctors");
+
+ ConstantArray *CA = cast<ConstantArray>(GV->getInitializer());
+ for (Use &OP : CA->operands()) {
+ if (isa<ConstantAggregateZero>(OP))
+ continue;
+ ConstantStruct *CS = cast<ConstantStruct>(OP);
+
+ // Must have a function or null ptr.
+ // (CS->getOperand(0) is the init priority.)
+ if (Function* F = dyn_cast<Function>(CS->getOperand(1))) {
+ if (F->getName() != kAsanModuleCtorName)
+ poisonOneInitializer(*F, ModuleName);
}
}
}
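A toy model of the new createInitializerPoisonCalls strategy (plain C++; the names mirror the ones above but the entries are invented): walk the {priority, function} pairs of llvm.global_ctors and poison every initializer except ASan's own module constructor, instead of string-matching "_GLOBAL__(sub_)?I_" names.

    #include <cassert>
    #include <string>
    #include <utility>
    #include <vector>

    int main() {
      std::vector<std::pair<int, std::string>> GlobalCtors = {
          {1, "asan.module_ctor"}, // kAsanModuleCtorName: must be skipped
          {65535, "_GLOBAL__sub_I_a.cc"},
          {65535, "_GLOBAL__sub_I_b.cc"},
      };

      std::vector<std::string> Poisoned;
      for (const auto &CS : GlobalCtors)
        if (CS.second != "asan.module_ctor") // poisonOneInitializer(*F, ...)
          Poisoned.push_back(CS.second);

      assert(Poisoned.size() == 2);
    }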
@@ -901,16 +917,20 @@ bool AddressSanitizerModule::ShouldInstrumentGlobal(GlobalVariable *G) {
Type *Ty = cast<PointerType>(G->getType())->getElementType();
DEBUG(dbgs() << "GLOBAL: " << *G << "\n");
- if (BL->isIn(*G)) return false;
+ if (GlobalsMD.isBlacklisted(G)) return false;
+ if (GlobalsMD.isSourceLocationGlobal(G)) return false;
if (!Ty->isSized()) return false;
if (!G->hasInitializer()) return false;
if (GlobalWasGeneratedByAsan(G)) return false; // Our own global.
// Touch only those globals that will not be defined in other modules.
- // Don't handle ODR type linkages since other modules may be built w/o asan.
+ // Don't handle ODR linkage types and COMDATs since other modules may be built
+ // without ASan.
if (G->getLinkage() != GlobalVariable::ExternalLinkage &&
G->getLinkage() != GlobalVariable::PrivateLinkage &&
G->getLinkage() != GlobalVariable::InternalLinkage)
return false;
+ if (G->hasComdat())
+ return false;
// Two problems with thread-locals:
// - The address of the main thread's copy can't be computed at link-time.
// - Need to poison all copies, not just the main thread's one.
@@ -1001,39 +1021,16 @@ void AddressSanitizerModule::initializeCallbacks(Module &M) {
// This function replaces all global variables with new variables that have
// trailing redzones. It also creates a function that poisons
// redzones and inserts this function into llvm.global_ctors.
-bool AddressSanitizerModule::runOnModule(Module &M) {
- if (!ClGlobals) return false;
-
- DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- if (!DLP)
- return false;
- DL = &DLP->getDataLayout();
-
- BL.reset(SpecialCaseList::createOrDie(BlacklistFile));
- if (BL->isIn(M)) return false;
- C = &(M.getContext());
- int LongSize = DL->getPointerSizeInBits();
- IntptrTy = Type::getIntNTy(*C, LongSize);
- Mapping = getShadowMapping(M, LongSize);
- initializeCallbacks(M);
- DynamicallyInitializedGlobals.Init(M);
+bool AddressSanitizerModule::InstrumentGlobals(IRBuilder<> &IRB, Module &M) {
+ GlobalsMD.init(M);
SmallVector<GlobalVariable *, 16> GlobalsToChange;
- for (Module::GlobalListType::iterator G = M.global_begin(),
- E = M.global_end(); G != E; ++G) {
- if (ShouldInstrumentGlobal(G))
- GlobalsToChange.push_back(G);
+ for (auto &G : M.globals()) {
+ if (ShouldInstrumentGlobal(&G))
+ GlobalsToChange.push_back(&G);
}
- Function *CtorFunc = M.getFunction(kAsanModuleCtorName);
- assert(CtorFunc);
- IRBuilder<> IRB(CtorFunc->getEntryBlock().getTerminator());
-
- Function *CovFunc = M.getFunction(kAsanCovName);
- int nCov = CovFunc ? CovFunc->getNumUses() : 0;
- IRB.CreateCall(AsanCovModuleInit, ConstantInt::get(IntptrTy, nCov));
-
size_t n = GlobalsToChange.size();
if (n == 0) return false;
@@ -1044,10 +1041,11 @@ bool AddressSanitizerModule::runOnModule(Module &M) {
// const char *name;
// const char *module_name;
// size_t has_dynamic_init;
+ // void *source_location;
// We initialize an array of such structures and pass it to a run-time call.
- StructType *GlobalStructTy = StructType::get(IntptrTy, IntptrTy,
- IntptrTy, IntptrTy,
- IntptrTy, IntptrTy, NULL);
+ StructType *GlobalStructTy =
+ StructType::get(IntptrTy, IntptrTy, IntptrTy, IntptrTy, IntptrTy,
+ IntptrTy, IntptrTy, NULL);
SmallVector<Constant *, 16> Initializers(n);
bool HasDynamicallyInitializedGlobals = false;
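
The Initializers array built below feeds the ASan runtime's global descriptor; with the new field it corresponds approximately to the following compiler-rt struct (field names assumed, shown only for orientation, not part of the patch):

    struct __asan_global {          // approximate runtime-side view
      uptr beg;                     // address of the instrumented global
      uptr size;                    // size of the original global
      uptr size_with_redzone;       // size including the right redzone
      const char *name;
      const char *module_name;
      uptr has_dynamic_init;
      uptr source_location;         // new: descriptor address, or 0
    };
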
@@ -1075,11 +1073,6 @@ bool AddressSanitizerModule::runOnModule(Module &M) {
RightRedzoneSize += MinRZ - (SizeInBytes % MinRZ);
assert(((RightRedzoneSize + SizeInBytes) % MinRZ) == 0);
Type *RightRedZoneTy = ArrayType::get(IRB.getInt8Ty(), RightRedzoneSize);
- // Determine whether this global should be poisoned in initialization.
- bool GlobalHasDynamicInitializer =
- DynamicallyInitializedGlobals.Contains(G);
- // Don't check initialization order if this global is blacklisted.
- GlobalHasDynamicInitializer &= !BL->isIn(*G, "init");
StructType *NewTy = StructType::get(Ty, RightRedZoneTy, NULL);
Constant *NewInitializer = ConstantStruct::get(
@@ -1108,18 +1101,21 @@ bool AddressSanitizerModule::runOnModule(Module &M) {
NewGlobal->takeName(G);
G->eraseFromParent();
+ bool GlobalHasDynamicInitializer = GlobalsMD.isDynInit(G);
+ GlobalVariable *SourceLoc = GlobalsMD.getSourceLocation(G);
+
Initializers[i] = ConstantStruct::get(
- GlobalStructTy,
- ConstantExpr::getPointerCast(NewGlobal, IntptrTy),
+ GlobalStructTy, ConstantExpr::getPointerCast(NewGlobal, IntptrTy),
ConstantInt::get(IntptrTy, SizeInBytes),
ConstantInt::get(IntptrTy, SizeInBytes + RightRedzoneSize),
ConstantExpr::getPointerCast(Name, IntptrTy),
ConstantExpr::getPointerCast(ModuleName, IntptrTy),
ConstantInt::get(IntptrTy, GlobalHasDynamicInitializer),
+ SourceLoc ? ConstantExpr::getPointerCast(SourceLoc, IntptrTy)
+ : ConstantInt::get(IntptrTy, 0),
NULL);
- // Populate the first and last globals declared in this TU.
- if (CheckInitOrder && GlobalHasDynamicInitializer)
+ if (ClInitializers && GlobalHasDynamicInitializer)
HasDynamicallyInitializedGlobals = true;
DEBUG(dbgs() << "NEW GLOBAL: " << *NewGlobal << "\n");
@@ -1131,7 +1127,7 @@ bool AddressSanitizerModule::runOnModule(Module &M) {
ConstantArray::get(ArrayOfGlobalStructTy, Initializers), "");
// Create calls for poisoning before initializers run and unpoisoning after.
- if (CheckInitOrder && HasDynamicallyInitializedGlobals)
+ if (HasDynamicallyInitializedGlobals)
createInitializerPoisonCalls(M, ModuleName);
IRB.CreateCall2(AsanRegisterGlobals,
IRB.CreatePointerCast(AllGlobals, IntptrTy),
@@ -1147,12 +1143,42 @@ bool AddressSanitizerModule::runOnModule(Module &M) {
IRB_Dtor.CreateCall2(AsanUnregisterGlobals,
IRB.CreatePointerCast(AllGlobals, IntptrTy),
ConstantInt::get(IntptrTy, n));
- appendToGlobalDtors(M, AsanDtorFunction, kAsanCtorAndCtorPriority);
+ appendToGlobalDtors(M, AsanDtorFunction, kAsanCtorAndDtorPriority);
DEBUG(dbgs() << M);
return true;
}
+bool AddressSanitizerModule::runOnModule(Module &M) {
+ DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
+ if (!DLP)
+ return false;
+ DL = &DLP->getDataLayout();
+ C = &(M.getContext());
+ int LongSize = DL->getPointerSizeInBits();
+ IntptrTy = Type::getIntNTy(*C, LongSize);
+ Mapping = getShadowMapping(M, LongSize);
+ initializeCallbacks(M);
+
+ bool Changed = false;
+
+ Function *CtorFunc = M.getFunction(kAsanModuleCtorName);
+ assert(CtorFunc);
+ IRBuilder<> IRB(CtorFunc->getEntryBlock().getTerminator());
+
+ if (ClCoverage > 0) {
+ Function *CovFunc = M.getFunction(kAsanCovName);
+ int nCov = CovFunc ? CovFunc->getNumUses() : 0;
+ IRB.CreateCall(AsanCovModuleInit, ConstantInt::get(IntptrTy, nCov));
+ Changed = true;
+ }
+
+ if (ClGlobals)
+ Changed |= InstrumentGlobals(IRB, M);
+
+ return Changed;
+}
+
void AddressSanitizer::initializeCallbacks(Module &M) {
IRBuilder<> IRB(*C);
// Create __asan_report* callbacks.
@@ -1216,8 +1242,7 @@ bool AddressSanitizer::doInitialization(Module &M) {
report_fatal_error("data layout missing");
DL = &DLP->getDataLayout();
- BL.reset(SpecialCaseList::createOrDie(BlacklistFile));
- DynamicallyInitializedGlobals.Init(M);
+ GlobalsMD.init(M);
C = &(M.getContext());
LongSize = DL->getPointerSizeInBits();
@@ -1236,7 +1261,7 @@ bool AddressSanitizer::doInitialization(Module &M) {
Mapping = getShadowMapping(M, LongSize);
- appendToGlobalCtors(M, AsanCtorFunction, kAsanCtorAndCtorPriority);
+ appendToGlobalCtors(M, AsanCtorFunction, kAsanCtorAndDtorPriority);
return true;
}
@@ -1267,7 +1292,9 @@ void AddressSanitizer::InjectCoverageAtBlock(Function &F, BasicBlock &BB) {
break;
}
+ DebugLoc EntryLoc = IP->getDebugLoc().getFnDebugLoc(*C);
IRBuilder<> IRB(IP);
+ IRB.SetCurrentDebugLocation(EntryLoc);
Type *Int8Ty = IRB.getInt8Ty();
GlobalVariable *Guard = new GlobalVariable(
*F.getParent(), Int8Ty, false, GlobalValue::PrivateLinkage,
@@ -1279,10 +1306,10 @@ void AddressSanitizer::InjectCoverageAtBlock(Function &F, BasicBlock &BB) {
Instruction *Ins = SplitBlockAndInsertIfThen(
Cmp, IP, false, MDBuilder(*C).createBranchWeights(1, 100000));
IRB.SetInsertPoint(Ins);
+ IRB.SetCurrentDebugLocation(EntryLoc);
// We pass &F to __sanitizer_cov. We could avoid this and rely on
// GET_CALLER_PC, but having the PC of the first instruction is just nice.
- Instruction *Call = IRB.CreateCall(AsanCovFunction);
- Call->setDebugLoc(IP->getDebugLoc());
+ IRB.CreateCall(AsanCovFunction);
StoreInst *Store = IRB.CreateStore(ConstantInt::get(Int8Ty, 1), Guard);
Store->setAtomic(Monotonic);
Store->setAlignment(1);
@@ -1316,14 +1343,13 @@ bool AddressSanitizer::InjectCoverage(Function &F,
(unsigned)ClCoverageBlockThreshold < AllBlocks.size()) {
InjectCoverageAtBlock(F, F.getEntryBlock());
} else {
- for (size_t i = 0, n = AllBlocks.size(); i < n; i++)
- InjectCoverageAtBlock(F, *AllBlocks[i]);
+ for (auto BB : AllBlocks)
+ InjectCoverageAtBlock(F, *BB);
}
return true;
}
bool AddressSanitizer::runOnFunction(Function &F) {
- if (BL->isIn(F)) return false;
if (&F == AsanCtorFunction) return false;
if (F.getLinkage() == GlobalValue::AvailableExternallyLinkage) return false;
DEBUG(dbgs() << "ASAN instrumenting:\n" << F << "\n");
@@ -1350,29 +1376,28 @@ bool AddressSanitizer::runOnFunction(Function &F) {
unsigned Alignment;
// Fill the set of memory operations to instrument.
- for (Function::iterator FI = F.begin(), FE = F.end();
- FI != FE; ++FI) {
- AllBlocks.push_back(FI);
+ for (auto &BB : F) {
+ AllBlocks.push_back(&BB);
TempsToInstrument.clear();
int NumInsnsPerBB = 0;
- for (BasicBlock::iterator BI = FI->begin(), BE = FI->end();
- BI != BE; ++BI) {
- if (LooksLikeCodeInBug11395(BI)) return false;
- if (Value *Addr = isInterestingMemoryAccess(BI, &IsWrite, &Alignment)) {
+ for (auto &Inst : BB) {
+ if (LooksLikeCodeInBug11395(&Inst)) return false;
+ if (Value *Addr =
+ isInterestingMemoryAccess(&Inst, &IsWrite, &Alignment)) {
if (ClOpt && ClOptSameTemp) {
if (!TempsToInstrument.insert(Addr))
continue; // We've seen this temp in the current BB.
}
} else if (ClInvalidPointerPairs &&
- isInterestingPointerComparisonOrSubtraction(BI)) {
- PointerComparisonsOrSubtracts.push_back(BI);
+ isInterestingPointerComparisonOrSubtraction(&Inst)) {
+ PointerComparisonsOrSubtracts.push_back(&Inst);
continue;
- } else if (isa<MemIntrinsic>(BI)) {
+ } else if (isa<MemIntrinsic>(Inst)) {
// ok, take it.
} else {
- if (isa<AllocaInst>(BI))
+ if (isa<AllocaInst>(Inst))
NumAllocas++;
- CallSite CS(BI);
+ CallSite CS(&Inst);
if (CS) {
// A call inside BB.
TempsToInstrument.clear();
@@ -1381,7 +1406,7 @@ bool AddressSanitizer::runOnFunction(Function &F) {
}
continue;
}
- ToInstrument.push_back(BI);
+ ToInstrument.push_back(&Inst);
NumInsnsPerBB++;
if (NumInsnsPerBB >= ClMaxInsnsToInstrumentPerBB)
break;
@@ -1406,8 +1431,7 @@ bool AddressSanitizer::runOnFunction(Function &F) {
// Instrument.
int NumInstrumented = 0;
- for (size_t i = 0, n = ToInstrument.size(); i != n; i++) {
- Instruction *Inst = ToInstrument[i];
+ for (auto Inst : ToInstrument) {
if (ClDebugMin < 0 || ClDebugMax < 0 ||
(NumInstrumented >= ClDebugMin && NumInstrumented <= ClDebugMax)) {
if (isInterestingMemoryAccess(Inst, &IsWrite, &Alignment))
@@ -1423,14 +1447,13 @@ bool AddressSanitizer::runOnFunction(Function &F) {
// We must unpoison the stack before every NoReturn call (throw, _exit, etc).
// See e.g. http://code.google.com/p/address-sanitizer/issues/detail?id=37
- for (size_t i = 0, n = NoReturnCalls.size(); i != n; i++) {
- Instruction *CI = NoReturnCalls[i];
+ for (auto CI : NoReturnCalls) {
IRBuilder<> IRB(CI);
IRB.CreateCall(AsanHandleNoReturnFunc);
}
- for (size_t i = 0, n = PointerComparisonsOrSubtracts.size(); i != n; i++) {
- instrumentPointerComparisonOrSubtraction(PointerComparisonsOrSubtracts[i]);
+ for (auto Inst : PointerComparisonsOrSubtracts) {
+ instrumentPointerComparisonOrSubtraction(Inst);
NumInstrumented++;
}
@@ -1543,12 +1566,10 @@ void FunctionStackPoisoner::SetShadowToStackAfterReturnInlined(
}
static DebugLoc getFunctionEntryDebugLocation(Function &F) {
- BasicBlock::iterator I = F.getEntryBlock().begin(),
- E = F.getEntryBlock().end();
- for (; I != E; ++I)
- if (!isa<AllocaInst>(I))
- break;
- return I->getDebugLoc();
+ for (const auto &Inst : F.getEntryBlock())
+ if (!isa<AllocaInst>(Inst))
+ return Inst.getDebugLoc();
+ return DebugLoc();
}
void FunctionStackPoisoner::poisonStack() {
@@ -1562,8 +1583,7 @@ void FunctionStackPoisoner::poisonStack() {
SmallVector<ASanStackVariableDescription, 16> SVD;
SVD.reserve(AllocaVec.size());
- for (size_t i = 0, n = AllocaVec.size(); i < n; i++) {
- AllocaInst *AI = AllocaVec[i];
+ for (AllocaInst *AI : AllocaVec) {
ASanStackVariableDescription D = { AI->getName().data(),
getAllocaSizeInBytes(AI),
AI->getAlignment(), AI, 0};
@@ -1577,7 +1597,7 @@ void FunctionStackPoisoner::poisonStack() {
DEBUG(dbgs() << L.DescriptionString << " --- " << L.FrameSize << "\n");
uint64_t LocalStackSize = L.FrameSize;
bool DoStackMalloc =
- ASan.CheckUseAfterReturn && LocalStackSize <= kMaxStackMallocSize;
+ ClUseAfterReturn && LocalStackSize <= kMaxStackMallocSize;
Type *ByteArrayTy = ArrayType::get(IRB.getInt8Ty(), LocalStackSize);
AllocaInst *MyAlloca =
@@ -1618,8 +1638,7 @@ void FunctionStackPoisoner::poisonStack() {
// Insert poison calls for lifetime intrinsics for alloca.
bool HavePoisonedAllocas = false;
- for (size_t i = 0, n = AllocaPoisonCallVec.size(); i < n; i++) {
- const AllocaPoisonCall &APC = AllocaPoisonCallVec[i];
+ for (const auto &APC : AllocaPoisonCallVec) {
assert(APC.InsBefore);
assert(APC.AI);
IRBuilder<> IRB(APC.InsBefore);
@@ -1628,11 +1647,10 @@ void FunctionStackPoisoner::poisonStack() {
}
// Replace Alloca instructions with base+offset.
- for (size_t i = 0, n = SVD.size(); i < n; i++) {
- AllocaInst *AI = SVD[i].AI;
+ for (const auto &Desc : SVD) {
+ AllocaInst *AI = Desc.AI;
Value *NewAllocaPtr = IRB.CreateIntToPtr(
- IRB.CreateAdd(LocalStackBase,
- ConstantInt::get(IntptrTy, SVD[i].Offset)),
+ IRB.CreateAdd(LocalStackBase, ConstantInt::get(IntptrTy, Desc.Offset)),
AI->getType());
replaceDbgDeclareForAlloca(AI, NewAllocaPtr, DIB);
AI->replaceAllUsesWith(NewAllocaPtr);
@@ -1665,8 +1683,7 @@ void FunctionStackPoisoner::poisonStack() {
poisonRedZones(L.ShadowBytes, IRB, ShadowBase, true);
// (Un)poison the stack before all ret instructions.
- for (size_t i = 0, n = RetVec.size(); i < n; i++) {
- Instruction *Ret = RetVec[i];
+ for (auto Ret : RetVec) {
IRBuilder<> IRBRet(Ret);
// Mark the current frame as retired.
IRBRet.CreateStore(ConstantInt::get(IntptrTy, kRetiredStackFrameMagic),
@@ -1720,8 +1737,8 @@ void FunctionStackPoisoner::poisonStack() {
}
// We are done. Remove the old unused alloca instructions.
- for (size_t i = 0, n = AllocaVec.size(); i < n; i++)
- AllocaVec[i]->eraseFromParent();
+ for (auto AI : AllocaVec)
+ AI->eraseFromParent();
}
void FunctionStackPoisoner::poisonAlloca(Value *V, uint64_t Size,
diff --git a/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp b/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
index 7f468f7..799e14b 100644
--- a/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
+++ b/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
@@ -59,9 +59,9 @@
#include "llvm/IR/Value.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/SpecialCaseList.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/Transforms/Utils/SpecialCaseList.h"
#include <iterator>
using namespace llvm;
@@ -120,6 +120,51 @@ static cl::opt<bool> ClDebugNonzeroLabels(
namespace {
+StringRef GetGlobalTypeString(const GlobalValue &G) {
+ // Types of GlobalVariables are always pointer types.
+ Type *GType = G.getType()->getElementType();
+ // For now we support blacklisting struct types only.
+ if (StructType *SGType = dyn_cast<StructType>(GType)) {
+ if (!SGType->isLiteral())
+ return SGType->getName();
+ }
+ return "<unknown type>";
+}
+
+class DFSanABIList {
+ std::unique_ptr<SpecialCaseList> SCL;
+
+ public:
+ DFSanABIList(SpecialCaseList *SCL) : SCL(SCL) {}
+
+ /// Returns whether either this function or its source file is listed in the
+ /// given category.
+ bool isIn(const Function &F, const StringRef Category) const {
+ return isIn(*F.getParent(), Category) ||
+ SCL->inSection("fun", F.getName(), Category);
+ }
+
+ /// Returns whether this global alias is listed in the given category.
+ ///
+ /// If GA aliases a function, the alias's name is matched as a function name
+ /// would be. Similarly, aliases of globals are matched like globals.
+ bool isIn(const GlobalAlias &GA, const StringRef Category) const {
+ if (isIn(*GA.getParent(), Category))
+ return true;
+
+ if (isa<FunctionType>(GA.getType()->getElementType()))
+ return SCL->inSection("fun", GA.getName(), Category);
+
+ return SCL->inSection("global", GA.getName(), Category) ||
+ SCL->inSection("type", GetGlobalTypeString(GA), Category);
+ }
+
+ /// Returns whether this module is listed in the given category.
+ bool isIn(const Module &M, const StringRef Category) const {
+ return SCL->inSection("src", M.getModuleIdentifier(), Category);
+ }
+};
+
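
The categories queried through DFSanABIList ("uninstrumented", "functional", "discard", "custom", "skip"; see getWrapperKind and runOnModule below) come from a SpecialCaseList file. A minimal sketch of such an ABI list, with purely illustrative entries:

    # dfsan ABI list (illustrative)
    fun:memcmp=uninstrumented
    fun:memcmp=functional
    fun:getenv=uninstrumented
    fun:getenv=discard
    src:third_party/.*=skip
    type:struct.Foo=uninstrumented
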
class DataFlowSanitizer : public ModulePass {
friend struct DFSanFunction;
friend class DFSanVisitor;
@@ -190,7 +235,7 @@ class DataFlowSanitizer : public ModulePass {
Constant *DFSanSetLabelFn;
Constant *DFSanNonzeroLabelFn;
MDNode *ColdCallWeights;
- std::unique_ptr<SpecialCaseList> ABIList;
+ DFSanABIList ABIList;
DenseMap<Value *, Function *> UnwrappedFnMap;
AttributeSet ReadOnlyNoneAttrs;
@@ -395,11 +440,11 @@ bool DataFlowSanitizer::doInitialization(Module &M) {
}
bool DataFlowSanitizer::isInstrumented(const Function *F) {
- return !ABIList->isIn(*F, "uninstrumented");
+ return !ABIList.isIn(*F, "uninstrumented");
}
bool DataFlowSanitizer::isInstrumented(const GlobalAlias *GA) {
- return !ABIList->isIn(*GA, "uninstrumented");
+ return !ABIList.isIn(*GA, "uninstrumented");
}
DataFlowSanitizer::InstrumentedABI DataFlowSanitizer::getInstrumentedABI() {
@@ -407,11 +452,11 @@ DataFlowSanitizer::InstrumentedABI DataFlowSanitizer::getInstrumentedABI() {
}
DataFlowSanitizer::WrapperKind DataFlowSanitizer::getWrapperKind(Function *F) {
- if (ABIList->isIn(*F, "functional"))
+ if (ABIList.isIn(*F, "functional"))
return WK_Functional;
- if (ABIList->isIn(*F, "discard"))
+ if (ABIList.isIn(*F, "discard"))
return WK_Discard;
- if (ABIList->isIn(*F, "custom"))
+ if (ABIList.isIn(*F, "custom"))
return WK_Custom;
return WK_Warning;
@@ -500,7 +545,7 @@ bool DataFlowSanitizer::runOnModule(Module &M) {
if (!DL)
return false;
- if (ABIList->isIn(M, "skip"))
+ if (ABIList.isIn(M, "skip"))
return false;
if (!GetArgTLSPtr) {
@@ -557,7 +602,7 @@ bool DataFlowSanitizer::runOnModule(Module &M) {
++i;
// Don't stop on weak. We assume people aren't playing games with the
// instrumentedness of overridden weak aliases.
- if (Function *F = dyn_cast<Function>(GA->getAliasee())) {
+ if (auto F = dyn_cast<Function>(GA->getBaseObject())) {
bool GAInst = isInstrumented(GA), FInst = isInstrumented(F);
if (GAInst && FInst) {
addGlobalNamePrefix(GA);
@@ -567,7 +612,7 @@ bool DataFlowSanitizer::runOnModule(Module &M) {
// below will take care of instrumenting it.
Function *NewF =
buildWrapperFunction(F, "", GA->getLinkage(), F->getFunctionType());
- GA->replaceAllUsesWith(NewF);
+ GA->replaceAllUsesWith(ConstantExpr::getBitCast(NewF, GA->getType()));
NewF->takeName(GA);
GA->eraseFromParent();
FnsToInstrument.push_back(NewF);
diff --git a/lib/Transforms/Instrumentation/DebugIR.cpp b/lib/Transforms/Instrumentation/DebugIR.cpp
index 18bda1a..f2f1738 100644
--- a/lib/Transforms/Instrumentation/DebugIR.cpp
+++ b/lib/Transforms/Instrumentation/DebugIR.cpp
@@ -354,7 +354,10 @@ private:
std::string getTypeName(Type *T) {
std::string TypeName;
raw_string_ostream TypeStream(TypeName);
- T->print(TypeStream);
+ if (T)
+ T->print(TypeStream);
+ else
+ TypeStream << "Printing <null> Type";
TypeStream.flush();
return TypeName;
}
diff --git a/lib/Transforms/Instrumentation/GCOVProfiling.cpp b/lib/Transforms/Instrumentation/GCOVProfiling.cpp
index 8330a9b..cfeb62e 100644
--- a/lib/Transforms/Instrumentation/GCOVProfiling.cpp
+++ b/lib/Transforms/Instrumentation/GCOVProfiling.cpp
@@ -211,6 +211,7 @@ namespace {
class GCOVLines : public GCOVRecord {
public:
void addLine(uint32_t Line) {
+ assert(Line != 0 && "Line zero is not a valid real line number.");
Lines.push_back(Line);
}
@@ -453,10 +454,17 @@ static bool functionHasLines(Function *F) {
for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
for (BasicBlock::iterator I = BB->begin(), IE = BB->end();
I != IE; ++I) {
+ // Debug intrinsic locations correspond to the location of the
+ // declaration, not necessarily any statements or expressions.
+ if (isa<DbgInfoIntrinsic>(I)) continue;
+
const DebugLoc &Loc = I->getDebugLoc();
if (Loc.isUnknown()) continue;
- if (Loc.getLine() != 0)
- return true;
+
+ // Skip artificial line-0 locations, such as calls to the global constructors.
+ if (Loc.getLine() == 0) continue;
+
+ return true;
}
}
return false;
@@ -515,8 +523,16 @@ void GCOVProfiler::emitProfileNotes() {
uint32_t Line = 0;
for (BasicBlock::iterator I = BB->begin(), IE = BB->end();
I != IE; ++I) {
+ // Debug intrinsic locations correspond to the location of the
+ // declaration, not necessarily any statements or expressions.
+ if (isa<DbgInfoIntrinsic>(I)) continue;
+
const DebugLoc &Loc = I->getDebugLoc();
if (Loc.isUnknown()) continue;
+
+ // Skip artificial line-0 locations, such as calls to the global constructors.
+ if (Loc.getLine() == 0) continue;
+
if (Line == Loc.getLine()) continue;
Line = Loc.getLine();
if (SP != getDISubprogram(Loc.getScope(*Ctx))) continue;
diff --git a/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index b8e632e..496ab48 100644
--- a/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -10,8 +10,6 @@
/// This file is a part of MemorySanitizer, a detector of uninitialized
/// reads.
///
-/// Status: early prototype.
-///
/// The algorithm of the tool is similar to Memcheck
/// (http://goo.gl/QKbem). We associate a few shadow bits with every
/// byte of the application memory, poison the shadow of the malloc-ed
@@ -117,7 +115,6 @@
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
-#include "llvm/Transforms/Utils/SpecialCaseList.h"
using namespace llvm;
@@ -178,10 +175,6 @@ static cl::opt<bool> ClDumpStrictInstructions("msan-dump-strict-instructions",
cl::desc("print out instructions with default strict semantics"),
cl::Hidden, cl::init(false));
-static cl::opt<std::string> ClBlacklistFile("msan-blacklist",
- cl::desc("File containing the list of functions where MemorySanitizer "
- "should not report bugs"), cl::Hidden);
-
static cl::opt<int> ClInstrumentationWithCallThreshold(
"msan-instrumentation-with-call-threshold",
cl::desc(
@@ -211,13 +204,11 @@ namespace {
/// uninitialized reads.
class MemorySanitizer : public FunctionPass {
public:
- MemorySanitizer(int TrackOrigins = 0,
- StringRef BlacklistFile = StringRef())
+ MemorySanitizer(int TrackOrigins = 0)
: FunctionPass(ID),
TrackOrigins(std::max(TrackOrigins, (int)ClTrackOrigins)),
DL(nullptr),
WarningFn(nullptr),
- BlacklistFile(BlacklistFile.empty() ? ClBlacklistFile : BlacklistFile),
WrapIndirectCalls(!ClWrapIndirectCalls.empty()) {}
const char *getPassName() const override { return "MemorySanitizer"; }
bool runOnFunction(Function &F) override;
@@ -282,10 +273,6 @@ class MemorySanitizer : public FunctionPass {
MDNode *ColdCallWeights;
/// \brief Branch weights for origin store.
MDNode *OriginStoreWeights;
- /// \brief Path to blacklist file.
- SmallString<64> BlacklistFile;
- /// \brief The blacklist.
- std::unique_ptr<SpecialCaseList> BL;
/// \brief An empty volatile inline asm that prevents callback merge.
InlineAsm *EmptyAsm;
@@ -305,9 +292,8 @@ INITIALIZE_PASS(MemorySanitizer, "msan",
"MemorySanitizer: detects uninitialized reads.",
false, false)
-FunctionPass *llvm::createMemorySanitizerPass(int TrackOrigins,
- StringRef BlacklistFile) {
- return new MemorySanitizer(TrackOrigins, BlacklistFile);
+FunctionPass *llvm::createMemorySanitizerPass(int TrackOrigins) {
+ return new MemorySanitizer(TrackOrigins);
}
/// \brief Create a non-const global initialized with the given string.
@@ -431,7 +417,6 @@ bool MemorySanitizer::doInitialization(Module &M) {
report_fatal_error("data layout missing");
DL = &DLP->getDataLayout();
- BL.reset(SpecialCaseList::createOrDie(BlacklistFile));
C = &(M.getContext());
unsigned PtrSize = DL->getPointerSizeInBits(/* AddressSpace */0);
switch (PtrSize) {
@@ -526,7 +511,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
// The following flags disable parts of MSan instrumentation based on
// blacklist contents and command-line options.
bool InsertChecks;
- bool LoadShadow;
+ bool PropagateShadow;
bool PoisonStack;
bool PoisonUndef;
bool CheckReturnValue;
@@ -544,11 +529,10 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
MemorySanitizerVisitor(Function &F, MemorySanitizer &MS)
: F(F), MS(MS), VAHelper(CreateVarArgHelper(F, MS, *this)) {
- bool SanitizeFunction = !MS.BL->isIn(F) && F.getAttributes().hasAttribute(
- AttributeSet::FunctionIndex,
- Attribute::SanitizeMemory);
+ bool SanitizeFunction = F.getAttributes().hasAttribute(
+ AttributeSet::FunctionIndex, Attribute::SanitizeMemory);
InsertChecks = SanitizeFunction;
- LoadShadow = SanitizeFunction;
+ PropagateShadow = SanitizeFunction;
PoisonStack = SanitizeFunction && ClPoisonStack;
PoisonUndef = SanitizeFunction && ClPoisonUndef;
// FIXME: Consider using SpecialCaseList to specify a list of functions that
@@ -585,7 +569,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
ConvertedShadow, IRB.getIntNTy(8 * (1 << SizeIndex)));
IRB.CreateCall3(Fn, ConvertedShadow2,
IRB.CreatePointerCast(Addr, IRB.getInt8PtrTy()),
- updateOrigin(Origin, IRB));
+ Origin);
} else {
Value *Cmp = IRB.CreateICmpNE(
ConvertedShadow, getCleanShadow(ConvertedShadow), "_mscmp");
@@ -599,26 +583,26 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
}
void materializeStores(bool InstrumentWithCalls) {
- for (size_t i = 0, n = StoreList.size(); i < n; i++) {
- StoreInst &I = *dyn_cast<StoreInst>(StoreList[i]);
+ for (auto Inst : StoreList) {
+ StoreInst &SI = *cast<StoreInst>(Inst);
- IRBuilder<> IRB(&I);
- Value *Val = I.getValueOperand();
- Value *Addr = I.getPointerOperand();
- Value *Shadow = I.isAtomic() ? getCleanShadow(Val) : getShadow(Val);
+ IRBuilder<> IRB(&SI);
+ Value *Val = SI.getValueOperand();
+ Value *Addr = SI.getPointerOperand();
+ Value *Shadow = SI.isAtomic() ? getCleanShadow(Val) : getShadow(Val);
Value *ShadowPtr = getShadowPtr(Addr, Shadow->getType(), IRB);
StoreInst *NewSI =
- IRB.CreateAlignedStore(Shadow, ShadowPtr, I.getAlignment());
+ IRB.CreateAlignedStore(Shadow, ShadowPtr, SI.getAlignment());
DEBUG(dbgs() << " STORE: " << *NewSI << "\n");
(void)NewSI;
- if (ClCheckAccessAddress) insertShadowCheck(Addr, &I);
+ if (ClCheckAccessAddress) insertShadowCheck(Addr, &SI);
- if (I.isAtomic()) I.setOrdering(addReleaseOrdering(I.getOrdering()));
+ if (SI.isAtomic()) SI.setOrdering(addReleaseOrdering(SI.getOrdering()));
if (MS.TrackOrigins) {
- unsigned Alignment = std::max(kMinOriginAlignment, I.getAlignment());
+ unsigned Alignment = std::max(kMinOriginAlignment, SI.getAlignment());
storeOrigin(IRB, Addr, Shadow, getOrigin(Val), Alignment,
InstrumentWithCalls);
}
@@ -662,18 +646,17 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
}
void materializeChecks(bool InstrumentWithCalls) {
- for (size_t i = 0, n = InstrumentationList.size(); i < n; i++) {
- Instruction *OrigIns = InstrumentationList[i].OrigIns;
- Value *Shadow = InstrumentationList[i].Shadow;
- Value *Origin = InstrumentationList[i].Origin;
+ for (const auto &ShadowData : InstrumentationList) {
+ Instruction *OrigIns = ShadowData.OrigIns;
+ Value *Shadow = ShadowData.Shadow;
+ Value *Origin = ShadowData.Origin;
materializeOneCheck(OrigIns, Shadow, Origin, InstrumentWithCalls);
}
DEBUG(dbgs() << "DONE:\n" << F);
}
void materializeIndirectCalls() {
- for (size_t i = 0, n = IndirectCallList.size(); i < n; i++) {
- CallSite CS = IndirectCallList[i];
+ for (auto &CS : IndirectCallList) {
Instruction *I = CS.getInstruction();
BasicBlock *B = I->getParent();
IRBuilder<> IRB(I);
@@ -732,15 +715,13 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
// Finalize PHI nodes.
- for (size_t i = 0, n = ShadowPHINodes.size(); i < n; i++) {
- PHINode *PN = ShadowPHINodes[i];
+ for (PHINode *PN : ShadowPHINodes) {
PHINode *PNS = cast<PHINode>(getShadow(PN));
PHINode *PNO = MS.TrackOrigins ? cast<PHINode>(getOrigin(PN)) : nullptr;
size_t NumValues = PN->getNumIncomingValues();
for (size_t v = 0; v < NumValues; v++) {
PNS->addIncoming(getShadow(PN, v), PN->getIncomingBlock(v));
- if (PNO)
- PNO->addIncoming(getOrigin(PN, v), PN->getIncomingBlock(v));
+ if (PNO) PNO->addIncoming(getOrigin(PN, v), PN->getIncomingBlock(v));
}
}
@@ -874,7 +855,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
/// \brief Set SV to be the shadow value for V.
void setShadow(Value *V, Value *SV) {
assert(!ShadowMap.count(V) && "Values may only have one shadow");
- ShadowMap[V] = SV;
+ ShadowMap[V] = PropagateShadow ? SV : getCleanShadow(V);
}
/// \brief Set Origin to be the origin value for V.
@@ -926,6 +907,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
/// This function either returns the value set earlier with setShadow,
/// or extracts if from ParamTLS (for function arguments).
Value *getShadow(Value *V) {
+ if (!PropagateShadow) return getCleanShadow(V);
if (Instruction *I = dyn_cast<Instruction>(V)) {
// For instructions the shadow is already stored in the map.
Value *Shadow = ShadowMap[V];
@@ -950,22 +932,21 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
Function *F = A->getParent();
IRBuilder<> EntryIRB(F->getEntryBlock().getFirstNonPHI());
unsigned ArgOffset = 0;
- for (Function::arg_iterator AI = F->arg_begin(), AE = F->arg_end();
- AI != AE; ++AI) {
- if (!AI->getType()->isSized()) {
+ for (auto &FArg : F->args()) {
+ if (!FArg.getType()->isSized()) {
DEBUG(dbgs() << "Arg is not sized\n");
continue;
}
- unsigned Size = AI->hasByValAttr()
- ? MS.DL->getTypeAllocSize(AI->getType()->getPointerElementType())
- : MS.DL->getTypeAllocSize(AI->getType());
- if (A == AI) {
- Value *Base = getShadowPtrForArgument(AI, EntryIRB, ArgOffset);
- if (AI->hasByValAttr()) {
+ unsigned Size = FArg.hasByValAttr()
+ ? MS.DL->getTypeAllocSize(FArg.getType()->getPointerElementType())
+ : MS.DL->getTypeAllocSize(FArg.getType());
+ if (A == &FArg) {
+ Value *Base = getShadowPtrForArgument(&FArg, EntryIRB, ArgOffset);
+ if (FArg.hasByValAttr()) {
// The ByVal pointer itself has a clean shadow. We copy the actual
// argument shadow to the underlying memory.
// Figure out maximal valid memcpy alignment.
- unsigned ArgAlign = AI->getParamAlignment();
+ unsigned ArgAlign = FArg.getParamAlignment();
if (ArgAlign == 0) {
Type *EltType = A->getType()->getPointerElementType();
ArgAlign = MS.DL->getABITypeAlignment(EltType);
@@ -980,10 +961,11 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
} else {
*ShadowPtr = EntryIRB.CreateAlignedLoad(Base, kShadowTLSAlignment);
}
- DEBUG(dbgs() << " ARG: " << *AI << " ==> " <<
+ DEBUG(dbgs() << " ARG: " << FArg << " ==> " <<
**ShadowPtr << "\n");
if (MS.TrackOrigins) {
- Value* OriginPtr = getOriginPtrForArgument(AI, EntryIRB, ArgOffset);
+ Value *OriginPtr =
+ getOriginPtrForArgument(&FArg, EntryIRB, ArgOffset);
setOrigin(A, EntryIRB.CreateLoad(OriginPtr));
}
}
@@ -1093,7 +1075,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
IRBuilder<> IRB(I.getNextNode());
Type *ShadowTy = getShadowTy(&I);
Value *Addr = I.getPointerOperand();
- if (LoadShadow) {
+ if (PropagateShadow) {
Value *ShadowPtr = getShadowPtr(Addr, ShadowTy, IRB);
setShadow(&I,
IRB.CreateAlignedLoad(ShadowPtr, I.getAlignment(), "_msld"));
@@ -1108,7 +1090,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
I.setOrdering(addAcquireOrdering(I.getOrdering()));
if (MS.TrackOrigins) {
- if (LoadShadow) {
+ if (PropagateShadow) {
unsigned Alignment = std::max(kMinOriginAlignment, I.getAlignment());
setOrigin(&I,
IRB.CreateAlignedLoad(getOriginPtr(Addr, IRB), Alignment));
@@ -1320,10 +1302,14 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
if (!Origin) {
Origin = OpOrigin;
} else {
- Value *FlatShadow = MSV->convertToShadowTyNoVec(OpShadow, IRB);
- Value *Cond = IRB.CreateICmpNE(FlatShadow,
- MSV->getCleanShadow(FlatShadow));
- Origin = IRB.CreateSelect(Cond, OpOrigin, Origin);
+ Constant *ConstOrigin = dyn_cast<Constant>(OpOrigin);
+ // No point in adding something that might result in a zero origin value.
+ if (!ConstOrigin || !ConstOrigin->isNullValue()) {
+ Value *FlatShadow = MSV->convertToShadowTyNoVec(OpShadow, IRB);
+ Value *Cond =
+ IRB.CreateICmpNE(FlatShadow, MSV->getCleanShadow(FlatShadow));
+ Origin = IRB.CreateSelect(Cond, OpOrigin, Origin);
+ }
}
}
return *this;
@@ -1411,13 +1397,61 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
SC.Done(&I);
}
+ // \brief Handle multiplication by constant.
+ //
+ // Handle a special case of multiplication by constant that may have one or
+ // more zeros in the lower bits. This makes the corresponding number of lower
+ // bits of the result zero as well. We model it by shifting the other operand
+ // shadow left by the required number of bits. Effectively, we transform
+ // (X * (A * 2**B)) to ((X << B) * A) and instrument (X << B) as (Sx << B).
+ // We use multiplication by 2**N instead of shift to cover the case of
+ // multiplication by 0, which may occur in some elements of a vector operand.
+ void handleMulByConstant(BinaryOperator &I, Constant *ConstArg,
+ Value *OtherArg) {
+ Constant *ShadowMul;
+ Type *Ty = ConstArg->getType();
+ if (Ty->isVectorTy()) {
+ unsigned NumElements = Ty->getVectorNumElements();
+ Type *EltTy = Ty->getSequentialElementType();
+ SmallVector<Constant *, 16> Elements;
+ for (unsigned Idx = 0; Idx < NumElements; ++Idx) {
+ ConstantInt *Elt =
+ dyn_cast<ConstantInt>(ConstArg->getAggregateElement(Idx));
+ APInt V = Elt->getValue();
+ APInt V2 = APInt(V.getBitWidth(), 1) << V.countTrailingZeros();
+ Elements.push_back(ConstantInt::get(EltTy, V2));
+ }
+ ShadowMul = ConstantVector::get(Elements);
+ } else {
+ ConstantInt *Elt = dyn_cast<ConstantInt>(ConstArg);
+ APInt V = Elt->getValue();
+ APInt V2 = APInt(V.getBitWidth(), 1) << V.countTrailingZeros();
+ ShadowMul = ConstantInt::get(Elt->getType(), V2);
+ }
+
+ IRBuilder<> IRB(&I);
+ setShadow(&I,
+ IRB.CreateMul(getShadow(OtherArg), ShadowMul, "msprop_mul_cst"));
+ setOrigin(&I, getOrigin(OtherArg));
+ }
+
+ void visitMul(BinaryOperator &I) {
+ Constant *constOp0 = dyn_cast<Constant>(I.getOperand(0));
+ Constant *constOp1 = dyn_cast<Constant>(I.getOperand(1));
+ if (constOp0 && !constOp1)
+ handleMulByConstant(I, constOp0, I.getOperand(1));
+ else if (constOp1 && !constOp0)
+ handleMulByConstant(I, constOp1, I.getOperand(0));
+ else
+ handleShadowOr(I);
+ }
+
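
A worked instance of the transformation described above, with hypothetical value names: for X * 12, 12 = 3 * 2**2, so countTrailingZeros gives B = 2 and ShadowMul = 4; the two low bits of the product are zero for any X, and the result shadow is Sx shifted left by 2:

    %r  = mul i32 %x, 12        ; 12 has two trailing zero bits
    %rs = mul i32 %sx, 4        ; "msprop_mul_cst": Sx << 2, low bits known-init
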
void visitFAdd(BinaryOperator &I) { handleShadowOr(I); }
void visitFSub(BinaryOperator &I) { handleShadowOr(I); }
void visitFMul(BinaryOperator &I) { handleShadowOr(I); }
void visitAdd(BinaryOperator &I) { handleShadowOr(I); }
void visitSub(BinaryOperator &I) { handleShadowOr(I); }
void visitXor(BinaryOperator &I) { handleShadowOr(I); }
- void visitMul(BinaryOperator &I) { handleShadowOr(I); }
void handleDiv(Instruction &I) {
IRBuilder<> IRB(&I);
@@ -1723,7 +1757,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
Value *Addr = I.getArgOperand(0);
Type *ShadowTy = getShadowTy(&I);
- if (LoadShadow) {
+ if (PropagateShadow) {
Value *ShadowPtr = getShadowPtr(Addr, ShadowTy, IRB);
// We don't know the pointer alignment (could be unaligned SSE load!).
// Have to assume the worst case.
@@ -1736,7 +1770,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
insertShadowCheck(Addr, &I);
if (MS.TrackOrigins) {
- if (LoadShadow)
+ if (PropagateShadow)
setOrigin(&I, IRB.CreateLoad(getOriginPtr(Addr, IRB)));
else
setOrigin(&I, getCleanOrigin());
@@ -1946,6 +1980,120 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
setOriginForNaryOp(I);
}
+ // \brief Get an X86_MMX-sized vector type.
+ Type *getMMXVectorTy(unsigned EltSizeInBits) {
+ const unsigned X86_MMXSizeInBits = 64;
+ return VectorType::get(IntegerType::get(*MS.C, EltSizeInBits),
+ X86_MMXSizeInBits / EltSizeInBits);
+ }
+
+ // \brief Returns a signed counterpart for an (un)signed-saturate-and-pack
+ // intrinsic.
+ Intrinsic::ID getSignedPackIntrinsic(Intrinsic::ID id) {
+ switch (id) {
+ case llvm::Intrinsic::x86_sse2_packsswb_128:
+ case llvm::Intrinsic::x86_sse2_packuswb_128:
+ return llvm::Intrinsic::x86_sse2_packsswb_128;
+
+ case llvm::Intrinsic::x86_sse2_packssdw_128:
+ case llvm::Intrinsic::x86_sse41_packusdw:
+ return llvm::Intrinsic::x86_sse2_packssdw_128;
+
+ case llvm::Intrinsic::x86_avx2_packsswb:
+ case llvm::Intrinsic::x86_avx2_packuswb:
+ return llvm::Intrinsic::x86_avx2_packsswb;
+
+ case llvm::Intrinsic::x86_avx2_packssdw:
+ case llvm::Intrinsic::x86_avx2_packusdw:
+ return llvm::Intrinsic::x86_avx2_packssdw;
+
+ case llvm::Intrinsic::x86_mmx_packsswb:
+ case llvm::Intrinsic::x86_mmx_packuswb:
+ return llvm::Intrinsic::x86_mmx_packsswb;
+
+ case llvm::Intrinsic::x86_mmx_packssdw:
+ return llvm::Intrinsic::x86_mmx_packssdw;
+ default:
+ llvm_unreachable("unexpected intrinsic id");
+ }
+ }
+
+ // \brief Instrument vector pack intrinsic.
+ //
+ // This function instruments intrinsics like x86_mmx_packsswb, which pack
+ // elements of 2 input vectors into half as many bits with saturation.
+ // Shadow is propagated with the signed variant of the same intrinsic applied
+ // to sext(Sa != zeroinitializer), sext(Sb != zeroinitializer).
+ // EltSizeInBits is used only for x86mmx arguments.
+ void handleVectorPackIntrinsic(IntrinsicInst &I, unsigned EltSizeInBits = 0) {
+ assert(I.getNumArgOperands() == 2);
+ bool isX86_MMX = I.getOperand(0)->getType()->isX86_MMXTy();
+ IRBuilder<> IRB(&I);
+ Value *S1 = getShadow(&I, 0);
+ Value *S2 = getShadow(&I, 1);
+ assert(isX86_MMX || S1->getType()->isVectorTy());
+
+ // SExt and ICmpNE below must apply to individual elements of input vectors.
+ // In case of x86mmx arguments, cast them to appropriate vector types and
+ // back.
+ Type *T = isX86_MMX ? getMMXVectorTy(EltSizeInBits) : S1->getType();
+ if (isX86_MMX) {
+ S1 = IRB.CreateBitCast(S1, T);
+ S2 = IRB.CreateBitCast(S2, T);
+ }
+ Value *S1_ext = IRB.CreateSExt(
+ IRB.CreateICmpNE(S1, llvm::Constant::getNullValue(T)), T);
+ Value *S2_ext = IRB.CreateSExt(
+ IRB.CreateICmpNE(S2, llvm::Constant::getNullValue(T)), T);
+ if (isX86_MMX) {
+ Type *X86_MMXTy = Type::getX86_MMXTy(*MS.C);
+ S1_ext = IRB.CreateBitCast(S1_ext, X86_MMXTy);
+ S2_ext = IRB.CreateBitCast(S2_ext, X86_MMXTy);
+ }
+
+ Function *ShadowFn = Intrinsic::getDeclaration(
+ F.getParent(), getSignedPackIntrinsic(I.getIntrinsicID()));
+
+ Value *S = IRB.CreateCall2(ShadowFn, S1_ext, S2_ext, "_msprop_vector_pack");
+ if (isX86_MMX) S = IRB.CreateBitCast(S, getShadowTy(&I));
+ setShadow(&I, S);
+ setOriginForNaryOp(I);
+ }
+
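
A sketch of the propagation for, e.g., x86_sse2_packsswb_128 (hypothetical value names): any poisoned bit in a source word must poison the whole packed byte, so each word's shadow is widened to all-zeros or all-ones and packed with the signed variant, whose saturation maps 0xFFFF (that is, -1) to 0xFF and 0 to 0x00:

    %c1 = icmp ne <8 x i16> %sa, zeroinitializer
    %e1 = sext <8 x i1> %c1 to <8 x i16>        ; 0x0000 or 0xFFFF per word
    ; ... %e2 likewise from %sb ...
    %sr = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %e1, <8 x i16> %e2)
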
+ // \brief Instrument sum-of-absolute-differences intrinsic.
+ void handleVectorSadIntrinsic(IntrinsicInst &I) {
+ const unsigned SignificantBitsPerResultElement = 16;
+ bool isX86_MMX = I.getOperand(0)->getType()->isX86_MMXTy();
+ Type *ResTy = isX86_MMX ? IntegerType::get(*MS.C, 64) : I.getType();
+ unsigned ZeroBitsPerResultElement =
+ ResTy->getScalarSizeInBits() - SignificantBitsPerResultElement;
+
+ IRBuilder<> IRB(&I);
+ Value *S = IRB.CreateOr(getShadow(&I, 0), getShadow(&I, 1));
+ S = IRB.CreateBitCast(S, ResTy);
+ S = IRB.CreateSExt(IRB.CreateICmpNE(S, Constant::getNullValue(ResTy)),
+ ResTy);
+ S = IRB.CreateLShr(S, ZeroBitsPerResultElement);
+ S = IRB.CreateBitCast(S, getShadowTy(&I));
+ setShadow(&I, S);
+ setOriginForNaryOp(I);
+ }
+
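
For the SSE2 case below, psad.bw sums eight absolute byte differences into each 64-bit element, so only the low 16 bits can be nonzero; the shadow is the OR of the operand shadows widened to all-ones per element, then shifted right by 64 - 16 = 48 so the always-zero high bits read as initialized (a sketch with assumed names):

    %s  = or <16 x i8> %sa, %sb                   ; union of byte shadows
    ; bitcast to <2 x i64>, icmp ne 0, sext: all-zeros or all-ones per element
    %sr = lshr <2 x i64> %sext, <i64 48, i64 48>  ; top 48 bits known zero
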
+ // \brief Instrument multiply-add intrinsic.
+ void handleVectorPmaddIntrinsic(IntrinsicInst &I,
+ unsigned EltSizeInBits = 0) {
+ bool isX86_MMX = I.getOperand(0)->getType()->isX86_MMXTy();
+ Type *ResTy = isX86_MMX ? getMMXVectorTy(EltSizeInBits * 2) : I.getType();
+ IRBuilder<> IRB(&I);
+ Value *S = IRB.CreateOr(getShadow(&I, 0), getShadow(&I, 1));
+ S = IRB.CreateBitCast(S, ResTy);
+ S = IRB.CreateSExt(IRB.CreateICmpNE(S, Constant::getNullValue(ResTy)),
+ ResTy);
+ S = IRB.CreateBitCast(S, getShadowTy(&I));
+ setShadow(&I, S);
+ setOriginForNaryOp(I);
+ }
+
void visitIntrinsicInst(IntrinsicInst &I) {
switch (I.getIntrinsicID()) {
case llvm::Intrinsic::bswap:
@@ -2062,6 +2210,47 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
// case llvm::Intrinsic::x86_sse2_psll_dq_bs:
// case llvm::Intrinsic::x86_sse2_psrl_dq_bs:
+ case llvm::Intrinsic::x86_sse2_packsswb_128:
+ case llvm::Intrinsic::x86_sse2_packssdw_128:
+ case llvm::Intrinsic::x86_sse2_packuswb_128:
+ case llvm::Intrinsic::x86_sse41_packusdw:
+ case llvm::Intrinsic::x86_avx2_packsswb:
+ case llvm::Intrinsic::x86_avx2_packssdw:
+ case llvm::Intrinsic::x86_avx2_packuswb:
+ case llvm::Intrinsic::x86_avx2_packusdw:
+ handleVectorPackIntrinsic(I);
+ break;
+
+ case llvm::Intrinsic::x86_mmx_packsswb:
+ case llvm::Intrinsic::x86_mmx_packuswb:
+ handleVectorPackIntrinsic(I, 16);
+ break;
+
+ case llvm::Intrinsic::x86_mmx_packssdw:
+ handleVectorPackIntrinsic(I, 32);
+ break;
+
+ case llvm::Intrinsic::x86_mmx_psad_bw:
+ case llvm::Intrinsic::x86_sse2_psad_bw:
+ case llvm::Intrinsic::x86_avx2_psad_bw:
+ handleVectorSadIntrinsic(I);
+ break;
+
+ case llvm::Intrinsic::x86_sse2_pmadd_wd:
+ case llvm::Intrinsic::x86_avx2_pmadd_wd:
+ case llvm::Intrinsic::x86_ssse3_pmadd_ub_sw_128:
+ case llvm::Intrinsic::x86_avx2_pmadd_ub_sw:
+ handleVectorPmaddIntrinsic(I);
+ break;
+
+ case llvm::Intrinsic::x86_ssse3_pmadd_ub_sw:
+ handleVectorPmaddIntrinsic(I, 8);
+ break;
+
+ case llvm::Intrinsic::x86_mmx_pmadd_wd:
+ handleVectorPmaddIntrinsic(I, 16);
+ break;
+
default:
if (!handleUnknownIntrinsic(I))
visitInstruction(I);
@@ -2083,12 +2272,6 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
return;
}
- // Allow only tail calls with the same types, otherwise
- // we may have a false positive: shadow for a non-void RetVal
- // will get propagated to a void RetVal.
- if (Call->isTailCall() && Call->getType() != Call->getParent()->getType())
- Call->setTailCall(false);
-
assert(!isa<IntrinsicInst>(&I) && "intrinsics are handled elsewhere");
// We are going to insert code that relies on the fact that the callee
@@ -2211,6 +2394,11 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
void visitPHINode(PHINode &I) {
IRBuilder<> IRB(&I);
+ if (!PropagateShadow) {
+ setShadow(&I, getCleanShadow(&I));
+ return;
+ }
+
ShadowPHINodes.push_back(&I);
setShadow(&I, IRB.CreatePHI(getShadowTy(&I), I.getNumIncomingValues(),
"_msphi_s"));
diff --git a/lib/Transforms/Instrumentation/ThreadSanitizer.cpp b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
index 8fe9bca..89386a6 100644
--- a/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
+++ b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
@@ -40,14 +40,11 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
-#include "llvm/Transforms/Utils/SpecialCaseList.h"
using namespace llvm;
#define DEBUG_TYPE "tsan"
-static cl::opt<std::string> ClBlacklistFile("tsan-blacklist",
- cl::desc("Blacklist file"), cl::Hidden);
static cl::opt<bool> ClInstrumentMemoryAccesses(
"tsan-instrument-memory-accesses", cl::init(true),
cl::desc("Instrument memory accesses"), cl::Hidden);
@@ -76,11 +73,7 @@ namespace {
/// ThreadSanitizer: instrument the code in module to find races.
struct ThreadSanitizer : public FunctionPass {
- ThreadSanitizer(StringRef BlacklistFile = StringRef())
- : FunctionPass(ID),
- DL(nullptr),
- BlacklistFile(BlacklistFile.empty() ? ClBlacklistFile
- : BlacklistFile) { }
+ ThreadSanitizer() : FunctionPass(ID), DL(nullptr) {}
const char *getPassName() const override;
bool runOnFunction(Function &F) override;
bool doInitialization(Module &M) override;
@@ -98,8 +91,6 @@ struct ThreadSanitizer : public FunctionPass {
const DataLayout *DL;
Type *IntptrTy;
- SmallString<64> BlacklistFile;
- std::unique_ptr<SpecialCaseList> BL;
IntegerType *OrdTy;
// Callbacks to run-time library are computed in doInitialization.
Function *TsanFuncEntry;
@@ -129,8 +120,8 @@ const char *ThreadSanitizer::getPassName() const {
return "ThreadSanitizer";
}
-FunctionPass *llvm::createThreadSanitizerPass(StringRef BlacklistFile) {
- return new ThreadSanitizer(BlacklistFile);
+FunctionPass *llvm::createThreadSanitizerPass() {
+ return new ThreadSanitizer();
}
static Function *checkInterfaceFunction(Constant *FuncOrBitcast) {
@@ -228,7 +219,6 @@ bool ThreadSanitizer::doInitialization(Module &M) {
if (!DLP)
report_fatal_error("data layout missing");
DL = &DLP->getDataLayout();
- BL.reset(SpecialCaseList::createOrDie(BlacklistFile));
// Always insert a call to __tsan_init into the module's CTORs.
IRBuilder<> IRB(M.getContext());
@@ -322,7 +312,6 @@ static bool isAtomic(Instruction *I) {
bool ThreadSanitizer::runOnFunction(Function &F) {
if (!DL) return false;
- if (BL->isIn(F)) return false;
initializeCallbacks(*F.getParent());
SmallVector<Instruction*, 8> RetVec;
SmallVector<Instruction*, 8> AllLoadsAndStores;
@@ -331,22 +320,20 @@ bool ThreadSanitizer::runOnFunction(Function &F) {
SmallVector<Instruction*, 8> MemIntrinCalls;
bool Res = false;
bool HasCalls = false;
+ bool SanitizeFunction = F.hasFnAttribute(Attribute::SanitizeThread);
// Traverse all instructions, collect loads/stores/returns, check for calls.
- for (Function::iterator FI = F.begin(), FE = F.end();
- FI != FE; ++FI) {
- BasicBlock &BB = *FI;
- for (BasicBlock::iterator BI = BB.begin(), BE = BB.end();
- BI != BE; ++BI) {
- if (isAtomic(BI))
- AtomicAccesses.push_back(BI);
- else if (isa<LoadInst>(BI) || isa<StoreInst>(BI))
- LocalLoadsAndStores.push_back(BI);
- else if (isa<ReturnInst>(BI))
- RetVec.push_back(BI);
- else if (isa<CallInst>(BI) || isa<InvokeInst>(BI)) {
- if (isa<MemIntrinsic>(BI))
- MemIntrinCalls.push_back(BI);
+ for (auto &BB : F) {
+ for (auto &Inst : BB) {
+ if (isAtomic(&Inst))
+ AtomicAccesses.push_back(&Inst);
+ else if (isa<LoadInst>(Inst) || isa<StoreInst>(Inst))
+ LocalLoadsAndStores.push_back(&Inst);
+ else if (isa<ReturnInst>(Inst))
+ RetVec.push_back(&Inst);
+ else if (isa<CallInst>(Inst) || isa<InvokeInst>(Inst)) {
+ if (isa<MemIntrinsic>(Inst))
+ MemIntrinCalls.push_back(&Inst);
HasCalls = true;
chooseInstructionsToInstrument(LocalLoadsAndStores, AllLoadsAndStores);
}
@@ -358,21 +345,22 @@ bool ThreadSanitizer::runOnFunction(Function &F) {
// FIXME: many of these accesses do not need to be checked for races
// (e.g. variables that do not escape, etc).
- // Instrument memory accesses.
- if (ClInstrumentMemoryAccesses && F.hasFnAttribute(Attribute::SanitizeThread))
- for (size_t i = 0, n = AllLoadsAndStores.size(); i < n; ++i) {
- Res |= instrumentLoadOrStore(AllLoadsAndStores[i]);
+ // Instrument memory accesses only if we want to report bugs in the function.
+ if (ClInstrumentMemoryAccesses && SanitizeFunction)
+ for (auto Inst : AllLoadsAndStores) {
+ Res |= instrumentLoadOrStore(Inst);
}
- // Instrument atomic memory accesses.
+ // Instrument atomic memory accesses in any case (they can be used to
+ // implement synchronization).
if (ClInstrumentAtomics)
- for (size_t i = 0, n = AtomicAccesses.size(); i < n; ++i) {
- Res |= instrumentAtomic(AtomicAccesses[i]);
+ for (auto Inst : AtomicAccesses) {
+ Res |= instrumentAtomic(Inst);
}
- if (ClInstrumentMemIntrinsics)
- for (size_t i = 0, n = MemIntrinCalls.size(); i < n; ++i) {
- Res |= instrumentMemIntrinsic(MemIntrinCalls[i]);
+ if (ClInstrumentMemIntrinsics && SanitizeFunction)
+ for (auto Inst : MemIntrinCalls) {
+ Res |= instrumentMemIntrinsic(Inst);
}
// Instrument function entry/exit points if there were instrumented accesses.
@@ -382,8 +370,8 @@ bool ThreadSanitizer::runOnFunction(Function &F) {
Intrinsic::getDeclaration(F.getParent(), Intrinsic::returnaddress),
IRB.getInt32(0));
IRB.CreateCall(TsanFuncEntry, ReturnAddress);
- for (size_t i = 0, n = RetVec.size(); i < n; ++i) {
- IRBuilder<> IRBRet(RetVec[i]);
+ for (auto RetInst : RetVec) {
+ IRBuilder<> IRBRet(RetInst);
IRBRet.CreateCall(TsanFuncExit);
}
Res = true;
@@ -543,8 +531,14 @@ bool ThreadSanitizer::instrumentAtomic(Instruction *I) {
IRB.CreateIntCast(CASI->getNewValOperand(), Ty, false),
createOrdering(&IRB, CASI->getSuccessOrdering()),
createOrdering(&IRB, CASI->getFailureOrdering())};
- CallInst *C = CallInst::Create(TsanAtomicCAS[Idx], ArrayRef<Value*>(Args));
- ReplaceInstWithInst(I, C);
+ CallInst *C = IRB.CreateCall(TsanAtomicCAS[Idx], Args);
+ Value *Success = IRB.CreateICmpEQ(C, CASI->getCompareOperand());
+
+ Value *Res = IRB.CreateInsertValue(UndefValue::get(CASI->getType()), C, 0);
+ Res = IRB.CreateInsertValue(Res, Success, 1);
+
+ I->replaceAllUsesWith(Res);
+ I->eraseFromParent();
} else if (FenceInst *FI = dyn_cast<FenceInst>(I)) {
Value *Args[] = {createOrdering(&IRB, FI->getOrdering())};
Function *F = FI->getSynchScope() == SingleThread ?
diff --git a/lib/Transforms/Scalar/Android.mk b/lib/Transforms/Scalar/Android.mk
index 079cc86..5e22de6 100644
--- a/lib/Transforms/Scalar/Android.mk
+++ b/lib/Transforms/Scalar/Android.mk
@@ -8,11 +8,11 @@ transforms_scalar_SRC_FILES := \
DCE.cpp \
DeadStoreElimination.cpp \
EarlyCSE.cpp \
- GlobalMerge.cpp \
GVN.cpp \
IndVarSimplify.cpp \
JumpThreading.cpp \
LICM.cpp \
+ LoadCombine.cpp \
LoopDeletion.cpp \
LoopIdiomRecognize.cpp \
LoopInstSimplify.cpp \
diff --git a/lib/Transforms/Scalar/CMakeLists.txt b/lib/Transforms/Scalar/CMakeLists.txt
index 3ad1488..2dcfa23 100644
--- a/lib/Transforms/Scalar/CMakeLists.txt
+++ b/lib/Transforms/Scalar/CMakeLists.txt
@@ -8,10 +8,10 @@ add_llvm_library(LLVMScalarOpts
EarlyCSE.cpp
FlattenCFGPass.cpp
GVN.cpp
- GlobalMerge.cpp
IndVarSimplify.cpp
JumpThreading.cpp
LICM.cpp
+ LoadCombine.cpp
LoopDeletion.cpp
LoopIdiomRecognize.cpp
LoopInstSimplify.cpp
diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp
index 6d07ddd..106eba0 100644
--- a/lib/Transforms/Scalar/GVN.cpp
+++ b/lib/Transforms/Scalar/GVN.cpp
@@ -1464,6 +1464,13 @@ void GVN::AnalyzeLoadAvailability(LoadInst *LI, LoadDepVect &Deps,
continue;
}
+ // Loading from calloc (which zero initializes memory) -> zero
+ if (isCallocLikeFn(DepInst, TLI)) {
+ ValuesPerBlock.push_back(AvailableValueInBlock::get(
+ DepBB, Constant::getNullValue(LI->getType())));
+ continue;
+ }
+
if (StoreInst *S = dyn_cast<StoreInst>(DepInst)) {
// Reject loads and stores that are to the same address but are of
// different types if we have to.
@@ -1791,6 +1798,10 @@ static void patchReplacementInstruction(Instruction *I, Value *Repl) {
case LLVMContext::MD_fpmath:
ReplInst->setMetadata(Kind, MDNode::getMostGenericFPMath(IMD, ReplMD));
break;
+ case LLVMContext::MD_invariant_load:
+ // Only set the !invariant.load if it is present in both instructions.
+ ReplInst->setMetadata(Kind, IMD);
+ break;
}
}
}
@@ -1988,6 +1999,15 @@ bool GVN::processLoad(LoadInst *L) {
}
}
+ // If this load follows a calloc (which zero initializes memory),
+ // then the loaded value is zero
+ if (isCallocLikeFn(DepInst, TLI)) {
+ L->replaceAllUsesWith(Constant::getNullValue(L->getType()));
+ markInstructionForDeletion(L);
+ ++NumGVNLoad;
+ return true;
+ }
+
return false;
}
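
A source-level sketch of what the two calloc hooks added in this file enable (illustrative C, not from the patch):

    int *p = (int *)calloc(16, sizeof(int));
    int x = p[3]; /* calloc zero-initializes, so GVN folds this load to 0,
                     given the dependence walk found no intervening store */
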
diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp
index 230a381..6e50d33 100644
--- a/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/lib/Transforms/Scalar/JumpThreading.cpp
@@ -158,6 +158,15 @@ bool JumpThreading::runOnFunction(Function &F) {
TLI = &getAnalysis<TargetLibraryInfo>();
LVI = &getAnalysis<LazyValueInfo>();
+ // Remove unreachable blocks from the function, as they may cause an infinite
+ // loop. We do threading when we find something profitable, and jump threading
+ // a branch can create further opportunities. If these opportunities form a
+ // cycle, i.e. if any jump threading undoes a previous threading along the
+ // path, we will loop forever. We avoid this for normal code by not threading
+ // across back edges, but that does not cover unreachable blocks, which may
+ // contain a cycle with no back edge.
+ removeUnreachableBlocks(F);
+
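
A minimal illustration of the hazard (hypothetical IR): neither edge below is a back edge in a walk from the entry block, since neither block is reachable from entry, yet the pair forms a cycle that threading could rotate forever:

    dead1:                 ; preds = %dead2
      br label %dead2
    dead2:                 ; preds = %dead1
      br label %dead1
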
FindLoopHeaders(F);
bool Changed, EverChanged = false;
diff --git a/lib/Transforms/Scalar/LICM.cpp b/lib/Transforms/Scalar/LICM.cpp
index 0a8d16f..abcceb2 100644
--- a/lib/Transforms/Scalar/LICM.cpp
+++ b/lib/Transforms/Scalar/LICM.cpp
@@ -192,6 +192,14 @@ namespace {
SmallVectorImpl<BasicBlock*> &ExitBlocks,
SmallVectorImpl<Instruction*> &InsertPts,
PredIteratorCache &PIC);
+
+ /// \brief Create a copy of the instruction in the exit block and patch up
+ /// SSA.
+ /// PN is a user of I in ExitBlock that can be used to get the number and
+ /// list of predecessors fast.
+ Instruction *CloneInstructionInExitBlock(Instruction &I,
+ BasicBlock &ExitBlock,
+ PHINode &PN);
};
}
@@ -531,6 +539,35 @@ bool LICM::isNotUsedInLoop(Instruction &I) {
return true;
}
+Instruction *LICM::CloneInstructionInExitBlock(Instruction &I,
+ BasicBlock &ExitBlock,
+ PHINode &PN) {
+ Instruction *New = I.clone();
+ ExitBlock.getInstList().insert(ExitBlock.getFirstInsertionPt(), New);
+ if (!I.getName().empty()) New->setName(I.getName() + ".le");
+
+ // Build LCSSA PHI nodes for any in-loop operands. Note that this is
+ // particularly cheap because we can rip off the PHI node that we're
+ // replacing for the number and blocks of the predecessors.
+ // OPT: If this shows up in a profile, we can instead finish sinking all
+ // invariant instructions, and then walk their operands to re-establish
+ // LCSSA. That will eliminate creating PHI nodes just to nuke them when
+ // sinking bottom-up.
+ for (User::op_iterator OI = New->op_begin(), OE = New->op_end(); OI != OE;
+ ++OI)
+ if (Instruction *OInst = dyn_cast<Instruction>(*OI))
+ if (Loop *OLoop = LI->getLoopFor(OInst->getParent()))
+ if (!OLoop->contains(&PN)) {
+ PHINode *OpPN =
+ PHINode::Create(OInst->getType(), PN.getNumIncomingValues(),
+ OInst->getName() + ".lcssa", ExitBlock.begin());
+ for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i)
+ OpPN->addIncoming(OInst, PN.getIncomingBlock(i));
+ *OI = OpPN;
+ }
+ return New;
+}
+
/// sink - When an instruction is found to only be used outside of the loop,
/// this function moves it to the exit blocks and patches up SSA form as needed.
/// This method is guaranteed to remove the original instruction from its
@@ -550,6 +587,9 @@ void LICM::sink(Instruction &I) {
SmallPtrSet<BasicBlock *, 32> ExitBlockSet(ExitBlocks.begin(), ExitBlocks.end());
#endif
+ // Clones of this instruction. Don't create more than one per exit block!
+ SmallDenseMap<BasicBlock *, Instruction *, 32> SunkCopies;
+
// If this instruction is only used outside of the loop, then all users are
// PHI nodes in exit blocks due to LCSSA form. Just RAUW them with clones of
// the instruction.
@@ -561,30 +601,13 @@ void LICM::sink(Instruction &I) {
assert(ExitBlockSet.count(ExitBlock) &&
"The LCSSA PHI is not in an exit block!");
- Instruction *New = I.clone();
- ExitBlock->getInstList().insert(ExitBlock->getFirstInsertionPt(), New);
- if (!I.getName().empty())
- New->setName(I.getName() + ".le");
-
- // Build LCSSA PHI nodes for any in-loop operands. Note that this is
- // particularly cheap because we can rip off the PHI node that we're
- // replacing for the number and blocks of the predecessors.
- // OPT: If this shows up in a profile, we can instead finish sinking all
- // invariant instructions, and then walk their operands to re-establish
- // LCSSA. That will eliminate creating PHI nodes just to nuke them when
- // sinking bottom-up.
- for (User::op_iterator OI = New->op_begin(), OE = New->op_end(); OI != OE;
- ++OI)
- if (Instruction *OInst = dyn_cast<Instruction>(*OI))
- if (Loop *OLoop = LI->getLoopFor(OInst->getParent()))
- if (!OLoop->contains(PN)) {
- PHINode *OpPN = PHINode::Create(
- OInst->getType(), PN->getNumIncomingValues(),
- OInst->getName() + ".lcssa", ExitBlock->begin());
- for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
- OpPN->addIncoming(OInst, PN->getIncomingBlock(i));
- *OI = OpPN;
- }
+ Instruction *New;
+ auto It = SunkCopies.find(ExitBlock);
+ if (It != SunkCopies.end())
+ New = It->second;
+ else
+ New = SunkCopies[ExitBlock] =
+ CloneInstructionInExitBlock(I, *ExitBlock, *PN);
PN->replaceAllUsesWith(New);
PN->eraseFromParent();
@@ -616,7 +639,7 @@ void LICM::hoist(Instruction &I) {
///
bool LICM::isSafeToExecuteUnconditionally(Instruction &Inst) {
// If it is not a trapping instruction, it is always safe to hoist.
- if (isSafeToSpeculativelyExecute(&Inst))
+ if (isSafeToSpeculativelyExecute(&Inst, DL))
return true;
return isGuaranteedToExecute(Inst);
diff --git a/lib/Transforms/Scalar/LoadCombine.cpp b/lib/Transforms/Scalar/LoadCombine.cpp
new file mode 100644
index 0000000..846aa70
--- /dev/null
+++ b/lib/Transforms/Scalar/LoadCombine.cpp
@@ -0,0 +1,268 @@
+//===- LoadCombine.cpp - Combine Adjacent Loads ---------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This transformation combines adjacent loads.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Scalar.h"
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/TargetFolder.h"
+#include "llvm/Pass.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "load-combine"
+
+STATISTIC(NumLoadsAnalyzed, "Number of loads analyzed for combining");
+STATISTIC(NumLoadsCombined, "Number of loads combined");
+
+namespace {
+struct PointerOffsetPair {
+ Value *Pointer;
+ uint64_t Offset;
+};
+
+struct LoadPOPPair {
+ LoadPOPPair(LoadInst *L, PointerOffsetPair P, unsigned O)
+ : Load(L), POP(P), InsertOrder(O) {}
+ LoadPOPPair() {}
+ LoadInst *Load;
+ PointerOffsetPair POP;
+ /// \brief The new load needs to be created before the first load in IR order.
+ unsigned InsertOrder;
+};
+
+class LoadCombine : public BasicBlockPass {
+ LLVMContext *C;
+ const DataLayout *DL;
+
+public:
+ LoadCombine()
+ : BasicBlockPass(ID),
+ C(nullptr), DL(nullptr) {
+ initializeSROAPass(*PassRegistry::getPassRegistry());
+ }
+ bool doInitialization(Function &) override;
+ bool runOnBasicBlock(BasicBlock &BB) override;
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+ const char *getPassName() const override { return "LoadCombine"; }
+ static char ID;
+
+ typedef IRBuilder<true, TargetFolder> BuilderTy;
+
+private:
+ BuilderTy *Builder;
+
+ PointerOffsetPair getPointerOffsetPair(LoadInst &);
+ bool combineLoads(DenseMap<const Value *, SmallVector<LoadPOPPair, 8>> &);
+ bool aggregateLoads(SmallVectorImpl<LoadPOPPair> &);
+ bool combineLoads(SmallVectorImpl<LoadPOPPair> &);
+};
+}
+
+bool LoadCombine::doInitialization(Function &F) {
+ DEBUG(dbgs() << "LoadCombine function: " << F.getName() << "\n");
+ C = &F.getContext();
+ DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
+ if (!DLP) {
+ DEBUG(dbgs() << " Skipping LoadCombine -- no target data!\n");
+ return false;
+ }
+ DL = &DLP->getDataLayout();
+ return true;
+}
+
+PointerOffsetPair LoadCombine::getPointerOffsetPair(LoadInst &LI) {
+ PointerOffsetPair POP;
+ POP.Pointer = LI.getPointerOperand();
+ POP.Offset = 0;
+ while (isa<BitCastInst>(POP.Pointer) || isa<GetElementPtrInst>(POP.Pointer)) {
+ if (auto *GEP = dyn_cast<GetElementPtrInst>(POP.Pointer)) {
+ unsigned BitWidth = DL->getPointerTypeSizeInBits(GEP->getType());
+ APInt Offset(BitWidth, 0);
+ if (GEP->accumulateConstantOffset(*DL, Offset))
+ POP.Offset += Offset.getZExtValue();
+ else
+ // Can't handle GEPs with variable indices.
+ return POP;
+ POP.Pointer = GEP->getPointerOperand();
+ } else if (auto *BC = dyn_cast<BitCastInst>(POP.Pointer))
+ POP.Pointer = BC->getOperand(0);
+ }
+ return POP;
+}
+
+bool LoadCombine::combineLoads(
+ DenseMap<const Value *, SmallVector<LoadPOPPair, 8>> &LoadMap) {
+ bool Combined = false;
+ for (auto &Loads : LoadMap) {
+ if (Loads.second.size() < 2)
+ continue;
+ std::sort(Loads.second.begin(), Loads.second.end(),
+ [](const LoadPOPPair &A, const LoadPOPPair &B) {
+ return A.POP.Offset < B.POP.Offset;
+ });
+ if (aggregateLoads(Loads.second))
+ Combined = true;
+ }
+ return Combined;
+}
+
+/// \brief Try to aggregate loads from a sorted list of loads to be combined.
+///
+/// It is guaranteed that no writes occur between any of the loads. All loads
+/// have the same base pointer. There are at least two loads.
+bool LoadCombine::aggregateLoads(SmallVectorImpl<LoadPOPPair> &Loads) {
+ assert(Loads.size() >= 2 && "Insufficient loads!");
+ LoadInst *BaseLoad = nullptr;
+ SmallVector<LoadPOPPair, 8> AggregateLoads;
+ bool Combined = false;
+ uint64_t PrevOffset = -1ull;
+ uint64_t PrevSize = 0;
+ for (auto &L : Loads) {
+ if (PrevOffset == -1ull) {
+ BaseLoad = L.Load;
+ PrevOffset = L.POP.Offset;
+ PrevSize = DL->getTypeStoreSize(L.Load->getType());
+ AggregateLoads.push_back(L);
+ continue;
+ }
+ if (L.Load->getAlignment() > BaseLoad->getAlignment())
+ continue;
+ if (L.POP.Offset > PrevOffset + PrevSize) {
+ // No other load will be combinable
+ if (combineLoads(AggregateLoads))
+ Combined = true;
+ AggregateLoads.clear();
+ PrevOffset = -1;
+ continue;
+ }
+ if (L.POP.Offset != PrevOffset + PrevSize)
+ // This load overlaps the previous load: its offset is less than the
+ // previous load's end (offset + size).
+ // FIXME: We may want to handle this case.
+ continue;
+ PrevOffset = L.POP.Offset;
+ PrevSize = DL->getTypeStoreSize(L.Load->getType());
+ AggregateLoads.push_back(L);
+ }
+ if (combineLoads(AggregateLoads))
+ Combined = true;
+ return Combined;
+}
+
+/// \brief Given a list of combinable loads, combine the maximum number of them.
+bool LoadCombine::combineLoads(SmallVectorImpl<LoadPOPPair> &Loads) {
+ // Remove loads from the end while the size is not a power of 2.
+ unsigned TotalSize = 0;
+ for (const auto &L : Loads)
+ TotalSize += L.Load->getType()->getPrimitiveSizeInBits();
+ while (TotalSize != 0 && !isPowerOf2_32(TotalSize))
+ TotalSize -= Loads.pop_back_val().Load->getType()->getPrimitiveSizeInBits();
+ if (Loads.size() < 2)
+ return false;
+
+ DEBUG({
+ dbgs() << "***** Combining Loads ******\n";
+ for (const auto &L : Loads) {
+ dbgs() << L.POP.Offset << ": " << *L.Load << "\n";
+ }
+ });
+
+ // Find first load. This is where we put the new load.
+ LoadPOPPair FirstLP;
+ FirstLP.InsertOrder = -1u;
+ for (const auto &L : Loads)
+ if (L.InsertOrder < FirstLP.InsertOrder)
+ FirstLP = L;
+
+ unsigned AddressSpace =
+ FirstLP.POP.Pointer->getType()->getPointerAddressSpace();
+
+ Builder->SetInsertPoint(FirstLP.Load);
+ Value *Ptr = Builder->CreateConstGEP1_64(
+ Builder->CreatePointerCast(Loads[0].POP.Pointer,
+ Builder->getInt8PtrTy(AddressSpace)),
+ Loads[0].POP.Offset);
+ LoadInst *NewLoad = new LoadInst(
+ Builder->CreatePointerCast(
+ Ptr, PointerType::get(IntegerType::get(Ptr->getContext(), TotalSize),
+ Ptr->getType()->getPointerAddressSpace())),
+ Twine(Loads[0].Load->getName()) + ".combined", false,
+ Loads[0].Load->getAlignment(), FirstLP.Load);
+
+ for (const auto &L : Loads) {
+ Builder->SetInsertPoint(L.Load);
+ Value *V = Builder->CreateExtractInteger(
+ *DL, NewLoad, cast<IntegerType>(L.Load->getType()),
+ L.POP.Offset - Loads[0].POP.Offset, "combine.extract");
+ L.Load->replaceAllUsesWith(V);
+ }
+
+ NumLoadsCombined = NumLoadsCombined + Loads.size();
+ return true;
+}
+
+bool LoadCombine::runOnBasicBlock(BasicBlock &BB) {
+ if (skipOptnoneFunction(BB) || !DL)
+ return false;
+
+ IRBuilder<true, TargetFolder>
+ TheBuilder(BB.getContext(), TargetFolder(DL));
+ Builder = &TheBuilder;
+
+ DenseMap<const Value *, SmallVector<LoadPOPPair, 8>> LoadMap;
+
+ bool Combined = false;
+ unsigned Index = 0;
+ for (auto &I : BB) {
+ if (I.mayWriteToMemory() || I.mayThrow()) {
+ if (combineLoads(LoadMap))
+ Combined = true;
+ LoadMap.clear();
+ continue;
+ }
+ LoadInst *LI = dyn_cast<LoadInst>(&I);
+ if (!LI)
+ continue;
+ ++NumLoadsAnalyzed;
+ if (!LI->isSimple() || !LI->getType()->isIntegerTy())
+ continue;
+ auto POP = getPointerOffsetPair(*LI);
+ if (!POP.Pointer)
+ continue;
+ LoadMap[POP.Pointer].push_back(LoadPOPPair(LI, POP, Index++));
+ }
+ if (combineLoads(LoadMap))
+ Combined = true;
+ return Combined;
+}
+
+void LoadCombine::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+}
+
+char LoadCombine::ID = 0;
+
+BasicBlockPass *llvm::createLoadCombinePass() {
+ return new LoadCombine();
+}
+
+INITIALIZE_PASS(LoadCombine, "load-combine", "Combine Adjacent Loads", false,
+ false)
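To see what the new pass does at the byte level, consider two adjacent 32-bit loads being replaced by one 64-bit load plus shift-and-truncate extracts. The sketch below shows the before/after semantics in plain C++ under a little-endian layout assumption; the pass itself builds the wide load and extracts in IR, computing offsets via DataLayout, so this illustrates the transformation rather than its implementation.

#include <cassert>
#include <cstdint>
#include <cstring>

// Before: two separate 4-byte loads at offsets 0 and 4.
static void loadsSeparately(const unsigned char *P, uint32_t &A, uint32_t &B) {
  std::memcpy(&A, P, 4);
  std::memcpy(&B, P + 4, 4);
}

// After: one 8-byte load, then "extracts" at bit offsets 0 and 32
// (mirroring CreateExtractInteger; assumes little-endian layout).
static void loadsCombined(const unsigned char *P, uint32_t &A, uint32_t &B) {
  uint64_t Wide;
  std::memcpy(&Wide, P, 8);
  A = static_cast<uint32_t>(Wide);
  B = static_cast<uint32_t>(Wide >> 32);
}

int main() {
  unsigned char Buf[8] = {1, 0, 0, 0, 2, 0, 0, 0};
  uint32_t A1, B1, A2, B2;
  loadsSeparately(Buf, A1, B1);
  loadsCombined(Buf, A2, B2);
  assert(A1 == A2 && B1 == B2); // both read A=1, B=2
  return 0;
}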
diff --git a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index 26a83df..a12f5a7 100644
--- a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -112,7 +112,7 @@ namespace {
/// the variable involved in the comparison is returned. This function will
/// be called to see if the precondition and postcondition of the loop
/// are in desirable form.
- Value *matchCondition (BranchInst *Br, BasicBlock *NonZeroTarget) const;
+ Value *matchCondition(BranchInst *Br, BasicBlock *NonZeroTarget) const;
/// Return true iff the idiom is detected in the loop, and 1) \p CntInst
/// is set to the instruction counting the population bit. 2) \p CntPhi
@@ -122,7 +122,7 @@ namespace {
(Instruction *&CntInst, PHINode *&CntPhi, Value *&Var) const;
/// Insert ctpop intrinsic function and some obviously dead instructions.
- void transform (Instruction *CntInst, PHINode *CntPhi, Value *Var);
+ void transform(Instruction *CntInst, PHINode *CntPhi, Value *Var);
/// Create llvm.ctpop.* intrinsic function.
CallInst *createPopcntIntrinsic(IRBuilderTy &IRB, Value *Val, DebugLoc DL);
diff --git a/lib/Transforms/Scalar/LoopRerollPass.cpp b/lib/Transforms/Scalar/LoopRerollPass.cpp
index 8b5e036..b6fbb16 100644
--- a/lib/Transforms/Scalar/LoopRerollPass.cpp
+++ b/lib/Transforms/Scalar/LoopRerollPass.cpp
@@ -924,8 +924,10 @@ bool LoopReroll::reroll(Instruction *IV, Loop *L, BasicBlock *Header,
// them, and this matching fails. As an exception, we allow the alias
// set tracker to handle regular (simple) load/store dependencies.
if (FutureSideEffects &&
- ((!isSimpleLoadStore(J1) && !isSafeToSpeculativelyExecute(J1)) ||
- (!isSimpleLoadStore(J2) && !isSafeToSpeculativelyExecute(J2)))) {
+ ((!isSimpleLoadStore(J1) &&
+ !isSafeToSpeculativelyExecute(J1, DL)) ||
+ (!isSimpleLoadStore(J2) &&
+ !isSafeToSpeculativelyExecute(J2, DL)))) {
DEBUG(dbgs() << "LRR: iteration root match failed at " << *J1 <<
" vs. " << *J2 <<
" (side effects prevent reordering)\n");
diff --git a/lib/Transforms/Scalar/LoopUnrollPass.cpp b/lib/Transforms/Scalar/LoopUnrollPass.cpp
index fc28fd2..00c0f88 100644
--- a/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -18,8 +18,10 @@
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Metadata.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
@@ -36,7 +38,8 @@ UnrollThreshold("unroll-threshold", cl::init(150), cl::Hidden,
static cl::opt<unsigned>
UnrollCount("unroll-count", cl::init(0), cl::Hidden,
- cl::desc("Use this unroll count for all loops, for testing purposes"));
+ cl::desc("Use this unroll count for all loops including those with "
+ "unroll_count pragma values, for testing purposes"));
static cl::opt<bool>
UnrollAllowPartial("unroll-allow-partial", cl::init(false), cl::Hidden,
@@ -47,6 +50,11 @@ static cl::opt<bool>
UnrollRuntime("unroll-runtime", cl::ZeroOrMore, cl::init(false), cl::Hidden,
cl::desc("Unroll loops with run-time trip counts"));
+static cl::opt<unsigned>
+PragmaUnrollThreshold("pragma-unroll-threshold", cl::init(16 * 1024), cl::Hidden,
+ cl::desc("Unrolled size limit for loops with an unroll(enable) or "
+ "unroll_count pragma."));
+
namespace {
class LoopUnroll : public LoopPass {
public:
@@ -109,6 +117,66 @@ namespace {
// For now, recreate dom info, if loop is unrolled.
AU.addPreserved<DominatorTreeWrapperPass>();
}
+
+ // Fill in the UnrollingPreferences parameter with values from the
+ // TargetTransformInfo.
+ void getUnrollingPreferences(Loop *L, const TargetTransformInfo &TTI,
+ TargetTransformInfo::UnrollingPreferences &UP) {
+ UP.Threshold = CurrentThreshold;
+ UP.OptSizeThreshold = OptSizeUnrollThreshold;
+ UP.PartialThreshold = CurrentThreshold;
+ UP.PartialOptSizeThreshold = OptSizeUnrollThreshold;
+ UP.Count = CurrentCount;
+ UP.MaxCount = UINT_MAX;
+ UP.Partial = CurrentAllowPartial;
+ UP.Runtime = CurrentRuntime;
+ TTI.getUnrollingPreferences(L, UP);
+ }
+
+ // Select and return an unroll count based on parameters from
+ // the user, unroll preferences, unroll pragmas, or a heuristic.
+ // SetExplicitly is set to true if the unroll count is set by
+ // the user or a pragma rather than selected heuristically.
+ unsigned
+ selectUnrollCount(const Loop *L, unsigned TripCount, bool HasEnablePragma,
+ unsigned PragmaCount,
+ const TargetTransformInfo::UnrollingPreferences &UP,
+ bool &SetExplicitly);
+
+
+ // Select threshold values used to limit unrolling based on a
+ // total unrolled size. Parameters Threshold and PartialThreshold
+ // are set to the maximum unrolled size for fully and partially
+ // unrolled loops respectively.
+ void selectThresholds(const Loop *L, bool HasPragma,
+ const TargetTransformInfo::UnrollingPreferences &UP,
+ unsigned &Threshold, unsigned &PartialThreshold) {
+ // Determine the current unrolling threshold. While this is
+ // normally set from UnrollThreshold, it is overridden to a
+ // smaller value if the current function is marked as
+ // optimize-for-size, and the unroll threshold was not user
+ // specified.
+ Threshold = UserThreshold ? CurrentThreshold : UP.Threshold;
+ PartialThreshold = UserThreshold ? CurrentThreshold : UP.PartialThreshold;
+ if (!UserThreshold &&
+ L->getHeader()->getParent()->getAttributes().
+ hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::OptimizeForSize)) {
+ Threshold = UP.OptSizeThreshold;
+ PartialThreshold = UP.PartialOptSizeThreshold;
+ }
+ if (HasPragma) {
+ // If the loop has an unrolling pragma, we want to be more
+ // aggressive with unrolling limits. Set thresholds to at
+ // least the PragmaUnrollThreshold value, which is larger than the
+ // default limits.
+ if (Threshold != NoThreshold)
+ Threshold = std::max<unsigned>(Threshold, PragmaUnrollThreshold);
+ if (PartialThreshold != NoThreshold)
+ PartialThreshold =
+ std::max<unsigned>(PartialThreshold, PragmaUnrollThreshold);
+ }
+ }
};
}
@@ -151,6 +219,103 @@ static unsigned ApproximateLoopSize(const Loop *L, unsigned &NumCalls,
return LoopSize;
}
+// Returns the value associated with the given metadata node name (for
+// example, "llvm.loop.unroll.count"). If no such named metadata node
+// exists, then nullptr is returned.
+static const ConstantInt *GetUnrollMetadataValue(const Loop *L,
+ StringRef Name) {
+ MDNode *LoopID = L->getLoopID();
+ if (!LoopID) return nullptr;
+
+ // First operand should refer to the loop id itself.
+ assert(LoopID->getNumOperands() > 0 && "requires at least one operand");
+ assert(LoopID->getOperand(0) == LoopID && "invalid loop id");
+
+ for (unsigned i = 1, e = LoopID->getNumOperands(); i < e; ++i) {
+ const MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i));
+ if (!MD) continue;
+
+ const MDString *S = dyn_cast<MDString>(MD->getOperand(0));
+ if (!S) continue;
+
+ if (Name.equals(S->getString())) {
+ assert(MD->getNumOperands() == 2 &&
+ "Unroll hint metadata should have two operands.");
+ return cast<ConstantInt>(MD->getOperand(1));
+ }
+ }
+ return nullptr;
+}
+
+// Returns true if the loop has an unroll(enable) pragma.
+static bool HasUnrollEnablePragma(const Loop *L) {
+ const ConstantInt *EnableValue =
+ GetUnrollMetadataValue(L, "llvm.loop.unroll.enable");
+ return (EnableValue && EnableValue->getZExtValue());
+}
+
+// Returns true if the loop has an unroll(disable) pragma.
+static bool HasUnrollDisablePragma(const Loop *L) {
+ const ConstantInt *EnableValue =
+ GetUnrollMetadataValue(L, "llvm.loop.unroll.enable");
+ return (EnableValue && !EnableValue->getZExtValue());
+}
+
+// If loop has an unroll_count pragma return the (necessarily
+// positive) value from the pragma. Otherwise return 0.
+static unsigned UnrollCountPragmaValue(const Loop *L) {
+ const ConstantInt *CountValue =
+ GetUnrollMetadataValue(L, "llvm.loop.unroll.count");
+ if (CountValue) {
+ unsigned Count = CountValue->getZExtValue();
+ assert(Count >= 1 && "Unroll count must be positive.");
+ return Count;
+ }
+ return 0;
+}
+
+unsigned LoopUnroll::selectUnrollCount(
+ const Loop *L, unsigned TripCount, bool HasEnablePragma,
+ unsigned PragmaCount, const TargetTransformInfo::UnrollingPreferences &UP,
+ bool &SetExplicitly) {
+ SetExplicitly = true;
+
+ // User-specified count (either as a command-line option or
+ // constructor parameter) has highest precedence.
+ unsigned Count = UserCount ? CurrentCount : 0;
+
+ // If there is no user-specified count, unroll pragmas have the next
+ // highest precedence.
+ if (Count == 0) {
+ if (PragmaCount) {
+ Count = PragmaCount;
+ } else if (HasEnablePragma) {
+ // unroll(enable) pragma without an unroll_count pragma
+ // indicates that the loop should be fully unrolled.
+ Count = TripCount;
+ }
+ }
+
+ if (Count == 0)
+ Count = UP.Count;
+
+ if (Count == 0) {
+ SetExplicitly = false;
+ if (TripCount == 0)
+ // Runtime trip count.
+ Count = UnrollRuntimeCount;
+ else
+ // Conservative heuristic: if we know the trip count, see if we can
+ // completely unroll (subject to the threshold, checked below); otherwise
+ // try to find greatest modulo of the trip count which is still under
+ // threshold value.
+ Count = TripCount;
+ }
+ if (TripCount && Count > TripCount)
+ return TripCount;
+ return Count;
+}
+
bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
if (skipOptnoneFunction(L))
return false;
@@ -162,33 +327,16 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
BasicBlock *Header = L->getHeader();
DEBUG(dbgs() << "Loop Unroll: F[" << Header->getParent()->getName()
<< "] Loop %" << Header->getName() << "\n");
- (void)Header;
- TargetTransformInfo::UnrollingPreferences UP;
- UP.Threshold = CurrentThreshold;
- UP.OptSizeThreshold = OptSizeUnrollThreshold;
- UP.PartialThreshold = CurrentThreshold;
- UP.PartialOptSizeThreshold = OptSizeUnrollThreshold;
- UP.Count = CurrentCount;
- UP.MaxCount = UINT_MAX;
- UP.Partial = CurrentAllowPartial;
- UP.Runtime = CurrentRuntime;
- TTI.getUnrollingPreferences(L, UP);
-
- // Determine the current unrolling threshold. While this is normally set
- // from UnrollThreshold, it is overridden to a smaller value if the current
- // function is marked as optimize-for-size, and the unroll threshold was
- // not user specified.
- unsigned Threshold = UserThreshold ? CurrentThreshold : UP.Threshold;
- unsigned PartialThreshold =
- UserThreshold ? CurrentThreshold : UP.PartialThreshold;
- if (!UserThreshold &&
- Header->getParent()->getAttributes().
- hasAttribute(AttributeSet::FunctionIndex,
- Attribute::OptimizeForSize)) {
- Threshold = UP.OptSizeThreshold;
- PartialThreshold = UP.PartialOptSizeThreshold;
+ if (HasUnrollDisablePragma(L)) {
+ return false;
}
+ bool HasEnablePragma = HasUnrollEnablePragma(L);
+ unsigned PragmaCount = UnrollCountPragmaValue(L);
+ bool HasPragma = HasEnablePragma || PragmaCount > 0;
+
+ TargetTransformInfo::UnrollingPreferences UP;
+ getUnrollingPreferences(L, TTI, UP);
// Find trip count and trip multiple if count is not available
unsigned TripCount = 0;
@@ -202,79 +350,117 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
TripMultiple = SE->getSmallConstantTripMultiple(L, LatchBlock);
}
- bool Runtime = UserRuntime ? CurrentRuntime : UP.Runtime;
-
- // Use a default unroll-count if the user doesn't specify a value
- // and the trip count is a run-time value. The default is different
- // for run-time or compile-time trip count loops.
- unsigned Count = UserCount ? CurrentCount : UP.Count;
- if (Runtime && Count == 0 && TripCount == 0)
- Count = UnrollRuntimeCount;
+ // Select an initial unroll count. This may be reduced later based
+ // on size thresholds.
+ bool CountSetExplicitly;
+ unsigned Count = selectUnrollCount(L, TripCount, HasEnablePragma, PragmaCount,
+ UP, CountSetExplicitly);
+
+ unsigned NumInlineCandidates;
+ bool notDuplicatable;
+ unsigned LoopSize =
+ ApproximateLoopSize(L, NumInlineCandidates, notDuplicatable, TTI);
+ DEBUG(dbgs() << " Loop Size = " << LoopSize << "\n");
+ uint64_t UnrolledSize = (uint64_t)LoopSize * Count;
+ if (notDuplicatable) {
+ DEBUG(dbgs() << " Not unrolling loop which contains non-duplicatable"
+ << " instructions.\n");
+ return false;
+ }
+ if (NumInlineCandidates != 0) {
+ DEBUG(dbgs() << " Not unrolling loop with inlinable calls.\n");
+ return false;
+ }
- if (Count == 0) {
- // Conservative heuristic: if we know the trip count, see if we can
- // completely unroll (subject to the threshold, checked below); otherwise
- // try to find greatest modulo of the trip count which is still under
- // threshold value.
- if (TripCount == 0)
- return false;
- Count = TripCount;
+ unsigned Threshold, PartialThreshold;
+ selectThresholds(L, HasPragma, UP, Threshold, PartialThreshold);
+
+ // Given Count, TripCount, and the thresholds, determine the type of
+ // unrolling to be performed.
+ enum { Full = 0, Partial = 1, Runtime = 2 };
+ int Unrolling;
+ if (TripCount && Count == TripCount) {
+ if (Threshold != NoThreshold && UnrolledSize > Threshold) {
+ DEBUG(dbgs() << " Too large to fully unroll with count: " << Count
+ << " because size: " << UnrolledSize << ">" << Threshold
+ << "\n");
+ Unrolling = Partial;
+ } else {
+ Unrolling = Full;
+ }
+ } else if (TripCount && Count < TripCount) {
+ Unrolling = Partial;
+ } else {
+ Unrolling = Runtime;
}
- // Enforce the threshold.
- if (Threshold != NoThreshold && PartialThreshold != NoThreshold) {
- unsigned NumInlineCandidates;
- bool notDuplicatable;
- unsigned LoopSize = ApproximateLoopSize(L, NumInlineCandidates,
- notDuplicatable, TTI);
- DEBUG(dbgs() << " Loop Size = " << LoopSize << "\n");
- if (notDuplicatable) {
- DEBUG(dbgs() << " Not unrolling loop which contains non-duplicatable"
- << " instructions.\n");
+ // Reduce count based on the type of unrolling and the threshold values.
+ unsigned OriginalCount = Count;
+ bool AllowRuntime = UserRuntime ? CurrentRuntime : UP.Runtime;
+ if (Unrolling == Partial) {
+ bool AllowPartial = UserAllowPartial ? CurrentAllowPartial : UP.Partial;
+ if (!AllowPartial && !CountSetExplicitly) {
+ DEBUG(dbgs() << " will not try to unroll partially because "
+ << "-unroll-allow-partial not given\n");
return false;
}
- if (NumInlineCandidates != 0) {
- DEBUG(dbgs() << " Not unrolling loop with inlinable calls.\n");
+ if (PartialThreshold != NoThreshold && UnrolledSize > PartialThreshold) {
+ // Reduce the unroll count so that it evenly divides TripCount for
+ // partial unrolling.
+ Count = PartialThreshold / LoopSize;
+ while (Count != 0 && TripCount % Count != 0)
+ Count--;
+ }
+ } else if (Unrolling == Runtime) {
+ if (!AllowRuntime && !CountSetExplicitly) {
+ DEBUG(dbgs() << " will not try to unroll loop with runtime trip count "
+ << "-unroll-runtime not given\n");
return false;
}
- uint64_t Size = (uint64_t)LoopSize*Count;
- if (TripCount != 1 &&
- (Size > Threshold || (Count != TripCount && Size > PartialThreshold))) {
- if (Size > Threshold)
- DEBUG(dbgs() << " Too large to fully unroll with count: " << Count
- << " because size: " << Size << ">" << Threshold << "\n");
-
- bool AllowPartial = UserAllowPartial ? CurrentAllowPartial : UP.Partial;
- if (!AllowPartial && !(Runtime && TripCount == 0)) {
- DEBUG(dbgs() << " will not try to unroll partially because "
- << "-unroll-allow-partial not given\n");
- return false;
- }
- if (TripCount) {
- // Reduce unroll count to be modulo of TripCount for partial unrolling
- Count = PartialThreshold / LoopSize;
- while (Count != 0 && TripCount%Count != 0)
- Count--;
- }
- else if (Runtime) {
- // Reduce unroll count to be a lower power-of-two value
- while (Count != 0 && Size > PartialThreshold) {
- Count >>= 1;
- Size = LoopSize*Count;
- }
- }
- if (Count > UP.MaxCount)
- Count = UP.MaxCount;
- if (Count < 2) {
- DEBUG(dbgs() << " could not unroll partially\n");
- return false;
+ // Reduce unroll count to be the largest power-of-two factor of
+ // the original count which satisfies the threshold limit.
+ while (Count != 0 && UnrolledSize > PartialThreshold) {
+ Count >>= 1;
+ UnrolledSize = LoopSize * Count;
+ }
+ if (Count > UP.MaxCount)
+ Count = UP.MaxCount;
+ DEBUG(dbgs() << " partially unrolling with count: " << Count << "\n");
+ }
+
+ if (HasPragma) {
+ // Emit optimization remarks if we are unable to unroll the loop
+ // as directed by a pragma.
+ DebugLoc LoopLoc = L->getStartLoc();
+ Function *F = Header->getParent();
+ LLVMContext &Ctx = F->getContext();
+ if (HasEnablePragma && PragmaCount == 0) {
+ if (TripCount && Count != TripCount) {
+ emitOptimizationRemarkMissed(
+ Ctx, DEBUG_TYPE, *F, LoopLoc,
+ "Unable to fully unroll loop as directed by unroll(enable) pragma "
+ "because unrolled size is too large.");
+ } else if (!TripCount) {
+ emitOptimizationRemarkMissed(
+ Ctx, DEBUG_TYPE, *F, LoopLoc,
+ "Unable to fully unroll loop as directed by unroll(enable) pragma "
+ "because loop has a runtime trip count.");
}
- DEBUG(dbgs() << " partially unrolling with count: " << Count << "\n");
+ } else if (PragmaCount > 0 && Count != OriginalCount) {
+ emitOptimizationRemarkMissed(
+ Ctx, DEBUG_TYPE, *F, LoopLoc,
+ "Unable to unroll loop the number of times directed by "
+ "unroll_count pragma because unrolled size is too large.");
}
}
+ if (Unrolling != Full && Count < 2) {
+ // Partial unrolling by 1 is a nop. For full unrolling, a factor
+ // of 1 makes sense because loop control can be eliminated.
+ return false;
+ }
+
// Unroll the loop.
- if (!UnrollLoop(L, Count, TripCount, Runtime, TripMultiple, LI, this, &LPM))
+ if (!UnrollLoop(L, Count, TripCount, AllowRuntime, TripMultiple, LI, this, &LPM))
return false;
return true;
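The pragma logic above keys off loop metadata nodes such as "llvm.loop.unroll.count" hanging off a self-referential loop ID. As a sketch of the producer side, a frontend of this era could attach such a node roughly as follows; this assumes the LLVM 3.5 C++ API (metadata operands are still Values), and setUnrollCountPragma is a hypothetical helper, not part of the patch.

#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Metadata.h"
using namespace llvm;

static void setUnrollCountPragma(Loop *L, LLVMContext &Ctx, unsigned Count) {
  // The node GetUnrollMetadataValue looks for: a name string plus a value.
  Value *CountOps[] = {MDString::get(Ctx, "llvm.loop.unroll.count"),
                       ConstantInt::get(Type::getInt32Ty(Ctx), Count)};
  MDNode *CountMD = MDNode::get(Ctx, CountOps);
  // Loop IDs are self-referential: operand 0 refers back to the node
  // itself, which is what the asserts in GetUnrollMetadataValue check.
  Value *LoopIDOps[] = {nullptr, CountMD};
  MDNode *LoopID = MDNode::get(Ctx, LoopIDOps);
  LoopID->replaceOperandWith(0, LoopID);
  L->setLoopID(LoopID);
}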
diff --git a/lib/Transforms/Scalar/LowerAtomic.cpp b/lib/Transforms/Scalar/LowerAtomic.cpp
index 4251ac4..3314e1e 100644
--- a/lib/Transforms/Scalar/LowerAtomic.cpp
+++ b/lib/Transforms/Scalar/LowerAtomic.cpp
@@ -32,7 +32,10 @@ static bool LowerAtomicCmpXchgInst(AtomicCmpXchgInst *CXI) {
Value *Res = Builder.CreateSelect(Equal, Val, Orig);
Builder.CreateStore(Res, Ptr);
- CXI->replaceAllUsesWith(Orig);
+ Res = Builder.CreateInsertValue(UndefValue::get(CXI->getType()), Orig, 0);
+ Res = Builder.CreateInsertValue(Res, Equal, 1);
+
+ CXI->replaceAllUsesWith(Res);
CXI->eraseFromParent();
return true;
}
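This hunk tracks the IR change in this rebase: cmpxchg now returns a { original value, success flag } struct, so the lowering must assemble that pair with two insertvalues instead of handing users the bare loaded value. A plain C++ analogue of the lowered sequence, as a sketch only:

#include <utility>

template <typename T>
static std::pair<T, bool> loweredCmpXchg(T *Ptr, T Cmp, T Val) {
  T Orig = *Ptr;              // the load
  bool Equal = (Orig == Cmp); // the icmp eq
  *Ptr = Equal ? Val : Orig;  // the select + store
  return {Orig, Equal};       // what the two insertvalues assemble
}

int main() {
  int X = 1;
  auto R1 = loweredCmpXchg(&X, 1, 5); // succeeds, X becomes 5
  auto R2 = loweredCmpXchg(&X, 1, 7); // fails, X stays 5
  return (R1.second && !R2.second && X == 5) ? 0 : 1;
}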
diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp
index 986d6a4..ea2cf7c 100644
--- a/lib/Transforms/Scalar/Reassociate.cpp
+++ b/lib/Transforms/Scalar/Reassociate.cpp
@@ -1368,11 +1368,10 @@ Value *Reassociate::OptimizeXor(Instruction *I,
Value *Reassociate::OptimizeAdd(Instruction *I,
SmallVectorImpl<ValueEntry> &Ops) {
// Scan the operand lists looking for X and -X pairs. If we find any, we
- // can simplify the expression. X+-X == 0. While we're at it, scan for any
+ // can simplify expressions like X+-X == 0 and X+~X == -1. While we're
+ // at it, scan for any
// duplicates. We want to canonicalize Y+Y+Y+Z -> 3*Y+Z.
- //
- // TODO: We could handle "X + ~X" -> "-1" if we wanted, since "-X = ~X+1".
- //
+
for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
Value *TheOp = Ops[i].Op;
// Check to see if we've seen this operand before. If so, we factor all
@@ -1412,19 +1411,28 @@ Value *Reassociate::OptimizeAdd(Instruction *I,
continue;
}
- // Check for X and -X in the operand list.
- if (!BinaryOperator::isNeg(TheOp))
+ // Check for X and -X or X and ~X in the operand list.
+ if (!BinaryOperator::isNeg(TheOp) && !BinaryOperator::isNot(TheOp))
continue;
- Value *X = BinaryOperator::getNegArgument(TheOp);
+ Value *X = nullptr;
+ if (BinaryOperator::isNeg(TheOp))
+ X = BinaryOperator::getNegArgument(TheOp);
+ else if (BinaryOperator::isNot(TheOp))
+ X = BinaryOperator::getNotArgument(TheOp);
+
unsigned FoundX = FindInOperandList(Ops, i, X);
if (FoundX == i)
continue;
// Remove X and -X from the operand list.
- if (Ops.size() == 2)
+ if (Ops.size() == 2 && BinaryOperator::isNeg(TheOp))
return Constant::getNullValue(X->getType());
+ // Remove X and ~X from the operand list.
+ if (Ops.size() == 2 && BinaryOperator::isNot(TheOp))
+ return Constant::getAllOnesValue(X->getType());
+
Ops.erase(Ops.begin()+i);
if (i < FoundX)
--FoundX;
@@ -1434,6 +1442,13 @@ Value *Reassociate::OptimizeAdd(Instruction *I,
++NumAnnihil;
--i; // Revisit element.
e -= 2; // Removed two elements.
+
+ // If we matched X and ~X, append -1 to the operand list.
+ if (BinaryOperator::isNot(TheOp)) {
+ Value *V = Constant::getAllOnesValue(X->getType());
+ Ops.insert(Ops.end(), ValueEntry(getRank(V), V));
+ e += 1;
+ }
}
// Scan the operand list, checking to see if there are any common factors
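The new X + ~X case rests on the two's complement identity x + ~x == -1: since -x == ~x + 1, we have x + ~x == x + (-x - 1) == -1 for every x, with no overflow even at the type's extremes. A quick self-contained check:

#include <cassert>
#include <cstdint>

int main() {
  for (int32_t X : {0, 1, -1, 42, INT32_MIN, INT32_MAX})
    assert(X + ~X == -1); // e.g., 42 + ~42 == 42 + (-43) == -1
  return 0;
}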
diff --git a/lib/Transforms/Scalar/SCCP.cpp b/lib/Transforms/Scalar/SCCP.cpp
index feeb231..90c3520 100644
--- a/lib/Transforms/Scalar/SCCP.cpp
+++ b/lib/Transforms/Scalar/SCCP.cpp
@@ -494,7 +494,9 @@ private:
void visitResumeInst (TerminatorInst &I) { /*returns void*/ }
void visitUnreachableInst(TerminatorInst &I) { /*returns void*/ }
void visitFenceInst (FenceInst &I) { /*returns void*/ }
- void visitAtomicCmpXchgInst (AtomicCmpXchgInst &I) { markOverdefined(&I); }
+ void visitAtomicCmpXchgInst(AtomicCmpXchgInst &I) {
+ markAnythingOverdefined(&I);
+ }
void visitAtomicRMWInst (AtomicRMWInst &I) { markOverdefined(&I); }
void visitAllocaInst (Instruction &I) { markOverdefined(&I); }
void visitVAArgInst (Instruction &I) { markAnythingOverdefined(&I); }
diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp
index 04bf4f8..8c7f253 100644
--- a/lib/Transforms/Scalar/SROA.cpp
+++ b/lib/Transforms/Scalar/SROA.cpp
@@ -1032,11 +1032,6 @@ static Type *findCommonType(AllocaSlices::const_iterator B,
UserTy = SI->getValueOperand()->getType();
}
- if (!UserTy || (Ty && Ty != UserTy))
- TyIsCommon = false; // Give up on anything but an iN type.
- else
- Ty = UserTy;
-
if (IntegerType *UserITy = dyn_cast_or_null<IntegerType>(UserTy)) {
// If the type is larger than the partition, skip it. We only encounter
// this for split integer operations where we want to use the type of the
@@ -1051,6 +1046,13 @@ static Type *findCommonType(AllocaSlices::const_iterator B,
if (!ITy || ITy->getBitWidth() < UserITy->getBitWidth())
ITy = UserITy;
}
+
+ // To avoid depending on the order of slices, Ty and TyIsCommon must not
+ // depend on types skipped above.
+ if (!UserTy || (Ty && Ty != UserTy))
+ TyIsCommon = false; // Give up on anything but an iN type.
+ else
+ Ty = UserTy;
}
return TyIsCommon ? Ty : ITy;
@@ -1128,7 +1130,7 @@ static bool isSafePHIToSpeculate(PHINode &PN,
// If this pointer is always safe to load, or if we can prove that there
// is already a load in the block, then we can move the load to the pred
// block.
- if (InVal->isDereferenceablePointer() ||
+ if (InVal->isDereferenceablePointer(DL) ||
isSafeToLoadUnconditionally(InVal, TI, MaxAlign, DL))
continue;
@@ -1196,8 +1198,8 @@ static bool isSafeSelectToSpeculate(SelectInst &SI,
const DataLayout *DL = nullptr) {
Value *TValue = SI.getTrueValue();
Value *FValue = SI.getFalseValue();
- bool TDerefable = TValue->isDereferenceablePointer();
- bool FDerefable = FValue->isDereferenceablePointer();
+ bool TDerefable = TValue->isDereferenceablePointer(DL);
+ bool FDerefable = FValue->isDereferenceablePointer(DL);
for (User *U : SI.users()) {
LoadInst *LI = dyn_cast<LoadInst>(U);
diff --git a/lib/Transforms/Scalar/SampleProfile.cpp b/lib/Transforms/Scalar/SampleProfile.cpp
index 8e557aa..73c97ff 100644
--- a/lib/Transforms/Scalar/SampleProfile.cpp
+++ b/lib/Transforms/Scalar/SampleProfile.cpp
@@ -450,13 +450,14 @@ void SampleModuleProfile::dump() {
///
/// \returns true if the file was loaded successfully, false otherwise.
bool SampleModuleProfile::loadText() {
- std::unique_ptr<MemoryBuffer> Buffer;
- error_code EC = MemoryBuffer::getFile(Filename, Buffer);
- if (EC) {
+ ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOrErr =
+ MemoryBuffer::getFile(Filename);
+ if (std::error_code EC = BufferOrErr.getError()) {
std::string Msg(EC.message());
M.getContext().diagnose(DiagnosticInfoSampleProfile(Filename.data(), Msg));
return false;
}
+ std::unique_ptr<MemoryBuffer> Buffer = std::move(BufferOrErr.get());
line_iterator LineIt(*Buffer, '#');
// Read the profile of each function. Since each function may be
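loadText now uses the ErrorOr-returning overload of MemoryBuffer::getFile instead of the out-parameter one: the buffer travels in the return value, and the caller tests getError(). The stand-alone sketch below imitates that calling convention; ErrorOrLike and openFile are hypothetical stand-ins for llvm::ErrorOr and MemoryBuffer::getFile, shown only to illustrate the pattern.

#include <iostream>
#include <memory>
#include <string>
#include <system_error>
#include <utility>

template <typename T> class ErrorOrLike {
  std::error_code EC;
  T Val;

public:
  ErrorOrLike(std::error_code E) : EC(E) {}
  ErrorOrLike(T V) : Val(std::move(V)) {}
  std::error_code getError() const { return EC; }
  T &get() { return Val; }
};

static ErrorOrLike<std::unique_ptr<std::string>> openFile(bool Exists) {
  if (!Exists)
    return std::make_error_code(std::errc::no_such_file_or_directory);
  return std::unique_ptr<std::string>(new std::string("file contents"));
}

int main() {
  auto BufferOrErr = openFile(true);
  if (std::error_code EC = BufferOrErr.getError()) {
    std::cerr << EC.message() << "\n"; // the diagnose(...) path above
    return 1;
  }
  std::unique_ptr<std::string> Buffer = std::move(BufferOrErr.get());
  std::cout << *Buffer << "\n";
  return 0;
}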
diff --git a/lib/Transforms/Scalar/Scalar.cpp b/lib/Transforms/Scalar/Scalar.cpp
index f8f828c..edf012d 100644
--- a/lib/Transforms/Scalar/Scalar.cpp
+++ b/lib/Transforms/Scalar/Scalar.cpp
@@ -65,6 +65,7 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) {
initializeSinkingPass(Registry);
initializeTailCallElimPass(Registry);
initializeSeparateConstOffsetFromGEPPass(Registry);
+ initializeLoadCombinePass(Registry);
}
void LLVMInitializeScalarOpts(LLVMPassRegistryRef R) {
diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
index 58192fc..e2a24a7 100644
--- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp
+++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
@@ -1142,8 +1142,8 @@ public:
/// We can do this to a select if its only uses are loads and if the operand to
/// the select can be loaded unconditionally.
static bool isSafeSelectToSpeculate(SelectInst *SI, const DataLayout *DL) {
- bool TDerefable = SI->getTrueValue()->isDereferenceablePointer();
- bool FDerefable = SI->getFalseValue()->isDereferenceablePointer();
+ bool TDerefable = SI->getTrueValue()->isDereferenceablePointer(DL);
+ bool FDerefable = SI->getFalseValue()->isDereferenceablePointer(DL);
for (User *U : SI->users()) {
LoadInst *LI = dyn_cast<LoadInst>(U);
@@ -1226,7 +1226,7 @@ static bool isSafePHIToSpeculate(PHINode *PN, const DataLayout *DL) {
// If this pointer is always safe to load, or if we can prove that there is
// already a load in the block, then we can move the load to the pred block.
- if (InVal->isDereferenceablePointer() ||
+ if (InVal->isDereferenceablePointer(DL) ||
isSafeToLoadUnconditionally(InVal, Pred->getTerminator(), MaxAlign, DL))
continue;
diff --git a/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp b/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
index b8529e1..62f2026 100644
--- a/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
+++ b/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
@@ -121,41 +121,75 @@ class ConstantOffsetExtractor {
/// numeric value of the extracted constant offset (0 if failed), and a
/// new index representing the remainder (equal to the original index minus
/// the constant offset).
- /// \p Idx The given GEP index
- /// \p NewIdx The new index to replace
- /// \p DL The datalayout of the module
- /// \p IP Calculating the new index requires new instructions. IP indicates
- /// where to insert them (typically right before the GEP).
+ /// \p Idx The given GEP index
+ /// \p NewIdx The new index to replace (output)
+ /// \p DL The datalayout of the module
+ /// \p GEP The given GEP
static int64_t Extract(Value *Idx, Value *&NewIdx, const DataLayout *DL,
- Instruction *IP);
+ GetElementPtrInst *GEP);
/// Looks for a constant offset without extracting it. The meaning of the
/// arguments and the return value are the same as Extract.
- static int64_t Find(Value *Idx, const DataLayout *DL);
+ static int64_t Find(Value *Idx, const DataLayout *DL, GetElementPtrInst *GEP);
private:
ConstantOffsetExtractor(const DataLayout *Layout, Instruction *InsertionPt)
: DL(Layout), IP(InsertionPt) {}
- /// Searches the expression that computes V for a constant offset. If the
- /// searching is successful, update UserChain as a path from V to the constant
- /// offset.
- int64_t find(Value *V);
- /// A helper function to look into both operands of a binary operator U.
- /// \p IsSub Whether U is a sub operator. If so, we need to negate the
- /// constant offset at some point.
- int64_t findInEitherOperand(User *U, bool IsSub);
- /// After finding the constant offset and how it is reached from the GEP
- /// index, we build a new index which is a clone of the old one except the
- /// constant offset is removed. For example, given (a + (b + 5)) and knowning
- /// the constant offset is 5, this function returns (a + b).
+ /// Searches the expression that computes V for a non-zero constant C s.t.
+ /// V can be reassociated into the form V' + C. If the searching is
+ /// successful, returns C and updates UserChain as a def-use chain from C to V;
+ /// otherwise, UserChain is empty.
///
- /// We cannot simply change the constant to zero because the expression that
- /// computes the index or its intermediate result may be used by others.
- Value *rebuildWithoutConstantOffset();
- // A helper function for rebuildWithoutConstantOffset that rebuilds the direct
- // user (U) of the constant offset (C).
- Value *rebuildLeafWithoutConstantOffset(User *U, Value *C);
- /// Returns a clone of U except the first occurrence of From with To.
- Value *cloneAndReplace(User *U, Value *From, Value *To);
+ /// \p V The given expression
+ /// \p SignExtended Whether V will be sign-extended in the computation of the
+ /// GEP index
+ /// \p ZeroExtended Whether V will be zero-extended in the computation of the
+ /// GEP index
+ /// \p NonNegative Whether V is guaranteed to be non-negative. For example,
+ /// an index of an inbounds GEP is guaranteed to be
+ /// non-negative. Leveraging this, we can better split
+ /// inbounds GEPs.
+ APInt find(Value *V, bool SignExtended, bool ZeroExtended, bool NonNegative);
+ /// A helper function to look into both operands of a binary operator.
+ APInt findInEitherOperand(BinaryOperator *BO, bool SignExtended,
+ bool ZeroExtended);
+ /// After finding the constant offset C from the GEP index I, we build a new
+ /// index I' s.t. I' + C = I. This function builds and returns the new
+ /// index I' according to UserChain produced by function "find".
+ ///
+ /// The building conceptually takes two steps:
+ /// 1) iteratively distribute s/zext towards the leaves of the expression tree
+ /// that computes I
+ /// 2) reassociate the expression tree to the form I' + C.
+ ///
+ /// For example, to extract the 5 from sext(a + (b + 5)), we first distribute
+ /// sext to a, b and 5 so that we have
+ /// sext(a) + (sext(b) + 5).
+ /// Then, we reassociate it to
+ /// (sext(a) + sext(b)) + 5.
+ /// Given this form, we know I' is sext(a) + sext(b).
+ Value *rebuildWithoutConstOffset();
+ /// After the first step of rebuilding the GEP index without the constant
+ /// offset, distribute s/zext to the operands of all operators in UserChain.
+ /// e.g., zext(sext(a + (b + 5))) (assuming no overflow) =>
+ /// zext(sext(a)) + (zext(sext(b)) + zext(sext(5))).
+ ///
+ /// The function also updates UserChain to point to new subexpressions after
+ /// distributing s/zext. e.g., the old UserChain of the above example is
+ /// 5 -> b + 5 -> a + (b + 5) -> sext(...) -> zext(sext(...)),
+ /// and the new UserChain is
+ /// zext(sext(5)) -> zext(sext(b)) + zext(sext(5)) ->
+ /// zext(sext(a)) + (zext(sext(b)) + zext(sext(5)))
+ ///
+ /// \p ChainIndex The index to UserChain. ChainIndex is initially
+ /// UserChain.size() - 1, and is decremented during
+ /// the recursion.
+ Value *distributeExtsAndCloneChain(unsigned ChainIndex);
+ /// Reassociates the GEP index to the form I' + C and returns I'.
+ Value *removeConstOffset(unsigned ChainIndex);
+ /// A helper function to apply ExtInsts, a list of s/zext, to value V.
+ /// e.g., if ExtInsts = [sext i32 to i64, zext i16 to i32], this function
+ /// returns "sext i32 (zext i16 V to i32) to i64".
+ Value *applyExts(Value *V);
/// Returns true if LHS and RHS have no bits in common, i.e., LHS | RHS == 0.
bool NoCommonBits(Value *LHS, Value *RHS) const;
@@ -163,20 +197,26 @@ class ConstantOffsetExtractor {
/// \p KnownOne Mask of all bits that are known to be one.
/// \p KnownZero Mask of all bits that are known to be zero.
void ComputeKnownBits(Value *V, APInt &KnownOne, APInt &KnownZero) const;
- /// Finds the first use of Used in U. Returns -1 if not found.
- static unsigned FindFirstUse(User *U, Value *Used);
- /// Returns whether OPC (sext or zext) can be distributed to the operands of
- /// BO. e.g., sext can be distributed to the operands of an "add nsw" because
- /// sext (add nsw a, b) == add nsw (sext a), (sext b).
- static bool Distributable(unsigned OPC, BinaryOperator *BO);
+ /// A helper function that returns whether we can trace into the operands
+ /// of binary operator BO for a constant offset.
+ ///
+ /// \p SignExtended Whether BO is surrounded by sext
+ /// \p ZeroExtended Whether BO is surrounded by zext
+ /// \p NonNegative Whether BO is known to be non-negative, e.g., an in-bound
+ /// array index.
+ bool CanTraceInto(bool SignExtended, bool ZeroExtended, BinaryOperator *BO,
+ bool NonNegative);
/// The path from the constant offset to the old GEP index. e.g., if the GEP
/// index is "a * b + (c + 5)". After running function find, UserChain[0] will
/// be the constant 5, UserChain[1] will be the subexpression "c + 5", and
/// UserChain[2] will be the entire expression "a * b + (c + 5)".
///
- /// This path helps rebuildWithoutConstantOffset rebuild the new GEP index.
+ /// This path helps to rebuild the new GEP index.
SmallVector<User *, 8> UserChain;
+ /// A data structure used in rebuildWithoutConstOffset. Contains all
+ /// sext/zext instructions along UserChain.
+ SmallVector<CastInst *, 16> ExtInsts;
/// The data layout of the module. Used in ComputeKnownBits.
const DataLayout *DL;
Instruction *IP; /// Insertion position of cloned instructions.
@@ -196,6 +236,15 @@ class SeparateConstOffsetFromGEP : public FunctionPass {
AU.addRequired<DataLayoutPass>();
AU.addRequired<TargetTransformInfo>();
}
+
+ bool doInitialization(Module &M) override {
+ DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
+ if (DLP == nullptr)
+ report_fatal_error("data layout missing");
+ DL = &DLP->getDataLayout();
+ return false;
+ }
+
bool runOnFunction(Function &F) override;
private:
@@ -206,8 +255,42 @@ class SeparateConstOffsetFromGEP : public FunctionPass {
/// function only inspects the GEP without changing it. The output
/// NeedsExtraction indicates whether we can extract a non-zero constant
/// offset from any index.
- int64_t accumulateByteOffset(GetElementPtrInst *GEP, const DataLayout *DL,
- bool &NeedsExtraction);
+ int64_t accumulateByteOffset(GetElementPtrInst *GEP, bool &NeedsExtraction);
+ /// Canonicalize array indices to pointer-size integers. This helps to
+ /// simplify the logic of splitting a GEP. For example, if a + b is a
+ /// pointer-size integer, we have
+ /// gep base, a + b = gep (gep base, a), b
+ /// However, this equality may not hold if the size of a + b is smaller than
+ /// the pointer size, because LLVM conceptually sign-extends GEP indices to
+ /// pointer size before computing the address
+ /// (http://llvm.org/docs/LangRef.html#id181).
+ ///
+ /// This canonicalization is very likely already done in clang and
+ /// instcombine. Therefore, the program will probably remain the same.
+ ///
+ /// Returns true if the module changes.
+ ///
+ /// Verified in @i32_add in split-gep.ll
+ bool canonicalizeArrayIndicesToPointerSize(GetElementPtrInst *GEP);
+ /// For each array index that is in the form of zext(a), convert it to sext(a)
+ /// if we can prove zext(a) <= max signed value of typeof(a). We prefer
+ /// sext(a) to zext(a), because in the special case where x + y >= 0 and
+ /// (x >= 0 or y >= 0), function CanTraceInto can split sext(x + y),
+ /// while no such case exists for zext(x + y).
+ ///
+ /// Note that
+ /// zext(x + y) = zext(x) + zext(y)
+ /// is wrong, e.g.,
+ /// zext i32(UINT_MAX + 1) to i64 !=
+ /// (zext i32 UINT_MAX to i64) + (zext i32 1 to i64)
+ ///
+ /// Returns true if the module changes.
+ ///
+ /// Verified in @inbounds_zext_add in split-gep.ll and @sum_of_array3 in
+ /// split-gep-and-gvn.ll
+ bool convertInBoundsZExtToSExt(GetElementPtrInst *GEP);
+
+ const DataLayout *DL;
};
} // anonymous namespace
@@ -227,181 +310,272 @@ FunctionPass *llvm::createSeparateConstOffsetFromGEPPass() {
return new SeparateConstOffsetFromGEP();
}
-bool ConstantOffsetExtractor::Distributable(unsigned OPC, BinaryOperator *BO) {
- assert(OPC == Instruction::SExt || OPC == Instruction::ZExt);
+bool ConstantOffsetExtractor::CanTraceInto(bool SignExtended,
+ bool ZeroExtended,
+ BinaryOperator *BO,
+ bool NonNegative) {
+ // We only consider ADD, SUB and OR, because a non-zero constant found in
+ // expressions composed of these operations can be easily hoisted as a
+ // constant offset by reassociation.
+ if (BO->getOpcode() != Instruction::Add &&
+ BO->getOpcode() != Instruction::Sub &&
+ BO->getOpcode() != Instruction::Or) {
+ return false;
+ }
+
+ Value *LHS = BO->getOperand(0), *RHS = BO->getOperand(1);
+ // Do not trace into "or" unless it is equivalent to "add". If LHS and RHS
+ // don't have common bits, (LHS | RHS) is equivalent to (LHS + RHS).
+ if (BO->getOpcode() == Instruction::Or && !NoCommonBits(LHS, RHS))
+ return false;
+
+ // In addition, tracing into BO requires that its surrounding s/zext (if
+ // any) is distributable to both operands.
+ //
+ // Suppose BO = A op B.
+ // SignExtended | ZeroExtended | Distributable?
+ // --------------+--------------+----------------------------------
+ // 0 | 0 | true because no s/zext exists
+ // 0 | 1 | zext(BO) == zext(A) op zext(B)
+ // 1 | 0 | sext(BO) == sext(A) op sext(B)
+ // 1 | 1 | zext(sext(BO)) ==
+ // | | zext(sext(A)) op zext(sext(B))
+ if (BO->getOpcode() == Instruction::Add && !ZeroExtended && NonNegative) {
+ // If a + b >= 0 and (a >= 0 or b >= 0), then
+ // sext(a + b) = sext(a) + sext(b)
+ // even if the addition is not marked nsw.
+ //
+ // Leveraging this invariant, we can trace into an sext'ed inbounds GEP
+ // index if the constant offset is non-negative.
+ //
+ // Verified in @sext_add in split-gep.ll.
+ if (ConstantInt *ConstLHS = dyn_cast<ConstantInt>(LHS)) {
+ if (!ConstLHS->isNegative())
+ return true;
+ }
+ if (ConstantInt *ConstRHS = dyn_cast<ConstantInt>(RHS)) {
+ if (!ConstRHS->isNegative())
+ return true;
+ }
+ }
// sext (add/sub nsw A, B) == add/sub nsw (sext A), (sext B)
// zext (add/sub nuw A, B) == add/sub nuw (zext A), (zext B)
if (BO->getOpcode() == Instruction::Add ||
BO->getOpcode() == Instruction::Sub) {
- return (OPC == Instruction::SExt && BO->hasNoSignedWrap()) ||
- (OPC == Instruction::ZExt && BO->hasNoUnsignedWrap());
+ if (SignExtended && !BO->hasNoSignedWrap())
+ return false;
+ if (ZeroExtended && !BO->hasNoUnsignedWrap())
+ return false;
}
- // sext/zext (and/or/xor A, B) == and/or/xor (sext/zext A), (sext/zext B)
- // -instcombine also leverages this invariant to do the reverse
- // transformation to reduce integer casts.
- return BO->getOpcode() == Instruction::And ||
- BO->getOpcode() == Instruction::Or ||
- BO->getOpcode() == Instruction::Xor;
+ return true;
}
-int64_t ConstantOffsetExtractor::findInEitherOperand(User *U, bool IsSub) {
- assert(U->getNumOperands() == 2);
- int64_t ConstantOffset = find(U->getOperand(0));
+APInt ConstantOffsetExtractor::findInEitherOperand(BinaryOperator *BO,
+ bool SignExtended,
+ bool ZeroExtended) {
+ // BO being non-negative does not shed light on whether its operands are
+ // non-negative. Clear the NonNegative flag here.
+ APInt ConstantOffset = find(BO->getOperand(0), SignExtended, ZeroExtended,
+ /* NonNegative */ false);
// If we found a constant offset in the left operand, stop and return that.
// This shortcut might cause us to miss opportunities of combining the
// constant offsets in both operands, e.g., (a + 4) + (b + 5) => (a + b) + 9.
// However, such cases are probably already handled by -instcombine,
// given this pass runs after the standard optimizations.
if (ConstantOffset != 0) return ConstantOffset;
- ConstantOffset = find(U->getOperand(1));
+ ConstantOffset = find(BO->getOperand(1), SignExtended, ZeroExtended,
+ /* NonNegative */ false);
// If U is a sub operator, negate the constant offset found in the right
// operand.
- return IsSub ? -ConstantOffset : ConstantOffset;
+ if (BO->getOpcode() == Instruction::Sub)
+ ConstantOffset = -ConstantOffset;
+ return ConstantOffset;
}
-int64_t ConstantOffsetExtractor::find(Value *V) {
- // TODO(jingyue): We can even trace into integer/pointer casts, such as
+APInt ConstantOffsetExtractor::find(Value *V, bool SignExtended,
+ bool ZeroExtended, bool NonNegative) {
+ // TODO(jingyue): We could trace into integer/pointer casts, such as
// inttoptr, ptrtoint, bitcast, and addrspacecast. We choose to handle only
// integers because it gives good enough results for our benchmarks.
- assert(V->getType()->isIntegerTy());
+ unsigned BitWidth = cast<IntegerType>(V->getType())->getBitWidth();
+ // We cannot do much with Values that are not a User, such as an Argument.
User *U = dyn_cast<User>(V);
- // We cannot do much with Values that are not a User, such as BasicBlock and
- // MDNode.
- if (U == nullptr) return 0;
+ if (U == nullptr) return APInt(BitWidth, 0);
- int64_t ConstantOffset = 0;
- if (ConstantInt *CI = dyn_cast<ConstantInt>(U)) {
+ APInt ConstantOffset(BitWidth, 0);
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
// Hooray, we found it!
- ConstantOffset = CI->getSExtValue();
- } else if (Operator *O = dyn_cast<Operator>(U)) {
- // The GEP index may be more complicated than a simple addition of a
- // varaible and a constant. Therefore, we trace into subexpressions for more
- // hoisting opportunities.
- switch (O->getOpcode()) {
- case Instruction::Add: {
- ConstantOffset = findInEitherOperand(U, false);
- break;
- }
- case Instruction::Sub: {
- ConstantOffset = findInEitherOperand(U, true);
- break;
- }
- case Instruction::Or: {
- // If LHS and RHS don't have common bits, (LHS | RHS) is equivalent to
- // (LHS + RHS).
- if (NoCommonBits(U->getOperand(0), U->getOperand(1)))
- ConstantOffset = findInEitherOperand(U, false);
- break;
- }
- case Instruction::SExt:
- case Instruction::ZExt: {
- // We trace into sext/zext if the operator can be distributed to its
- // operand. e.g., we can transform into "sext (add nsw a, 5)" and
- // extract constant 5, because
- // sext (add nsw a, 5) == add nsw (sext a), 5
- if (BinaryOperator *BO = dyn_cast<BinaryOperator>(U->getOperand(0))) {
- if (Distributable(O->getOpcode(), BO))
- ConstantOffset = find(U->getOperand(0));
- }
- break;
- }
+ ConstantOffset = CI->getValue();
+ } else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(V)) {
+ // Trace into subexpressions for more hoisting opportunities.
+ if (CanTraceInto(SignExtended, ZeroExtended, BO, NonNegative)) {
+ ConstantOffset = findInEitherOperand(BO, SignExtended, ZeroExtended);
}
+ } else if (isa<SExtInst>(V)) {
+ ConstantOffset = find(U->getOperand(0), /* SignExtended */ true,
+ ZeroExtended, NonNegative).sext(BitWidth);
+ } else if (isa<ZExtInst>(V)) {
+ // As an optimization, we can clear the SignExtended flag because
+ // sext(zext(a)) = zext(a). Verified in @sext_zext in split-gep.ll.
+ //
+ // Clear the NonNegative flag, because zext(a) >= 0 does not imply a >= 0.
+ ConstantOffset =
+ find(U->getOperand(0), /* SignExtended */ false,
+ /* ZeroExtended */ true, /* NonNegative */ false).zext(BitWidth);
}
- // If we found a non-zero constant offset, adds it to the path for future
- // transformation (rebuildWithoutConstantOffset). Zero is a valid constant
- // offset, but doesn't help this optimization.
+
+ // If we found a non-zero constant offset, add it to the path for
+ // rebuildWithoutConstOffset. Zero is a valid constant offset, but doesn't
+ // help this optimization.
if (ConstantOffset != 0)
UserChain.push_back(U);
return ConstantOffset;
}
-unsigned ConstantOffsetExtractor::FindFirstUse(User *U, Value *Used) {
- for (unsigned I = 0, E = U->getNumOperands(); I < E; ++I) {
- if (U->getOperand(I) == Used)
- return I;
+Value *ConstantOffsetExtractor::applyExts(Value *V) {
+ Value *Current = V;
+ // ExtInsts is built in the use-def order. Therefore, we apply them to V
+ // in the reversed order.
+ for (auto I = ExtInsts.rbegin(), E = ExtInsts.rend(); I != E; ++I) {
+ if (Constant *C = dyn_cast<Constant>(Current)) {
+ // If Current is a constant, apply s/zext using ConstantExpr::getCast.
+ // ConstantExpr::getCast emits a ConstantInt if C is a ConstantInt.
+ Current = ConstantExpr::getCast((*I)->getOpcode(), C, (*I)->getType());
+ } else {
+ Instruction *Ext = (*I)->clone();
+ Ext->setOperand(0, Current);
+ Ext->insertBefore(IP);
+ Current = Ext;
+ }
}
- return -1;
+ return Current;
}
-Value *ConstantOffsetExtractor::cloneAndReplace(User *U, Value *From,
- Value *To) {
- // Finds in U the first use of From. It is safe to ignore future occurrences
- // of From, because findInEitherOperand similarly stops searching the right
- // operand when the first operand has a non-zero constant offset.
- unsigned OpNo = FindFirstUse(U, From);
- assert(OpNo != (unsigned)-1 && "UserChain wasn't built correctly");
-
- // ConstantOffsetExtractor::find only follows Operators (i.e., Instructions
- // and ConstantExprs). Therefore, U is either an Instruction or a
- // ConstantExpr.
- if (Instruction *I = dyn_cast<Instruction>(U)) {
- Instruction *Clone = I->clone();
- Clone->setOperand(OpNo, To);
- Clone->insertBefore(IP);
- return Clone;
+Value *ConstantOffsetExtractor::rebuildWithoutConstOffset() {
+ distributeExtsAndCloneChain(UserChain.size() - 1);
+ // Remove all nullptrs (used to be s/zext) from UserChain.
+ unsigned NewSize = 0;
+ for (auto I = UserChain.begin(), E = UserChain.end(); I != E; ++I) {
+ if (*I != nullptr) {
+ UserChain[NewSize] = *I;
+ NewSize++;
+ }
}
- // cast<Constant>(To) is safe because a ConstantExpr only uses Constants.
- return cast<ConstantExpr>(U)
- ->getWithOperandReplaced(OpNo, cast<Constant>(To));
+ UserChain.resize(NewSize);
+ return removeConstOffset(UserChain.size() - 1);
}
-Value *ConstantOffsetExtractor::rebuildLeafWithoutConstantOffset(User *U,
- Value *C) {
- assert(U->getNumOperands() <= 2 &&
- "We didn't trace into any operator with more than 2 operands");
- // If U has only one operand which is the constant offset, removing the
- // constant offset leaves U as a null value.
- if (U->getNumOperands() == 1)
- return Constant::getNullValue(U->getType());
-
- // U->getNumOperands() == 2
- unsigned OpNo = FindFirstUse(U, C); // U->getOperand(OpNo) == C
- assert(OpNo < 2 && "UserChain wasn't built correctly");
- Value *TheOther = U->getOperand(1 - OpNo); // The other operand of U
- // If U = C - X, removing C makes U = -X; otherwise U will simply be X.
- if (!isa<SubOperator>(U) || OpNo == 1)
- return TheOther;
- if (isa<ConstantExpr>(U))
- return ConstantExpr::getNeg(cast<Constant>(TheOther));
- return BinaryOperator::CreateNeg(TheOther, "", IP);
+Value *
+ConstantOffsetExtractor::distributeExtsAndCloneChain(unsigned ChainIndex) {
+ User *U = UserChain[ChainIndex];
+ if (ChainIndex == 0) {
+ assert(isa<ConstantInt>(U));
+ // If U is a ConstantInt, applyExts will return a ConstantInt as well.
+ return UserChain[ChainIndex] = cast<ConstantInt>(applyExts(U));
+ }
+
+ if (CastInst *Cast = dyn_cast<CastInst>(U)) {
+ assert((isa<SExtInst>(Cast) || isa<ZExtInst>(Cast)) &&
+ "We only traced into two types of CastInst: sext and zext");
+ ExtInsts.push_back(Cast);
+ UserChain[ChainIndex] = nullptr;
+ return distributeExtsAndCloneChain(ChainIndex - 1);
+ }
+
+ // Function find only traces into BinaryOperator and CastInst.
+ BinaryOperator *BO = cast<BinaryOperator>(U);
+ // OpNo = which operand of BO is UserChain[ChainIndex - 1]
+ unsigned OpNo = (BO->getOperand(0) == UserChain[ChainIndex - 1] ? 0 : 1);
+ Value *TheOther = applyExts(BO->getOperand(1 - OpNo));
+ Value *NextInChain = distributeExtsAndCloneChain(ChainIndex - 1);
+
+ BinaryOperator *NewBO = nullptr;
+ if (OpNo == 0) {
+ NewBO = BinaryOperator::Create(BO->getOpcode(), NextInChain, TheOther,
+ BO->getName(), IP);
+ } else {
+ NewBO = BinaryOperator::Create(BO->getOpcode(), TheOther, NextInChain,
+ BO->getName(), IP);
+ }
+ return UserChain[ChainIndex] = NewBO;
}
-Value *ConstantOffsetExtractor::rebuildWithoutConstantOffset() {
- assert(UserChain.size() > 0 && "you at least found a constant, right?");
- // Start with the constant and go up through UserChain, each time building a
- // clone of the subexpression but with the constant removed.
- // e.g., to build a clone of (a + (b + (c + 5)) but with the 5 removed, we
- // first c, then (b + c), and finally (a + (b + c)).
- //
- // Fast path: if the GEP index is a constant, simply returns 0.
- if (UserChain.size() == 1)
- return ConstantInt::get(UserChain[0]->getType(), 0);
-
- Value *Remainder =
- rebuildLeafWithoutConstantOffset(UserChain[1], UserChain[0]);
- for (size_t I = 2; I < UserChain.size(); ++I)
- Remainder = cloneAndReplace(UserChain[I], UserChain[I - 1], Remainder);
- return Remainder;
+Value *ConstantOffsetExtractor::removeConstOffset(unsigned ChainIndex) {
+ if (ChainIndex == 0) {
+ assert(isa<ConstantInt>(UserChain[ChainIndex]));
+ return ConstantInt::getNullValue(UserChain[ChainIndex]->getType());
+ }
+
+ BinaryOperator *BO = cast<BinaryOperator>(UserChain[ChainIndex]);
+ unsigned OpNo = (BO->getOperand(0) == UserChain[ChainIndex - 1] ? 0 : 1);
+ assert(BO->getOperand(OpNo) == UserChain[ChainIndex - 1]);
+ Value *NextInChain = removeConstOffset(ChainIndex - 1);
+ Value *TheOther = BO->getOperand(1 - OpNo);
+
+ // If NextInChain is 0 and not the LHS of a sub, we can simplify the
+ // sub-expression to be just TheOther.
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(NextInChain)) {
+ if (CI->isZero() && !(BO->getOpcode() == Instruction::Sub && OpNo == 0))
+ return TheOther;
+ }
+
+ if (BO->getOpcode() == Instruction::Or) {
+ // Rebuild "or" as "add", because "or" may be invalid for the new
+    // expression.
+ //
+ // For instance, given
+ // a | (b + 5) where a and b + 5 have no common bits,
+ // we can extract 5 as the constant offset.
+ //
+ // However, reusing the "or" in the new index would give us
+ // (a | b) + 5
+ // which does not equal a | (b + 5).
+ //
+ // Replacing the "or" with "add" is fine, because
+ // a | (b + 5) = a + (b + 5) = (a + b) + 5
+ return BinaryOperator::CreateAdd(BO->getOperand(0), BO->getOperand(1),
+ BO->getName(), IP);
+ }
+
+ // We can reuse BO in this case, because the new expression shares the same
+ // instruction type and BO is used at most once.
+ assert(BO->getNumUses() <= 1 &&
+ "distributeExtsAndCloneChain clones each BinaryOperator in "
+ "UserChain, so no one should be used more than "
+ "once");
+ BO->setOperand(OpNo, NextInChain);
+ BO->setHasNoSignedWrap(false);
+ BO->setHasNoUnsignedWrap(false);
+ // Make sure it appears after all instructions we've inserted so far.
+ BO->moveBefore(IP);
+ return BO;
}
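
The "or"-to-"add" rewrite above rests on a bitwise identity that a standalone sketch (plain C++, not part of the patch) can check directly: when two values share no set bits, bitwise-or behaves exactly like addition, so the constant can be re-associated out.

    #include <cassert>
    #include <cstdint>

    int main() {
      uint32_t a = 0xF0;  // high nibble only
      uint32_t b = 0x08;  // low bits only; b + 5 stays below 0x10
      assert((a & (b + 5)) == 0);            // no common bits
      assert((a | (b + 5)) == (a + b) + 5);  // or == add, so the 5 extracts
      return 0;
    }
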
int64_t ConstantOffsetExtractor::Extract(Value *Idx, Value *&NewIdx,
const DataLayout *DL,
- Instruction *IP) {
- ConstantOffsetExtractor Extractor(DL, IP);
+ GetElementPtrInst *GEP) {
+ ConstantOffsetExtractor Extractor(DL, GEP);
// Find a non-zero constant offset first.
- int64_t ConstantOffset = Extractor.find(Idx);
- if (ConstantOffset == 0)
- return 0;
- // Then rebuild a new index with the constant removed.
- NewIdx = Extractor.rebuildWithoutConstantOffset();
- return ConstantOffset;
+ APInt ConstantOffset =
+ Extractor.find(Idx, /* SignExtended */ false, /* ZeroExtended */ false,
+ GEP->isInBounds());
+ if (ConstantOffset != 0) {
+ // Separates the constant offset from the GEP index.
+ NewIdx = Extractor.rebuildWithoutConstOffset();
+ }
+ return ConstantOffset.getSExtValue();
}
-int64_t ConstantOffsetExtractor::Find(Value *Idx, const DataLayout *DL) {
- return ConstantOffsetExtractor(DL, nullptr).find(Idx);
+int64_t ConstantOffsetExtractor::Find(Value *Idx, const DataLayout *DL,
+ GetElementPtrInst *GEP) {
+  // If Idx is an index of an inbounds GEP, Idx is guaranteed to be non-negative.
+ return ConstantOffsetExtractor(DL, GEP)
+ .find(Idx, /* SignExtended */ false, /* ZeroExtended */ false,
+ GEP->isInBounds())
+ .getSExtValue();
}
void ConstantOffsetExtractor::ComputeKnownBits(Value *V, APInt &KnownOne,
@@ -421,8 +595,64 @@ bool ConstantOffsetExtractor::NoCommonBits(Value *LHS, Value *RHS) const {
return (LHSKnownZero | RHSKnownZero).isAllOnesValue();
}
-int64_t SeparateConstOffsetFromGEP::accumulateByteOffset(
- GetElementPtrInst *GEP, const DataLayout *DL, bool &NeedsExtraction) {
+bool SeparateConstOffsetFromGEP::canonicalizeArrayIndicesToPointerSize(
+ GetElementPtrInst *GEP) {
+ bool Changed = false;
+ Type *IntPtrTy = DL->getIntPtrType(GEP->getType());
+ gep_type_iterator GTI = gep_type_begin(*GEP);
+ for (User::op_iterator I = GEP->op_begin() + 1, E = GEP->op_end();
+ I != E; ++I, ++GTI) {
+ // Skip struct member indices which must be i32.
+ if (isa<SequentialType>(*GTI)) {
+ if ((*I)->getType() != IntPtrTy) {
+ *I = CastInst::CreateIntegerCast(*I, IntPtrTy, true, "idxprom", GEP);
+ Changed = true;
+ }
+ }
+ }
+ return Changed;
+}
+
+bool
+SeparateConstOffsetFromGEP::convertInBoundsZExtToSExt(GetElementPtrInst *GEP) {
+ if (!GEP->isInBounds())
+ return false;
+
+ // TODO: consider alloca
+ GlobalVariable *UnderlyingObject =
+ dyn_cast<GlobalVariable>(GEP->getPointerOperand());
+ if (UnderlyingObject == nullptr)
+ return false;
+
+ uint64_t ObjectSize =
+ DL->getTypeAllocSize(UnderlyingObject->getType()->getElementType());
+ gep_type_iterator GTI = gep_type_begin(*GEP);
+ bool Changed = false;
+ for (User::op_iterator I = GEP->op_begin() + 1, E = GEP->op_end(); I != E;
+ ++I, ++GTI) {
+ if (isa<SequentialType>(*GTI)) {
+ if (ZExtInst *Extended = dyn_cast<ZExtInst>(*I)) {
+ unsigned SrcBitWidth =
+ cast<IntegerType>(Extended->getSrcTy())->getBitWidth();
+ // For GEP operand zext(a), if a <= max signed value of typeof(a), then
+ // the sign bit of a is zero and sext(a) = zext(a). Because the GEP is
+ // in bounds, we know a <= ObjectSize, so the condition can be reduced
+ // to ObjectSize <= max signed value of typeof(a).
+ if (ObjectSize <=
+ APInt::getSignedMaxValue(SrcBitWidth).getZExtValue()) {
+ *I = new SExtInst(Extended->getOperand(0), Extended->getType(),
+ Extended->getName(), GEP);
+ Changed = true;
+ }
+ }
+ }
+ }
+ return Changed;
+}
+
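
The zext-to-sext conversion above reduces to a small fact about extensions, shown here as a standalone sketch (plain C++, not part of the patch): when a value is no larger than the signed maximum of its type, its sign bit is zero, so zero- and sign-extension agree.

    #include <cassert>
    #include <cstdint>

    int main() {
      uint8_t a = 100;                    // 100 <= 127 == INT8_MAX
      uint64_t zext = (uint64_t)a;        // zero-extend
      int64_t sext = (int64_t)(int8_t)a;  // sign-extend the same bits
      assert(zext == (uint64_t)sext);     // equal while the sign bit is 0
      return 0;
    }
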
+int64_t
+SeparateConstOffsetFromGEP::accumulateByteOffset(GetElementPtrInst *GEP,
+ bool &NeedsExtraction) {
NeedsExtraction = false;
int64_t AccumulativeByteOffset = 0;
gep_type_iterator GTI = gep_type_begin(*GEP);
@@ -430,7 +660,7 @@ int64_t SeparateConstOffsetFromGEP::accumulateByteOffset(
if (isa<SequentialType>(*GTI)) {
// Tries to extract a constant offset from this GEP index.
int64_t ConstantOffset =
- ConstantOffsetExtractor::Find(GEP->getOperand(I), DL);
+ ConstantOffsetExtractor::Find(GEP->getOperand(I), DL, GEP);
if (ConstantOffset != 0) {
NeedsExtraction = true;
// A GEP may have multiple indices. We accumulate the extracted
@@ -455,31 +685,11 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
return false;
bool Changed = false;
+ Changed |= canonicalizeArrayIndicesToPointerSize(GEP);
+ Changed |= convertInBoundsZExtToSExt(GEP);
- // Shortcuts integer casts. Eliminating these explicit casts can make
- // subsequent optimizations more obvious: ConstantOffsetExtractor needn't
- // trace into these casts.
- if (GEP->isInBounds()) {
- // Doing this to inbounds GEPs is safe because their indices are guaranteed
- // to be non-negative and in bounds.
- gep_type_iterator GTI = gep_type_begin(*GEP);
- for (unsigned I = 1, E = GEP->getNumOperands(); I != E; ++I, ++GTI) {
- if (isa<SequentialType>(*GTI)) {
- if (Operator *O = dyn_cast<Operator>(GEP->getOperand(I))) {
- if (O->getOpcode() == Instruction::SExt ||
- O->getOpcode() == Instruction::ZExt) {
- GEP->setOperand(I, O->getOperand(0));
- Changed = true;
- }
- }
- }
- }
- }
-
- const DataLayout *DL = &getAnalysis<DataLayoutPass>().getDataLayout();
bool NeedsExtraction;
- int64_t AccumulativeByteOffset =
- accumulateByteOffset(GEP, DL, NeedsExtraction);
+ int64_t AccumulativeByteOffset = accumulateByteOffset(GEP, NeedsExtraction);
if (!NeedsExtraction)
return Changed;
@@ -506,30 +716,29 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
assert(NewIdx != nullptr &&
"ConstantOffset != 0 implies NewIdx is set");
GEP->setOperand(I, NewIdx);
- // Clear the inbounds attribute because the new index may be off-bound.
- // e.g.,
- //
- // b = add i64 a, 5
- // addr = gep inbounds float* p, i64 b
- //
- // is transformed to:
- //
- // addr2 = gep float* p, i64 a
- // addr = gep float* addr2, i64 5
- //
- // If a is -4, although the old index b is in bounds, the new index a is
- // off-bound. http://llvm.org/docs/LangRef.html#id181 says "if the
- // inbounds keyword is not present, the offsets are added to the base
- // address with silently-wrapping two's complement arithmetic".
- // Therefore, the final code will be a semantically equivalent.
- //
- // TODO(jingyue): do some range analysis to keep as many inbounds as
- // possible. GEPs with inbounds are more friendly to alias analysis.
- GEP->setIsInBounds(false);
- Changed = true;
}
}
}
+ // Clear the inbounds attribute because the new index may be off-bound.
+ // e.g.,
+ //
+ // b = add i64 a, 5
+ // addr = gep inbounds float* p, i64 b
+ //
+ // is transformed to:
+ //
+ // addr2 = gep float* p, i64 a
+ // addr = gep float* addr2, i64 5
+ //
+ // If a is -4, although the old index b is in bounds, the new index a is
+ // off-bound. http://llvm.org/docs/LangRef.html#id181 says "if the
+ // inbounds keyword is not present, the offsets are added to the base
+ // address with silently-wrapping two's complement arithmetic".
+  // Therefore, the final code will be semantically equivalent.
+ //
+ // TODO(jingyue): do some range analysis to keep as many inbounds as
+ // possible. GEPs with inbounds are more friendly to alias analysis.
+ GEP->setIsInBounds(false);
// Offsets the base with the accumulative byte offset.
//
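
Dropping inbounds is sufficient here because of how non-inbounds GEP arithmetic is defined, which a standalone sketch (plain C++, not part of the patch) mirrors: wrapping two's complement addition is associative, so splitting b = a + 5 off the index cannot change the final address even when the intermediate base + a is out of bounds.

    #include <cassert>
    #include <cstdint>

    int main() {
      uint64_t base = 0x1000;
      uint64_t a = (uint64_t)-4;  // "negative" index; base + a wraps
      assert(base + (a + 5) == (base + a) + 5);  // same final address
      return 0;
    }
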
@@ -562,9 +771,9 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
Instruction *NewGEP = GEP->clone();
NewGEP->insertBefore(GEP);
- Type *IntPtrTy = DL->getIntPtrType(GEP->getType());
uint64_t ElementTypeSizeOfGEP =
DL->getTypeAllocSize(GEP->getType()->getElementType());
+ Type *IntPtrTy = DL->getIntPtrType(GEP->getType());
if (AccumulativeByteOffset % ElementTypeSizeOfGEP == 0) {
// Very likely. As long as %gep is naturally aligned, the byte offset we
// extracted should be a multiple of sizeof(*%gep).
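
At the source level, the net effect of splitGEP can be pictured with a standalone sketch (plain C++, not part of the patch): an address like &p[a + 5] becomes the variable part &p[a] plus a constant byte offset of 5 * sizeof(*p), a shape that later passes can fold into addressing modes or share across GEPs with the same variable part.

    #include <cassert>
    #include <cstdint>

    int main() {
      int buf[32];
      int *p = buf;
      int64_t a = 7;
      char *split = (char *)(p + a) + 5 * sizeof(int);  // base + const offset
      assert((int *)split == &p[a + 5]);
      return 0;
    }
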
diff --git a/lib/Transforms/Scalar/Sink.cpp b/lib/Transforms/Scalar/Sink.cpp
index 482c33a..7348c45 100644
--- a/lib/Transforms/Scalar/Sink.cpp
+++ b/lib/Transforms/Scalar/Sink.cpp
@@ -18,6 +18,7 @@
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/CFG.h"
+#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Support/Debug.h"
@@ -34,6 +35,7 @@ namespace {
DominatorTree *DT;
LoopInfo *LI;
AliasAnalysis *AA;
+ const DataLayout *DL;
public:
static char ID; // Pass identification
@@ -98,6 +100,8 @@ bool Sinking::runOnFunction(Function &F) {
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
LI = &getAnalysis<LoopInfo>();
AA = &getAnalysis<AliasAnalysis>();
+ DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
+ DL = DLP ? &DLP->getDataLayout() : nullptr;
bool MadeChange, EverMadeChange = false;
@@ -193,7 +197,7 @@ bool Sinking::IsAcceptableTarget(Instruction *Inst,
if (SuccToSinkTo->getUniquePredecessor() != Inst->getParent()) {
// We cannot sink a load across a critical edge - there may be stores in
// other code paths.
- if (!isSafeToSpeculativelyExecute(Inst))
+ if (!isSafeToSpeculativelyExecute(Inst, DL))
return false;
// We don't want to sink across a critical edge if we don't dominate the
diff --git a/lib/Transforms/Utils/Android.mk b/lib/Transforms/Utils/Android.mk
index cbd8dd0..2390027 100644
--- a/lib/Transforms/Utils/Android.mk
+++ b/lib/Transforms/Utils/Android.mk
@@ -33,7 +33,6 @@ transforms_utils_SRC_FILES := \
SimplifyIndVar.cpp \
SimplifyInstructions.cpp \
SimplifyLibCalls.cpp \
- SpecialCaseList.cpp \
UnifyFunctionExitNodes.cpp \
Utils.cpp \
ValueMapper.cpp
diff --git a/lib/Transforms/Utils/CMakeLists.txt b/lib/Transforms/Utils/CMakeLists.txt
index e10ca90..fcf548f 100644
--- a/lib/Transforms/Utils/CMakeLists.txt
+++ b/lib/Transforms/Utils/CMakeLists.txt
@@ -33,7 +33,6 @@ add_llvm_library(LLVMTransformUtils
SimplifyIndVar.cpp
SimplifyInstructions.cpp
SimplifyLibCalls.cpp
- SpecialCaseList.cpp
UnifyFunctionExitNodes.cpp
Utils.cpp
ValueMapper.cpp
diff --git a/lib/Transforms/Utils/CloneModule.cpp b/lib/Transforms/Utils/CloneModule.cpp
index eb67db1..3f75b3e 100644
--- a/lib/Transforms/Utils/CloneModule.cpp
+++ b/lib/Transforms/Utils/CloneModule.cpp
@@ -107,7 +107,7 @@ Module *llvm::CloneModule(const Module *M, ValueToValueMapTy &VMap) {
for (Module::const_alias_iterator I = M->alias_begin(), E = M->alias_end();
I != E; ++I) {
GlobalAlias *GA = cast<GlobalAlias>(VMap[I]);
- if (const GlobalObject *C = I->getAliasee())
+ if (const Constant *C = I->getAliasee())
GA->setAliasee(cast<GlobalObject>(MapValue(C, VMap)));
}
diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp
index e01d0c3..f0a9f2b 100644
--- a/lib/Transforms/Utils/InlineFunction.cpp
+++ b/lib/Transforms/Utils/InlineFunction.cpp
@@ -189,6 +189,7 @@ static void HandleCallsInBlockInlinedThroughInvoke(BasicBlock *BB,
InvokeInst *II = InvokeInst::Create(CI->getCalledValue(), Split,
Invoke.getOuterResumeDest(),
InvokeArgs, CI->getName(), BB);
+ II->setDebugLoc(CI->getDebugLoc());
II->setCallingConv(CI->getCallingConv());
II->setAttributes(CI->getAttributes());
@@ -466,7 +467,13 @@ static void fixupLineNumbers(Function *Fn, Function::iterator FI,
for (BasicBlock::iterator BI = FI->begin(), BE = FI->end();
BI != BE; ++BI) {
DebugLoc DL = BI->getDebugLoc();
- if (!DL.isUnknown()) {
+ if (DL.isUnknown()) {
+ // If the inlined instruction has no line number, make it look as if it
+ // originates from the call location. This is important for
+      // __attribute__((__always_inline__, __nodebug__)) functions which must
+      // use caller location for all instructions in their function body.
+ BI->setDebugLoc(TheCallDL);
+ } else {
BI->setDebugLoc(updateInlinedAtInfo(DL, TheCallDL, BI->getContext()));
if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(BI)) {
LLVMContext &Ctx = BI->getContext();
diff --git a/lib/Transforms/Utils/LoopSimplify.cpp b/lib/Transforms/Utils/LoopSimplify.cpp
index f7787da..ef42291 100644
--- a/lib/Transforms/Utils/LoopSimplify.cpp
+++ b/lib/Transforms/Utils/LoopSimplify.cpp
@@ -50,6 +50,7 @@
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
@@ -473,7 +474,8 @@ static BasicBlock *insertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader,
/// explicit if they accepted the analysis directly and then updated it.
static bool simplifyOneLoop(Loop *L, SmallVectorImpl<Loop *> &Worklist,
AliasAnalysis *AA, DominatorTree *DT, LoopInfo *LI,
- ScalarEvolution *SE, Pass *PP) {
+ ScalarEvolution *SE, Pass *PP,
+ const DataLayout *DL) {
bool Changed = false;
ReprocessLoop:
@@ -672,7 +674,7 @@ ReprocessLoop:
// The block has now been cleared of all instructions except for
// a comparison and a conditional branch. SimplifyCFG may be able
// to fold it now.
- if (!FoldBranchToCommonDest(BI)) continue;
+ if (!FoldBranchToCommonDest(BI, DL)) continue;
// Success. The block is now dead, so remove it from the loop,
// update the dominator tree and delete it.
@@ -709,7 +711,8 @@ ReprocessLoop:
}
bool llvm::simplifyLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, Pass *PP,
- AliasAnalysis *AA, ScalarEvolution *SE) {
+ AliasAnalysis *AA, ScalarEvolution *SE,
+ const DataLayout *DL) {
bool Changed = false;
// Worklist maintains our depth-first queue of loops in this nest to process.
@@ -726,7 +729,8 @@ bool llvm::simplifyLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, Pass *PP,
}
while (!Worklist.empty())
- Changed |= simplifyOneLoop(Worklist.pop_back_val(), Worklist, AA, DT, LI, SE, PP);
+ Changed |= simplifyOneLoop(Worklist.pop_back_val(), Worklist, AA, DT, LI,
+ SE, PP, DL);
return Changed;
}
@@ -744,6 +748,7 @@ namespace {
DominatorTree *DT;
LoopInfo *LI;
ScalarEvolution *SE;
+ const DataLayout *DL;
bool runOnFunction(Function &F) override;
@@ -787,10 +792,12 @@ bool LoopSimplify::runOnFunction(Function &F) {
LI = &getAnalysis<LoopInfo>();
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
SE = getAnalysisIfAvailable<ScalarEvolution>();
+ DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
+ DL = DLP ? &DLP->getDataLayout() : nullptr;
// Simplify each loop nest in the function.
for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I)
- Changed |= simplifyLoop(*I, DT, LI, this, AA, SE);
+ Changed |= simplifyLoop(*I, DT, LI, this, AA, SE, DL);
return Changed;
}
diff --git a/lib/Transforms/Utils/LoopUnroll.cpp b/lib/Transforms/Utils/LoopUnroll.cpp
index d953e30..c86b82c 100644
--- a/lib/Transforms/Utils/LoopUnroll.cpp
+++ b/lib/Transforms/Utils/LoopUnroll.cpp
@@ -23,6 +23,7 @@
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/LLVMContext.h"
@@ -242,21 +243,25 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
Twine("completely unrolled loop with ") +
Twine(TripCount) + " iterations");
} else {
+ auto EmitDiag = [&](const Twine &T) {
+ emitOptimizationRemark(Ctx, DEBUG_TYPE, *F, LoopLoc,
+ "unrolled loop by a factor of " + Twine(Count) +
+ T);
+ };
+
DEBUG(dbgs() << "UNROLLING loop %" << Header->getName()
<< " by " << Count);
- Twine DiagMsg("unrolled loop by a factor of " + Twine(Count));
if (TripMultiple == 0 || BreakoutTrip != TripMultiple) {
DEBUG(dbgs() << " with a breakout at trip " << BreakoutTrip);
- DiagMsg.concat(" with a breakout at trip " + Twine(BreakoutTrip));
+ EmitDiag(" with a breakout at trip " + Twine(BreakoutTrip));
} else if (TripMultiple != 1) {
DEBUG(dbgs() << " with " << TripMultiple << " trips per branch");
- DiagMsg.concat(" with " + Twine(TripMultiple) + " trips per branch");
+ EmitDiag(" with " + Twine(TripMultiple) + " trips per branch");
} else if (RuntimeTripCount) {
DEBUG(dbgs() << " with run-time trip count");
- DiagMsg.concat(" with run-time trip count");
+ EmitDiag(" with run-time trip count");
}
DEBUG(dbgs() << "!\n");
- emitOptimizationRemark(Ctx, DEBUG_TYPE, *F, LoopLoc, DiagMsg);
}
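
A standalone sketch (plain C++, not part of the patch) of the EmitDiag pattern adopted above: LLVM's Twine does not own its operands, and Twine::concat returns a new Twine rather than modifying the receiver, so accumulating a message for later emission risked both dropping the suffix and referencing dead temporaries; building and emitting the whole message in one call while every piece is alive avoids both problems.

    #include <iostream>
    #include <string>

    int main() {
      unsigned Count = 4, BreakoutTrip = 3;
      auto EmitDiag = [&](const std::string &T) {
        std::cout << "unrolled loop by a factor of " << Count << T << '\n';
      };
      EmitDiag(" with a breakout at trip " + std::to_string(BreakoutTrip));
      return 0;
    }
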
bool ContinueOnTrue = L->contains(BI->getSuccessor(0));
@@ -485,8 +490,19 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
if (!OuterL && !CompletelyUnroll)
OuterL = L;
if (OuterL) {
+ DataLayoutPass *DLP = PP->getAnalysisIfAvailable<DataLayoutPass>();
+ const DataLayout *DL = DLP ? &DLP->getDataLayout() : nullptr;
ScalarEvolution *SE = PP->getAnalysisIfAvailable<ScalarEvolution>();
- simplifyLoop(OuterL, DT, LI, PP, /*AliasAnalysis*/ nullptr, SE);
+ simplifyLoop(OuterL, DT, LI, PP, /*AliasAnalysis*/ nullptr, SE, DL);
+
+ // LCSSA must be performed on the outermost affected loop. The unrolled
+ // loop's last loop latch is guaranteed to be in the outermost loop after
+ // deleteLoopFromQueue updates LoopInfo.
+ Loop *LatchLoop = LI->getLoopFor(Latches.back());
+ if (!OuterL->contains(LatchLoop))
+ while (OuterL->getParentLoop() != LatchLoop)
+ OuterL = OuterL->getParentLoop();
+
formLCSSARecursively(*OuterL, *DT, SE);
}
}
diff --git a/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/lib/Transforms/Utils/LoopUnrollRuntime.cpp
index 5bef091..a96c46a 100644
--- a/lib/Transforms/Utils/LoopUnrollRuntime.cpp
+++ b/lib/Transforms/Utils/LoopUnrollRuntime.cpp
@@ -280,17 +280,17 @@ bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count, LoopInfo *LI,
SCEVExpander Expander(*SE, "loop-unroll");
Value *TripCount = Expander.expandCodeFor(TripCountSC, TripCountSC->getType(),
PreHeaderBR);
- Type *CountTy = TripCount->getType();
- BinaryOperator *ModVal =
- BinaryOperator::CreateURem(TripCount,
- ConstantInt::get(CountTy, Count),
- "xtraiter");
- ModVal->insertBefore(PreHeaderBR);
-
- // Check if for no extra iterations, then jump to unrolled loop
- Value *BranchVal = new ICmpInst(PreHeaderBR,
- ICmpInst::ICMP_NE, ModVal,
- ConstantInt::get(CountTy, 0), "lcmp");
+
+ IRBuilder<> B(PreHeaderBR);
+ Value *ModVal = B.CreateAnd(TripCount, Count - 1, "xtraiter");
+
+  // If there are no extra iterations, jump to the unrolled loop. We have to
+ // check that the trip count computation didn't overflow when adding one to
+ // the backedge taken count.
+ Value *LCmp = B.CreateIsNotNull(ModVal, "lcmp.mod");
+ Value *OverflowCheck = B.CreateIsNull(TripCount, "lcmp.overflow");
+ Value *BranchVal = B.CreateOr(OverflowCheck, LCmp, "lcmp.or");
+
// Branch to either the extra iterations or the unrolled loop
// We will fix up the true branch label when adding loop body copies
BranchInst::Create(PEnd, PEnd, BranchVal, PreHeaderBR);
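
Two arithmetic facts drive the rewritten prolog check, both easy to confirm in a standalone sketch (plain C++, not part of the patch): for a power-of-two unroll count, n % Count equals n & (Count - 1), and a trip count formed as backedge-taken count + 1 wraps to 0 exactly when that addition overflows -- the case the new "lcmp.overflow" test routes to the prolog.

    #include <cassert>
    #include <cstdint>

    int main() {
      const uint32_t Count = 8;  // must be a power of two
      for (uint32_t n = 0; n < 100; ++n)
        assert(n % Count == (n & (Count - 1)));  // the CreateAnd rewrite

      uint32_t TripCount = UINT32_MAX + 1u;  // backedge-taken count was max
      assert(TripCount == 0);                // overflowed: take the prolog
      return 0;
    }
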
@@ -344,6 +344,7 @@ bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count, LoopInfo *LI,
}
// The comparison w/ the extra iteration value and branch
+ Type *CountTy = TripCount->getType();
Value *BranchVal = new ICmpInst(*NewBB, ICmpInst::ICMP_EQ, ModVal,
ConstantInt::get(CountTy, leftOverIters),
"un.tmp");
diff --git a/lib/Transforms/Utils/LowerSwitch.cpp b/lib/Transforms/Utils/LowerSwitch.cpp
index 9ef694c..eac693b 100644
--- a/lib/Transforms/Utils/LowerSwitch.cpp
+++ b/lib/Transforms/Utils/LowerSwitch.cpp
@@ -14,11 +14,13 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/CFG.h"
#include "llvm/Pass.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
@@ -58,16 +60,18 @@ namespace {
Low(low), High(high), BB(bb) { }
};
- typedef std::vector<CaseRange> CaseVector;
+ typedef std::vector<CaseRange> CaseVector;
typedef std::vector<CaseRange>::iterator CaseItr;
private:
void processSwitchInst(SwitchInst *SI);
- BasicBlock* switchConvert(CaseItr Begin, CaseItr End, Value* Val,
- BasicBlock* OrigBlock, BasicBlock* Default);
- BasicBlock* newLeafBlock(CaseRange& Leaf, Value* Val,
- BasicBlock* OrigBlock, BasicBlock* Default);
- unsigned Clusterify(CaseVector& Cases, SwitchInst *SI);
+ BasicBlock *switchConvert(CaseItr Begin, CaseItr End,
+ ConstantInt *LowerBound, ConstantInt *UpperBound,
+ Value *Val, BasicBlock *OrigBlock,
+ BasicBlock *Default);
+ BasicBlock *newLeafBlock(CaseRange &Leaf, Value *Val, BasicBlock *OrigBlock,
+ BasicBlock *Default);
+ unsigned Clusterify(CaseVector &Cases, SwitchInst *SI);
};
/// The comparison function for sorting the switch case values in the vector.
@@ -129,15 +133,26 @@ static raw_ostream& operator<<(raw_ostream &O,
// switchConvert - Convert the switch statement into a binary lookup of
// the case values. The function recursively builds this tree.
-//
-BasicBlock* LowerSwitch::switchConvert(CaseItr Begin, CaseItr End,
- Value* Val, BasicBlock* OrigBlock,
- BasicBlock* Default)
-{
+// LowerBound and UpperBound are used to keep track of the bounds for Val
+// that have already been checked by a block emitted by one of the previous
+// calls to switchConvert in the call stack.
+BasicBlock *LowerSwitch::switchConvert(CaseItr Begin, CaseItr End,
+ ConstantInt *LowerBound,
+ ConstantInt *UpperBound, Value *Val,
+ BasicBlock *OrigBlock,
+ BasicBlock *Default) {
unsigned Size = End - Begin;
- if (Size == 1)
+ if (Size == 1) {
+ // Check if the Case Range is perfectly squeezed in between
+    // already checked Upper and Lower bounds. If it is, we can avoid
+ // emitting the code that checks if the value actually falls in the range
+ // because the bounds already tell us so.
+ if (Begin->Low == LowerBound && Begin->High == UpperBound) {
+ return Begin->BB;
+ }
return newLeafBlock(*Begin, Val, OrigBlock, Default);
+ }
unsigned Mid = Size / 2;
std::vector<CaseRange> LHS(Begin, Begin + Mid);
@@ -145,15 +160,50 @@ BasicBlock* LowerSwitch::switchConvert(CaseItr Begin, CaseItr End,
std::vector<CaseRange> RHS(Begin + Mid, End);
DEBUG(dbgs() << "RHS: " << RHS << "\n");
- CaseRange& Pivot = *(Begin + Mid);
- DEBUG(dbgs() << "Pivot ==> "
- << cast<ConstantInt>(Pivot.Low)->getValue() << " -"
- << cast<ConstantInt>(Pivot.High)->getValue() << "\n");
+ CaseRange &Pivot = *(Begin + Mid);
+ DEBUG(dbgs() << "Pivot ==> "
+ << cast<ConstantInt>(Pivot.Low)->getValue()
+ << " -" << cast<ConstantInt>(Pivot.High)->getValue() << "\n");
+
+ // NewLowerBound here should never be the integer minimal value.
+ // This is because it is computed from a case range that is never
+  // the smallest, so there is always a case range with a smaller value.
+ ConstantInt *NewLowerBound = cast<ConstantInt>(Pivot.Low);
+ ConstantInt *NewUpperBound;
+
+ // If we don't have a Default block then it means that we can never
+ // have a value outside of a case range, so set the UpperBound to the highest
+ // value in the LHS part of the case ranges.
+ if (Default != nullptr) {
+    // Because NewLowerBound is never the smallest representable integer,
+ // it is safe here to subtract one.
+ NewUpperBound = ConstantInt::get(NewLowerBound->getContext(),
+ NewLowerBound->getValue() - 1);
+ } else {
+ CaseItr LastLHS = LHS.begin() + LHS.size() - 1;
+ NewUpperBound = cast<ConstantInt>(LastLHS->High);
+ }
- BasicBlock* LBranch = switchConvert(LHS.begin(), LHS.end(), Val,
- OrigBlock, Default);
- BasicBlock* RBranch = switchConvert(RHS.begin(), RHS.end(), Val,
- OrigBlock, Default);
+ DEBUG(dbgs() << "LHS Bounds ==> ";
+ if (LowerBound) {
+ dbgs() << cast<ConstantInt>(LowerBound)->getSExtValue();
+ } else {
+ dbgs() << "NONE";
+ }
+ dbgs() << " - " << NewUpperBound->getSExtValue() << "\n";
+ dbgs() << "RHS Bounds ==> ";
+ dbgs() << NewLowerBound->getSExtValue() << " - ";
+ if (UpperBound) {
+ dbgs() << cast<ConstantInt>(UpperBound)->getSExtValue() << "\n";
+ } else {
+ dbgs() << "NONE\n";
+ });
+
+ BasicBlock *LBranch = switchConvert(LHS.begin(), LHS.end(), LowerBound,
+ NewUpperBound, Val, OrigBlock, Default);
+ BasicBlock *RBranch = switchConvert(RHS.begin(), RHS.end(), NewLowerBound,
+ UpperBound, Val, OrigBlock, Default);
// Create a new node that checks if the value is < pivot. Go to the
// left branch if it is and right branch if not.
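
The shape of the tree switchConvert builds can be mirrored by a standalone sketch (plain C++, not the pass itself): split the sorted case list at a pivot, test Val against the pivot to pick a half, and recurse until one case remains, where -1 stands in for the default destination.

    #include <cassert>
    #include <vector>

    static int dispatch(const std::vector<int> &Cases, size_t Begin,
                        size_t End, int Val) {
      size_t Size = End - Begin;
      if (Size == 1)
        return Cases[Begin] == Val ? (int)Begin : -1;  // leaf: range check
      size_t Mid = Size / 2;
      if (Val < Cases[Begin + Mid])                    // "val < pivot"
        return dispatch(Cases, Begin, Begin + Mid, Val);
      return dispatch(Cases, Begin + Mid, End, Val);
    }

    int main() {
      std::vector<int> Cases = {1, 4, 9, 16, 25};  // sorted case values
      assert(dispatch(Cases, 0, Cases.size(), 9) == 2);
      assert(dispatch(Cases, 0, Cases.size(), 10) == -1);
      return 0;
    }

When the tracked bounds already pin a leaf's range exactly, even that last check is redundant, which is what the new LowerBound and UpperBound parameters let the real pass detect.
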
@@ -291,13 +341,19 @@ void LowerSwitch::processSwitchInst(SwitchInst *SI) {
return;
}
+ const bool DefaultIsUnreachable =
+ Default->size() == 1 && isa<UnreachableInst>(Default->getTerminator());
// Create a new, empty default block so that the new hierarchy of
// if-then statements go to this and the PHI nodes are happy.
- BasicBlock* NewDefault = BasicBlock::Create(SI->getContext(), "NewDefault");
- F->getBasicBlockList().insert(Default, NewDefault);
-
- BranchInst::Create(Default, NewDefault);
-
+  // If the default block is unreachable, we avoid creating one because it
+  // will never be a valid target.
+ BasicBlock *NewDefault = nullptr;
+ if (!DefaultIsUnreachable) {
+ NewDefault = BasicBlock::Create(SI->getContext(), "NewDefault");
+ F->getBasicBlockList().insert(Default, NewDefault);
+
+ BranchInst::Create(Default, NewDefault);
+ }
// If there is an entry in any PHI nodes for the default edge, make sure
// to update them as well.
for (BasicBlock::iterator I = Default->begin(); isa<PHINode>(I); ++I) {
@@ -316,12 +372,31 @@ void LowerSwitch::processSwitchInst(SwitchInst *SI) {
DEBUG(dbgs() << "Cases: " << Cases << "\n");
(void)numCmps;
- BasicBlock* SwitchBlock = switchConvert(Cases.begin(), Cases.end(), Val,
- OrigBlock, NewDefault);
+ ConstantInt *UpperBound = nullptr;
+ ConstantInt *LowerBound = nullptr;
+
+ // Optimize the condition where Default is an unreachable block. In this case
+ // we can make the bounds tightly fitted around the case value ranges,
+ // because we know that the value passed to the switch should always be
+ // exactly one of the case values.
+ if (DefaultIsUnreachable) {
+ CaseItr LastCase = Cases.begin() + Cases.size() - 1;
+ UpperBound = cast<ConstantInt>(LastCase->High);
+ LowerBound = cast<ConstantInt>(Cases.begin()->Low);
+ }
+ BasicBlock *SwitchBlock =
+ switchConvert(Cases.begin(), Cases.end(), LowerBound, UpperBound, Val,
+ OrigBlock, NewDefault);
// Branch to our shiny new if-then stuff...
BranchInst::Create(SwitchBlock, OrigBlock);
// We are now done with the switch instruction, delete it.
CurBlock->getInstList().erase(SI);
+
+ pred_iterator PI = pred_begin(Default), E = pred_end(Default);
+  // If the Default block has no more predecessors, just remove it.
+ if (PI == E) {
+ DeleteDeadBlock(Default);
+ }
}
diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp
index 150dbdd..960b198 100644
--- a/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -201,8 +201,8 @@ static void AddPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred,
/// ComputeSpeculationCost - Compute an abstract "cost" of speculating the
/// given instruction, which is assumed to be safe to speculate. 1 means
/// cheap, 2 means less cheap, and UINT_MAX means prohibitively expensive.
-static unsigned ComputeSpeculationCost(const User *I) {
- assert(isSafeToSpeculativelyExecute(I) &&
+static unsigned ComputeSpeculationCost(const User *I, const DataLayout *DL) {
+ assert(isSafeToSpeculativelyExecute(I, DL) &&
"Instruction is not safe to speculatively execute!");
switch (Operator::getOpcode(I)) {
default:
@@ -227,6 +227,9 @@ static unsigned ComputeSpeculationCost(const User *I) {
case Instruction::Trunc:
case Instruction::ZExt:
case Instruction::SExt:
+ case Instruction::BitCast:
+ case Instruction::ExtractElement:
+ case Instruction::InsertElement:
return 1; // These are all cheap.
case Instruction::Call:
@@ -254,7 +257,8 @@ static unsigned ComputeSpeculationCost(const User *I) {
/// CostRemaining, false is returned and CostRemaining is undefined.
static bool DominatesMergePoint(Value *V, BasicBlock *BB,
SmallPtrSet<Instruction*, 4> *AggressiveInsts,
- unsigned &CostRemaining) {
+ unsigned &CostRemaining,
+ const DataLayout *DL) {
Instruction *I = dyn_cast<Instruction>(V);
if (!I) {
// Non-instructions all dominate instructions, but not all constantexprs
@@ -287,10 +291,10 @@ static bool DominatesMergePoint(Value *V, BasicBlock *BB,
// Okay, it looks like the instruction IS in the "condition". Check to
// see if it's a cheap instruction to unconditionally compute, and if it
// only uses stuff defined outside of the condition. If so, hoist it out.
- if (!isSafeToSpeculativelyExecute(I))
+ if (!isSafeToSpeculativelyExecute(I, DL))
return false;
- unsigned Cost = ComputeSpeculationCost(I);
+ unsigned Cost = ComputeSpeculationCost(I, DL);
if (Cost > CostRemaining)
return false;
@@ -300,7 +304,7 @@ static bool DominatesMergePoint(Value *V, BasicBlock *BB,
// Okay, we can only really hoist these out if their operands do
// not take us over the cost threshold.
for (User::op_iterator i = I->op_begin(), e = I->op_end(); i != e; ++i)
- if (!DominatesMergePoint(*i, BB, AggressiveInsts, CostRemaining))
+ if (!DominatesMergePoint(*i, BB, AggressiveInsts, CostRemaining, DL))
return false;
// Okay, it's safe to do this! Remember this instruction.
AggressiveInsts->insert(I);
@@ -994,7 +998,7 @@ static bool isSafeToHoistInvoke(BasicBlock *BB1, BasicBlock *BB2,
/// HoistThenElseCodeToIf - Given a conditional branch that goes to BB1 and
/// BB2, hoist any common code in the two blocks up into the branch block. The
/// caller of this function guarantees that BI's block dominates BB1 and BB2.
-static bool HoistThenElseCodeToIf(BranchInst *BI) {
+static bool HoistThenElseCodeToIf(BranchInst *BI, const DataLayout *DL) {
// This does very trivial matching, with limited scanning, to find identical
// instructions in the two blocks. In particular, we don't want to get into
// O(M*N) situations here where M and N are the sizes of BB1 and BB2. As
@@ -1068,9 +1072,9 @@ HoistTerminator:
if (BB1V == BB2V)
continue;
- if (isa<ConstantExpr>(BB1V) && !isSafeToSpeculativelyExecute(BB1V))
+ if (isa<ConstantExpr>(BB1V) && !isSafeToSpeculativelyExecute(BB1V, DL))
return Changed;
- if (isa<ConstantExpr>(BB2V) && !isSafeToSpeculativelyExecute(BB2V))
+ if (isa<ConstantExpr>(BB2V) && !isSafeToSpeculativelyExecute(BB2V, DL))
return Changed;
}
}
@@ -1387,7 +1391,8 @@ static Value *isSafeToSpeculateStore(Instruction *I, BasicBlock *BrBB,
/// \endcode
///
/// \returns true if the conditional block is removed.
-static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB) {
+static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB,
+ const DataLayout *DL) {
// Be conservative for now. FP select instruction can often be expensive.
Value *BrCond = BI->getCondition();
if (isa<FCmpInst>(BrCond))
@@ -1430,13 +1435,13 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB) {
return false;
// Don't hoist the instruction if it's unsafe or expensive.
- if (!isSafeToSpeculativelyExecute(I) &&
+ if (!isSafeToSpeculativelyExecute(I, DL) &&
!(HoistCondStores &&
(SpeculatedStoreValue = isSafeToSpeculateStore(I, BB, ThenBB,
EndBB))))
return false;
if (!SpeculatedStoreValue &&
- ComputeSpeculationCost(I) > PHINodeFoldingThreshold)
+ ComputeSpeculationCost(I, DL) > PHINodeFoldingThreshold)
return false;
// Store the store speculation candidate.
@@ -1487,11 +1492,11 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB) {
if (!OrigCE && !ThenCE)
continue; // Known safe and cheap.
- if ((ThenCE && !isSafeToSpeculativelyExecute(ThenCE)) ||
- (OrigCE && !isSafeToSpeculativelyExecute(OrigCE)))
+ if ((ThenCE && !isSafeToSpeculativelyExecute(ThenCE, DL)) ||
+ (OrigCE && !isSafeToSpeculativelyExecute(OrigCE, DL)))
return false;
- unsigned OrigCost = OrigCE ? ComputeSpeculationCost(OrigCE) : 0;
- unsigned ThenCost = ThenCE ? ComputeSpeculationCost(ThenCE) : 0;
+ unsigned OrigCost = OrigCE ? ComputeSpeculationCost(OrigCE, DL) : 0;
+ unsigned ThenCost = ThenCE ? ComputeSpeculationCost(ThenCE, DL) : 0;
if (OrigCost + ThenCost > 2 * PHINodeFoldingThreshold)
return false;
@@ -1738,9 +1743,9 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const DataLayout *DL) {
}
if (!DominatesMergePoint(PN->getIncomingValue(0), BB, &AggressiveInsts,
- MaxCostVal0) ||
+ MaxCostVal0, DL) ||
!DominatesMergePoint(PN->getIncomingValue(1), BB, &AggressiveInsts,
- MaxCostVal1))
+ MaxCostVal1, DL))
return false;
}
@@ -1958,7 +1963,7 @@ static bool checkCSEInPredecessor(Instruction *Inst, BasicBlock *PB) {
/// FoldBranchToCommonDest - If this basic block is simple enough, and if a
/// predecessor branches to us and one of our successors, fold the block into
/// the predecessor and use logical operations to pick the right destination.
-bool llvm::FoldBranchToCommonDest(BranchInst *BI) {
+bool llvm::FoldBranchToCommonDest(BranchInst *BI, const DataLayout *DL) {
BasicBlock *BB = BI->getParent();
Instruction *Cond = nullptr;
@@ -2010,7 +2015,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) {
Instruction *BonusInst = nullptr;
if (&*FrontIt != Cond &&
FrontIt->hasOneUse() && FrontIt->user_back() == Cond &&
- isSafeToSpeculativelyExecute(FrontIt)) {
+ isSafeToSpeculativelyExecute(FrontIt, DL)) {
BonusInst = &*FrontIt;
++FrontIt;
@@ -2025,7 +2030,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) {
// Make sure the instruction after the condition is the cond branch.
BasicBlock::iterator CondIt = Cond; ++CondIt;
- // Ingore dbg intrinsics.
+ // Ignore dbg intrinsics.
while (isa<DbgInfoIntrinsic>(CondIt)) ++CondIt;
if (&*CondIt != BI)
@@ -2340,7 +2345,7 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) {
}
// If this is a conditional branch in an empty block, and if any
- // predecessors is a conditional branch to one of our destinations,
+ // predecessors are a conditional branch to one of our destinations,
// fold the conditions into logical ops and one cond br.
BasicBlock::iterator BBI = BB->begin();
// Ignore dbg intrinsics.
@@ -2375,16 +2380,33 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) {
// Do not perform this transformation if it would require
// insertion of a large number of select instructions. For targets
// without predication/cmovs, this is a big pessimization.
- BasicBlock *CommonDest = PBI->getSuccessor(PBIOp);
+ // Also do not perform this transformation if any phi node in the common
+ // destination block can trap when reached by BB or PBB (PR17073). In that
+ // case, it would be unsafe to hoist the operation into a select instruction.
+
+ BasicBlock *CommonDest = PBI->getSuccessor(PBIOp);
unsigned NumPhis = 0;
for (BasicBlock::iterator II = CommonDest->begin();
- isa<PHINode>(II); ++II, ++NumPhis)
+ isa<PHINode>(II); ++II, ++NumPhis) {
if (NumPhis > 2) // Disable this xform.
return false;
+ PHINode *PN = cast<PHINode>(II);
+ Value *BIV = PN->getIncomingValueForBlock(BB);
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(BIV))
+ if (CE->canTrap())
+ return false;
+
+ unsigned PBBIdx = PN->getBasicBlockIndex(PBI->getParent());
+ Value *PBIV = PN->getIncomingValue(PBBIdx);
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(PBIV))
+ if (CE->canTrap())
+ return false;
+ }
+
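
The trap check above guards against a concrete hazard, sketched here in standalone form (plain C++, not part of the patch): a branch evaluates a trapping expression such as a division only on the taken path, while the folded select-style form would evaluate both incoming values unconditionally.

    #include <cstdio>

    int main() {
      int x = 10, y = 0, cond = 0;
      int r = cond ? x / y : -1;  // safe: x / y is reached only when cond
      // The folded shape would compute both arms first and then select:
      //   int div = x / y;       // traps here when y == 0
      //   int r2 = cond ? div : -1;
      std::printf("%d\n", r);
      return 0;
    }
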
// Finally, if everything is ok, fold the branches to logical ops.
- BasicBlock *OtherDest = BI->getSuccessor(BIOp ^ 1);
+ BasicBlock *OtherDest = BI->getSuccessor(BIOp ^ 1);
DEBUG(dbgs() << "FOLDING BRs:" << *PBI->getParent()
<< "AND: " << *BI->getParent());
@@ -3308,6 +3330,11 @@ static bool ForwardSwitchConditionToPHI(SwitchInst *SI) {
/// ValidLookupTableConstant - Return true if the backend will be able to handle
/// initializing an array of constants like C.
static bool ValidLookupTableConstant(Constant *C) {
+ if (C->isThreadDependent())
+ return false;
+ if (C->isDLLImportDependent())
+ return false;
+
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
return CE->isGEPWithNoNotionalOverIndexing();
@@ -3521,7 +3548,8 @@ SwitchLookupTable::SwitchLookupTable(Module &M,
// Fill in any holes in the table with the default result.
if (Values.size() < TableSize) {
- assert(DefaultValue && "Need a default value to fill the lookup table holes.");
+ assert(DefaultValue &&
+ "Need a default value to fill the lookup table holes.");
assert(DefaultValue->getType() == ValueType);
for (uint64_t I = 0; I < TableSize; ++I) {
if (!TableContents[I])
@@ -3990,7 +4018,7 @@ bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder){
// branches to us and our successor, fold the comparison into the
// predecessor and use logical operations to update the incoming value
// for PHI nodes in common successor.
- if (FoldBranchToCommonDest(BI))
+ if (FoldBranchToCommonDest(BI, DL))
return SimplifyCFG(BB, TTI, DL) | true;
return false;
}
@@ -4034,7 +4062,7 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
// If this basic block is ONLY a compare and a branch, and if a predecessor
// branches to us and one of our successors, fold the comparison into the
// predecessor and use logical operations to pick the right destination.
- if (FoldBranchToCommonDest(BI))
+ if (FoldBranchToCommonDest(BI, DL))
return SimplifyCFG(BB, TTI, DL) | true;
// We have a conditional branch to two blocks that are only reachable
@@ -4043,24 +4071,24 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
// can hoist it up to the branching block.
if (BI->getSuccessor(0)->getSinglePredecessor()) {
if (BI->getSuccessor(1)->getSinglePredecessor()) {
- if (HoistThenElseCodeToIf(BI))
+ if (HoistThenElseCodeToIf(BI, DL))
return SimplifyCFG(BB, TTI, DL) | true;
} else {
// If Successor #1 has multiple preds, we may be able to conditionally
- // execute Successor #0 if it branches to successor #1.
+ // execute Successor #0 if it branches to Successor #1.
TerminatorInst *Succ0TI = BI->getSuccessor(0)->getTerminator();
if (Succ0TI->getNumSuccessors() == 1 &&
Succ0TI->getSuccessor(0) == BI->getSuccessor(1))
- if (SpeculativelyExecuteBB(BI, BI->getSuccessor(0)))
+ if (SpeculativelyExecuteBB(BI, BI->getSuccessor(0), DL))
return SimplifyCFG(BB, TTI, DL) | true;
}
} else if (BI->getSuccessor(1)->getSinglePredecessor()) {
// If Successor #0 has multiple preds, we may be able to conditionally
- // execute Successor #1 if it branches to successor #0.
+ // execute Successor #1 if it branches to Successor #0.
TerminatorInst *Succ1TI = BI->getSuccessor(1)->getTerminator();
if (Succ1TI->getNumSuccessors() == 1 &&
Succ1TI->getSuccessor(0) == BI->getSuccessor(0))
- if (SpeculativelyExecuteBB(BI, BI->getSuccessor(1)))
+ if (SpeculativelyExecuteBB(BI, BI->getSuccessor(1), DL))
return SimplifyCFG(BB, TTI, DL) | true;
}
diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp
index 34d8a10..cb8a41d 100644
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -209,6 +209,29 @@ namespace {
class LoopVectorizationLegality;
class LoopVectorizationCostModel;
+/// Optimization analysis message produced during vectorization. Messages inform
+/// the user why vectorization did not occur.
+class Report {
+ std::string Message;
+ raw_string_ostream Out;
+ Instruction *Instr;
+
+public:
+ Report(Instruction *I = nullptr) : Out(Message), Instr(I) {
+ Out << "loop not vectorized: ";
+ }
+
+ template <typename A> Report &operator<<(const A &Value) {
+ Out << Value;
+ return *this;
+ }
+
+ Instruction *getInstr() { return Instr; }
+
+ std::string &str() { return Out.str(); }
+ operator Twine() { return Out.str(); }
+};
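
A standalone sketch (plain C++, not part of the patch) of the same stream-builder idiom: a thin wrapper over a string stream lets call sites compose a diagnostic inline, the way the pass later writes emitAnalysis(Report(I) << "...").

    #include <iostream>
    #include <sstream>
    #include <string>

    class MiniReport {
      std::ostringstream Out;
    public:
      MiniReport() { Out << "loop not vectorized: "; }
      template <typename A> MiniReport &operator<<(const A &V) {
        Out << V;
        return *this;
      }
      std::string str() const { return Out.str(); }
    };

    int main() {
      MiniReport R;
      R << "control flow cannot be substituted for a select";
      std::cout << R.str() << '\n';
      return 0;
    }
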
+
/// InnerLoopVectorizer vectorizes loops which contain only one basic
/// block to a specified vectorization factor (VF).
/// This class performs the widening of scalars into vectors, or multiple
@@ -515,10 +538,12 @@ public:
unsigned NumPredStores;
LoopVectorizationLegality(Loop *L, ScalarEvolution *SE, const DataLayout *DL,
- DominatorTree *DT, TargetLibraryInfo *TLI)
+ DominatorTree *DT, TargetLibraryInfo *TLI,
+ Function *F)
: NumLoads(0), NumStores(0), NumPredStores(0), TheLoop(L), SE(SE), DL(DL),
- DT(DT), TLI(TLI), Induction(nullptr), WidestIndTy(nullptr),
- HasFunNoNaNAttr(false), MaxSafeDepDistBytes(-1U) {}
+ DT(DT), TLI(TLI), TheFunction(F), Induction(nullptr),
+ WidestIndTy(nullptr), HasFunNoNaNAttr(false), MaxSafeDepDistBytes(-1U) {
+ }
/// This enum represents the kinds of reductions that we support.
enum ReductionKind {
@@ -747,6 +772,16 @@ private:
/// invariant.
void collectStridedAcccess(Value *LoadOrStoreInst);
+ /// Report an analysis message to assist the user in diagnosing loops that are
+ /// not vectorized.
+ void emitAnalysis(Report &Message) {
+ DebugLoc DL = TheLoop->getStartLoc();
+ if (Instruction *I = Message.getInstr())
+ DL = I->getDebugLoc();
+ emitOptimizationRemarkAnalysis(TheFunction->getContext(), DEBUG_TYPE,
+ *TheFunction, DL, Message.str());
+ }
+
/// The loop that we evaluate.
Loop *TheLoop;
/// Scev analysis.
@@ -757,6 +792,8 @@ private:
DominatorTree *DT;
/// Target Library Info.
TargetLibraryInfo *TLI;
+ /// Parent function
+ Function *TheFunction;
// --- vectorization state --- //
@@ -906,7 +943,7 @@ public:
}
/// Return the loop vectorizer metadata prefix.
- static StringRef Prefix() { return "llvm.vectorizer."; }
+ static StringRef Prefix() { return "llvm.loop.vectorize."; }
MDNode *createHint(LLVMContext &Context, StringRef Name, unsigned V) const {
SmallVector<Value*, 2> Vals;
@@ -942,6 +979,29 @@ public:
LoopID = NewLoopID;
}
+ std::string emitRemark() const {
+ Report R;
+ R << "vectorization ";
+ switch (Force) {
+ case LoopVectorizeHints::FK_Disabled:
+ R << "is explicitly disabled";
+ break;
+ case LoopVectorizeHints::FK_Enabled:
+ R << "is explicitly enabled";
+ if (Width != 0 && Unroll != 0)
+ R << " with width " << Width << " and interleave count " << Unroll;
+ else if (Width != 0)
+ R << " with width " << Width;
+ else if (Unroll != 0)
+ R << " with interleave count " << Unroll;
+ break;
+ case LoopVectorizeHints::FK_Undefined:
+ R << "was not specified";
+ break;
+ }
+ return R.str();
+ }
+
unsigned getWidth() const { return Width; }
unsigned getUnroll() const { return Unroll; }
enum ForceKind getForce() const { return Force; }
@@ -1125,18 +1185,37 @@ struct LoopVectorize : public FunctionPass {
: "?")) << " width=" << Hints.getWidth()
<< " unroll=" << Hints.getUnroll() << "\n");
+ // Function containing loop
+ Function *F = L->getHeader()->getParent();
+
+ // Looking at the diagnostic output is the only way to determine if a loop
+ // was vectorized (other than looking at the IR or machine code), so it
+ // is important to generate an optimization remark for each loop. Most of
+ // these messages are generated by emitOptimizationRemarkAnalysis. Remarks
+ // generated by emitOptimizationRemark and emitOptimizationRemarkMissed are
+    // less verbose, reporting vectorized loops and unvectorized loops that may
+ // benefit from vectorization, respectively.
+
if (Hints.getForce() == LoopVectorizeHints::FK_Disabled) {
DEBUG(dbgs() << "LV: Not vectorizing: #pragma vectorize disable.\n");
+ emitOptimizationRemarkAnalysis(F->getContext(), DEBUG_TYPE, *F,
+ L->getStartLoc(), Hints.emitRemark());
return false;
}
if (!AlwaysVectorize && Hints.getForce() != LoopVectorizeHints::FK_Enabled) {
DEBUG(dbgs() << "LV: Not vectorizing: No #pragma vectorize enable.\n");
+ emitOptimizationRemarkAnalysis(F->getContext(), DEBUG_TYPE, *F,
+ L->getStartLoc(), Hints.emitRemark());
return false;
}
if (Hints.getWidth() == 1 && Hints.getUnroll() == 1) {
DEBUG(dbgs() << "LV: Not vectorizing: Disabled/already vectorized.\n");
+ emitOptimizationRemarkAnalysis(
+ F->getContext(), DEBUG_TYPE, *F, L->getStartLoc(),
+ "loop not vectorized: vector width and interleave count are "
+ "explicitly set to 1");
return false;
}
@@ -1151,14 +1230,19 @@ struct LoopVectorize : public FunctionPass {
DEBUG(dbgs() << " But vectorizing was explicitly forced.\n");
else {
DEBUG(dbgs() << "\n");
+ emitOptimizationRemarkAnalysis(
+ F->getContext(), DEBUG_TYPE, *F, L->getStartLoc(),
+ "vectorization is not beneficial and is not explicitly forced");
return false;
}
}
// Check if it is legal to vectorize the loop.
- LoopVectorizationLegality LVL(L, SE, DL, DT, TLI);
+ LoopVectorizationLegality LVL(L, SE, DL, DT, TLI, F);
if (!LVL.canVectorize()) {
DEBUG(dbgs() << "LV: Not vectorizing: Cannot prove legality.\n");
+ emitOptimizationRemarkMissed(F->getContext(), DEBUG_TYPE, *F,
+ L->getStartLoc(), Hints.emitRemark());
return false;
}
@@ -1167,7 +1251,6 @@ struct LoopVectorize : public FunctionPass {
// Check the function attributes to find out if this function should be
// optimized for size.
- Function *F = L->getHeader()->getParent();
bool OptForSize = Hints.getForce() != LoopVectorizeHints::FK_Enabled &&
F->hasFnAttribute(Attribute::OptimizeForSize);
@@ -1190,6 +1273,11 @@ struct LoopVectorize : public FunctionPass {
if (F->hasFnAttribute(Attribute::NoImplicitFloat)) {
DEBUG(dbgs() << "LV: Can't vectorize when the NoImplicitFloat"
"attribute is used.\n");
+ emitOptimizationRemarkAnalysis(
+ F->getContext(), DEBUG_TYPE, *F, L->getStartLoc(),
+ "loop not vectorized due to NoImplicitFloat attribute");
+ emitOptimizationRemarkMissed(F->getContext(), DEBUG_TYPE, *F,
+ L->getStartLoc(), Hints.emitRemark());
return false;
}
@@ -1208,9 +1296,14 @@ struct LoopVectorize : public FunctionPass {
DEBUG(dbgs() << "LV: Unroll Factor is " << UF << '\n');
if (VF.Width == 1) {
- DEBUG(dbgs() << "LV: Vectorization is possible but not beneficial.\n");
- if (UF == 1)
+ DEBUG(dbgs() << "LV: Vectorization is possible but not beneficial\n");
+
+ if (UF == 1) {
+ emitOptimizationRemarkAnalysis(
+ F->getContext(), DEBUG_TYPE, *F, L->getStartLoc(),
+ "not beneficial to vectorize and user disabled interleaving");
return false;
+ }
DEBUG(dbgs() << "LV: Trying to at least unroll the loops.\n");
// Report the unrolling decision.
@@ -1220,6 +1313,7 @@ struct LoopVectorize : public FunctionPass {
" (vectorization not beneficial)"));
// We decided not to vectorize, but we may want to unroll.
+
InnerLoopUnroller Unroller(L, SE, LI, DT, DL, TLI, UF);
Unroller.vectorize(&LVL);
} else {
@@ -1909,20 +2003,23 @@ void InnerLoopVectorizer::createEmptyLoop() {
the vectorized instructions while the old loop will continue to run the
scalar remainder.
- [ ] <-- vector loop bypass (may consist of multiple blocks).
- / |
- / v
- | [ ] <-- vector pre header.
- | |
- | v
- | [ ] \
- | [ ]_| <-- vector loop.
- | |
- \ v
- >[ ] <--- middle-block.
- / |
- / v
- | [ ] <--- new preheader.
+ [ ] <-- Back-edge taken count overflow check.
+ / |
+ / v
+ | [ ] <-- vector loop bypass (may consist of multiple blocks).
+ | / |
+ | / v
+ || [ ] <-- vector pre header.
+ || |
+ || v
+ || [ ] \
+ || [ ]_| <-- vector loop.
+ || |
+ | \ v
+ | >[ ] <--- middle-block.
+ | / |
+ | / v
+ -|- >[ ] <--- new preheader.
| |
| v
| [ ] \
@@ -1936,6 +2033,7 @@ void InnerLoopVectorizer::createEmptyLoop() {
BasicBlock *OldBasicBlock = OrigLoop->getHeader();
BasicBlock *BypassBlock = OrigLoop->getLoopPreheader();
BasicBlock *ExitBlock = OrigLoop->getExitBlock();
+ assert(BypassBlock && "Invalid loop structure");
assert(ExitBlock && "Must have an exit block");
// Some loops have a single integer induction variable, while other loops
@@ -1958,18 +2056,30 @@ void InnerLoopVectorizer::createEmptyLoop() {
IdxTy->getPrimitiveSizeInBits())
ExitCount = SE->getTruncateOrNoop(ExitCount, IdxTy);
- ExitCount = SE->getNoopOrZeroExtend(ExitCount, IdxTy);
+ const SCEV *BackedgeTakeCount = SE->getNoopOrZeroExtend(ExitCount, IdxTy);
// Get the total trip count from the count by adding 1.
- ExitCount = SE->getAddExpr(ExitCount,
- SE->getConstant(ExitCount->getType(), 1));
+ ExitCount = SE->getAddExpr(BackedgeTakeCount,
+ SE->getConstant(BackedgeTakeCount->getType(), 1));
// Expand the trip count and place the new instructions in the preheader.
// Notice that the pre-header does not change, only the loop body.
SCEVExpander Exp(*SE, "induction");
- // Count holds the overall loop count (N).
- Value *Count = Exp.expandCodeFor(ExitCount, ExitCount->getType(),
- BypassBlock->getTerminator());
+ // We need to test whether the backedge-taken count is uint##_max. Adding one
+ // to it will cause overflow and an incorrect loop trip count in the vector
+ // body. In case of overflow we want to directly jump to the scalar remainder
+ // loop.
+ Value *BackedgeCount =
+ Exp.expandCodeFor(BackedgeTakeCount, BackedgeTakeCount->getType(),
+ BypassBlock->getTerminator());
+ if (BackedgeCount->getType()->isPointerTy())
+ BackedgeCount = CastInst::CreatePointerCast(BackedgeCount, IdxTy,
+ "backedge.ptrcnt.to.int",
+ BypassBlock->getTerminator());
+ Instruction *CheckBCOverflow =
+ CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ, BackedgeCount,
+ Constant::getAllOnesValue(BackedgeCount->getType()),
+ "backedge.overflow", BypassBlock->getTerminator());
// The loop index does not have to start at Zero. Find the original start
// value from the induction PHI node. If we don't have an induction variable
@@ -1980,7 +2090,18 @@ void InnerLoopVectorizer::createEmptyLoop() {
IdxTy):
ConstantInt::get(IdxTy, 0);
- assert(BypassBlock && "Invalid loop structure");
+ // We need an instruction to anchor the overflow check on. StartIdx needs to
+ // be defined before the overflow check branch. Because the scalar preheader
+  // is going to merge the start index, the overflow branch block needs to
+ // contain a definition of the start index.
+ Instruction *OverflowCheckAnchor = BinaryOperator::CreateAdd(
+ StartIdx, ConstantInt::get(IdxTy, 0), "overflow.check.anchor",
+ BypassBlock->getTerminator());
+
+ // Count holds the overall loop count (N).
+ Value *Count = Exp.expandCodeFor(ExitCount, ExitCount->getType(),
+ BypassBlock->getTerminator());
+
LoopBypassBlocks.push_back(BypassBlock);
// Split the single block loop into the two loop structure described above.
@@ -2049,29 +2170,45 @@ void InnerLoopVectorizer::createEmptyLoop() {
// Now, compare the new count to zero. If it is zero skip the vector loop and
// jump to the scalar loop.
- Value *Cmp = BypassBuilder.CreateICmpEQ(IdxEndRoundDown, StartIdx,
- "cmp.zero");
+ Value *Cmp =
+ BypassBuilder.CreateICmpEQ(IdxEndRoundDown, StartIdx, "cmp.zero");
BasicBlock *LastBypassBlock = BypassBlock;
+  // Generate code to check that the loop's trip count, which we computed by
+  // adding one to the backedge-taken count, will not overflow.
+ {
+ auto PastOverflowCheck =
+ std::next(BasicBlock::iterator(OverflowCheckAnchor));
+ BasicBlock *CheckBlock =
+ LastBypassBlock->splitBasicBlock(PastOverflowCheck, "overflow.checked");
+ if (ParentLoop)
+ ParentLoop->addBasicBlockToLoop(CheckBlock, LI->getBase());
+ LoopBypassBlocks.push_back(CheckBlock);
+ Instruction *OldTerm = LastBypassBlock->getTerminator();
+ BranchInst::Create(ScalarPH, CheckBlock, CheckBCOverflow, OldTerm);
+ OldTerm->eraseFromParent();
+ LastBypassBlock = CheckBlock;
+ }
+
// Generate the code to check that the strides we assumed to be one are really
// one. We want the new basic block to start at the first instruction in a
// sequence of instructions that form a check.
Instruction *StrideCheck;
Instruction *FirstCheckInst;
std::tie(FirstCheckInst, StrideCheck) =
- addStrideCheck(BypassBlock->getTerminator());
+ addStrideCheck(LastBypassBlock->getTerminator());
if (StrideCheck) {
// Create a new block containing the stride check.
BasicBlock *CheckBlock =
- BypassBlock->splitBasicBlock(FirstCheckInst, "vector.stridecheck");
+ LastBypassBlock->splitBasicBlock(FirstCheckInst, "vector.stridecheck");
if (ParentLoop)
ParentLoop->addBasicBlockToLoop(CheckBlock, LI->getBase());
LoopBypassBlocks.push_back(CheckBlock);
// Replace the branch into the memory check block with a conditional branch
// for the "few elements case".
- Instruction *OldTerm = BypassBlock->getTerminator();
+ Instruction *OldTerm = LastBypassBlock->getTerminator();
BranchInst::Create(MiddleBlock, CheckBlock, Cmp, OldTerm);
OldTerm->eraseFromParent();
@@ -2134,6 +2271,19 @@ void InnerLoopVectorizer::createEmptyLoop() {
PHINode::Create(OrigPhi->getType(), 2, "trunc.resume.val",
MiddleBlock->getTerminator()) : nullptr;
+ // Create phi nodes to merge from the backedge-taken check block.
+ PHINode *BCResumeVal = PHINode::Create(ResumeValTy, 3, "bc.resume.val",
+ ScalarPH->getTerminator());
+ BCResumeVal->addIncoming(ResumeVal, MiddleBlock);
+
+ PHINode *BCTruncResumeVal = nullptr;
+ if (OrigPhi == OldInduction) {
+ BCTruncResumeVal =
+ PHINode::Create(OrigPhi->getType(), 2, "bc.trunc.resume.val",
+ ScalarPH->getTerminator());
+ BCTruncResumeVal->addIncoming(TruncResumeVal, MiddleBlock);
+ }
+
Value *EndValue = nullptr;
switch (II.IK) {
case LoopVectorizationLegality::IK_NoInduction:
@@ -2150,10 +2300,12 @@ void InnerLoopVectorizer::createEmptyLoop() {
BypassBuilder.CreateTrunc(IdxEndRoundDown, OrigPhi->getType());
// The new PHI merges the original incoming value, in case of a bypass,
// or the value at the end of the vectorized loop.
- for (unsigned I = 0, E = LoopBypassBlocks.size(); I != E; ++I)
+ for (unsigned I = 1, E = LoopBypassBlocks.size(); I != E; ++I)
TruncResumeVal->addIncoming(II.StartValue, LoopBypassBlocks[I]);
TruncResumeVal->addIncoming(EndValue, VecBody);
+ BCTruncResumeVal->addIncoming(II.StartValue, LoopBypassBlocks[0]);
+
// We know what the end value is.
EndValue = IdxEndRoundDown;
// We also know which PHI node holds it.
@@ -2199,7 +2351,7 @@ void InnerLoopVectorizer::createEmptyLoop() {
// The new PHI merges the original incoming value, in case of a bypass,
// or the value at the end of the vectorized loop.
- for (unsigned I = 0, E = LoopBypassBlocks.size(); I != E; ++I) {
+ for (unsigned I = 1, E = LoopBypassBlocks.size(); I != E; ++I) {
if (OrigPhi == OldInduction)
ResumeVal->addIncoming(StartIdx, LoopBypassBlocks[I]);
else
@@ -2209,11 +2361,16 @@ void InnerLoopVectorizer::createEmptyLoop() {
// Fix the scalar body counter (PHI node).
unsigned BlockIdx = OrigPhi->getBasicBlockIndex(ScalarPH);
- // The old inductions phi node in the scalar body needs the truncated value.
- if (OrigPhi == OldInduction)
- OrigPhi->setIncomingValue(BlockIdx, TruncResumeVal);
- else
- OrigPhi->setIncomingValue(BlockIdx, ResumeVal);
+
+ // The old induction's phi node in the scalar body needs the truncated
+ // value.
+ if (OrigPhi == OldInduction) {
+ BCResumeVal->addIncoming(StartIdx, LoopBypassBlocks[0]);
+ OrigPhi->setIncomingValue(BlockIdx, BCTruncResumeVal);
+ } else {
+ BCResumeVal->addIncoming(II.StartValue, LoopBypassBlocks[0]);
+ OrigPhi->setIncomingValue(BlockIdx, BCResumeVal);
+ }
}
// If we are generating a new induction variable then we also need to
@@ -2224,7 +2381,7 @@ void InnerLoopVectorizer::createEmptyLoop() {
assert(!ResumeIndex && "Unexpected resume value found");
ResumeIndex = PHINode::Create(IdxTy, 2, "new.indc.resume.val",
MiddleBlock->getTerminator());
- for (unsigned I = 0, E = LoopBypassBlocks.size(); I != E; ++I)
+ for (unsigned I = 1, E = LoopBypassBlocks.size(); I != E; ++I)
ResumeIndex->addIncoming(StartIdx, LoopBypassBlocks[I]);
ResumeIndex->addIncoming(IdxEndRoundDown, VecBody);
}
@@ -2494,7 +2651,7 @@ void InnerLoopVectorizer::vectorizeLoop() {
// To do so, we need to generate the 'identity' vector and override
// one of the elements with the incoming scalar reduction. We need
// to do it in the vector-loop preheader.
- Builder.SetInsertPoint(LoopBypassBlocks.front()->getTerminator());
+ Builder.SetInsertPoint(LoopBypassBlocks[1]->getTerminator());
// This is the vector-clone of the value that leaves the loop.
VectorParts &VectorExit = getVectorValue(RdxDesc.LoopExitInstr);
@@ -2568,7 +2725,7 @@ void InnerLoopVectorizer::vectorizeLoop() {
VectorParts &RdxExitVal = getVectorValue(RdxDesc.LoopExitInstr);
PHINode *NewPhi = Builder.CreatePHI(VecTy, 2, "rdx.vec.exit.phi");
Value *StartVal = (part == 0) ? VectorStart : Identity;
- for (unsigned I = 0, E = LoopBypassBlocks.size(); I != E; ++I)
+ for (unsigned I = 1, E = LoopBypassBlocks.size(); I != E; ++I)
NewPhi->addIncoming(StartVal, LoopBypassBlocks[I]);
NewPhi->addIncoming(RdxExitVal[part],
LoopVectorBody.back());
@@ -2626,6 +2783,13 @@ void InnerLoopVectorizer::vectorizeLoop() {
Builder.getInt32(0));
}
+ // Create a phi node that merges control-flow from the backedge-taken check
+ // block and the middle block.
+ PHINode *BCBlockPhi = PHINode::Create(RdxPhi->getType(), 2, "bc.merge.rdx",
+ LoopScalarPreHeader->getTerminator());
+ BCBlockPhi->addIncoming(RdxDesc.StartValue, LoopBypassBlocks[0]);
+ BCBlockPhi->addIncoming(ReducedPartRdx, LoopMiddleBlock);
+
// Now, we need to fix the users of the reduction variable
// inside and outside of the scalar remainder loop.
// We know that the loop is in LCSSA form. We need to update the
@@ -2655,7 +2819,7 @@ void InnerLoopVectorizer::vectorizeLoop() {
assert(IncomingEdgeBlockIdx >= 0 && "Invalid block index");
// Pick the other block.
int SelfEdgeBlockIdx = (IncomingEdgeBlockIdx ? 0 : 1);
- (RdxPhi)->setIncomingValue(SelfEdgeBlockIdx, ReducedPartRdx);
+ (RdxPhi)->setIncomingValue(SelfEdgeBlockIdx, BCBlockPhi);
(RdxPhi)->setIncomingValue(IncomingEdgeBlockIdx, RdxDesc.LoopExitInstr);
}// end of for each redux variable.
@@ -3062,9 +3226,14 @@ void InnerLoopVectorizer::vectorizeBlockInLoop(BasicBlock *BB, PhiVector *PV) {
scalarizeInstruction(it);
break;
default:
+ bool HasScalarOpd = hasVectorInstrinsicScalarOpd(ID, 1);
for (unsigned Part = 0; Part < UF; ++Part) {
SmallVector<Value *, 4> Args;
for (unsigned i = 0, ie = CI->getNumArgOperands(); i != ie; ++i) {
+ if (HasScalarOpd && i == 1) {
+ Args.push_back(CI->getArgOperand(i));
+ continue;
+ }
VectorParts &Arg = getVectorValue(CI->getArgOperand(i));
Args.push_back(Arg[Part]);
}
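For reference, this is what the argument loop above produces for an intrinsic
with a scalar operand such as powi, assuming VF == 4 and UF == 2 (illustrative
IR, not emitted verbatim by the patch):

    // Scalar:  %r   = call float @llvm.powi.f32(float %x, i32 %k)
    // Widened: %r.0 = call <4 x float> @llvm.powi.v4f32(<4 x float> %x.0, i32 %k)
    //          %r.1 = call <4 x float> @llvm.powi.v4f32(<4 x float> %x.1, i32 %k)

Operand 1 (%k) is passed through unvectorized for every unrolled part, which
is exactly what the HasScalarOpd early continue implements.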
@@ -3112,8 +3281,8 @@ void InnerLoopVectorizer::updateAnalysis() {
}
}
- DT->addNewBlock(LoopMiddleBlock, LoopBypassBlocks.front());
- DT->addNewBlock(LoopScalarPreHeader, LoopMiddleBlock);
+ DT->addNewBlock(LoopMiddleBlock, LoopBypassBlocks[1]);
+ DT->addNewBlock(LoopScalarPreHeader, LoopBypassBlocks[0]);
DT->changeImmediateDominator(LoopScalarBody, LoopScalarPreHeader);
DT->changeImmediateDominator(LoopExitBlock, LoopMiddleBlock);
@@ -3138,8 +3307,10 @@ static bool canIfConvertPHINodes(BasicBlock *BB) {
}
bool LoopVectorizationLegality::canVectorizeWithIfConvert() {
- if (!EnableIfConversion)
+ if (!EnableIfConversion) {
+ emitAnalysis(Report() << "if-conversion is disabled");
return false;
+ }
assert(TheLoop->getNumBlocks() > 1 && "Single block loops are vectorizable");
@@ -3169,16 +3340,24 @@ bool LoopVectorizationLegality::canVectorizeWithIfConvert() {
BasicBlock *BB = *BI;
// We don't support switch statements inside loops.
- if (!isa<BranchInst>(BB->getTerminator()))
+ if (!isa<BranchInst>(BB->getTerminator())) {
+ emitAnalysis(Report(BB->getTerminator())
+ << "loop contains a switch statement");
return false;
+ }
// We must be able to predicate all blocks that need to be predicated.
if (blockNeedsPredication(BB)) {
- if (!blockCanBePredicated(BB, SafePointes))
+ if (!blockCanBePredicated(BB, SafePointes)) {
+ emitAnalysis(Report(BB->getTerminator())
+ << "control flow cannot be substituted for a select");
return false;
- } else if (BB != Header && !canIfConvertPHINodes(BB))
+ }
+ } else if (BB != Header && !canIfConvertPHINodes(BB)) {
+ emitAnalysis(Report(BB->getTerminator())
+ << "control flow cannot be substituted for a select");
return false;
-
+ }
}
// We can if-convert this loop.
@@ -3188,20 +3367,31 @@ bool LoopVectorizationLegality::canVectorizeWithIfConvert() {
bool LoopVectorizationLegality::canVectorize() {
// We must have a loop in canonical form. Loops with indirectbr in them cannot
// be canonicalized.
- if (!TheLoop->getLoopPreheader())
+ if (!TheLoop->getLoopPreheader()) {
+ emitAnalysis(
+ Report() << "loop control flow is not understood by vectorizer");
return false;
+ }
// We can only vectorize innermost loops.
- if (TheLoop->getSubLoopsVector().size())
+ if (TheLoop->getSubLoopsVector().size()) {
+ emitAnalysis(Report() << "loop is not the innermost loop");
return false;
+ }
// We must have a single backedge.
- if (TheLoop->getNumBackEdges() != 1)
+ if (TheLoop->getNumBackEdges() != 1) {
+ emitAnalysis(
+ Report() << "loop control flow is not understood by vectorizer");
return false;
+ }
// We must have a single exiting block.
- if (!TheLoop->getExitingBlock())
+ if (!TheLoop->getExitingBlock()) {
+ emitAnalysis(
+ Report() << "loop control flow is not understood by vectorizer");
return false;
+ }
// We need to have a loop header.
DEBUG(dbgs() << "LV: Found a loop: " <<
@@ -3217,6 +3407,7 @@ bool LoopVectorizationLegality::canVectorize() {
// ScalarEvolution needs to be able to find the exit count.
const SCEV *ExitCount = SE->getBackedgeTakenCount(TheLoop);
if (ExitCount == SE->getCouldNotCompute()) {
+ emitAnalysis(Report() << "could not determine number of loop iterations");
DEBUG(dbgs() << "LV: SCEV could not compute the loop exit count.\n");
return false;
}
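Every early return in this function now pairs the existing DEBUG output with
an emitAnalysis call that records a user-visible reason for the failure. The
Report helper is defined earlier in this patch; the call sites here only rely
on roughly the following shape (a sketch under that assumption, not the
actual definition):

    struct Report {
      std::string Message;
      llvm::raw_string_ostream Out;    // from llvm/Support/raw_ostream.h
      Instruction *Inst;               // optional anchor for a debug location
      Report(Instruction *I = nullptr) : Out(Message), Inst(I) {}
      template <typename T> Report &operator<<(const T &Value) {
        Out << Value;                  // accumulate free-form message text
        return *this;
      }
    };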
@@ -3310,6 +3501,8 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
if (!PhiTy->isIntegerTy() &&
!PhiTy->isFloatingPointTy() &&
!PhiTy->isPointerTy()) {
+ emitAnalysis(Report(it)
+ << "loop control flow is not understood by vectorizer");
DEBUG(dbgs() << "LV: Found an non-int non-pointer PHI.\n");
return false;
}
@@ -3320,13 +3513,17 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
if (*bb != Header) {
// Check that this instruction has no outside users or is an
// identified reduction value with an outside user.
- if(!hasOutsideLoopUser(TheLoop, it, AllowedExit))
+ if (!hasOutsideLoopUser(TheLoop, it, AllowedExit))
continue;
+ emitAnalysis(Report(it) << "value that could not be identified as "
+ "reduction is used outside the loop");
return false;
}
// We only allow if-converted PHIs with exactly two incoming values.
if (Phi->getNumIncomingValues() != 2) {
+ emitAnalysis(Report(it)
+ << "control flow not understood by vectorizer");
DEBUG(dbgs() << "LV: Found an invalid PHI.\n");
return false;
}
@@ -3357,8 +3554,11 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
// Until we explicitly handle the case of an induction variable with
// an outside loop user we have to give up vectorizing this loop.
- if (hasOutsideLoopUser(TheLoop, it, AllowedExit))
+ if (hasOutsideLoopUser(TheLoop, it, AllowedExit)) {
+ emitAnalysis(Report(it) << "use of induction value outside of the "
+ "loop is not handled by vectorizer");
return false;
+ }
continue;
}
@@ -3401,6 +3601,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
continue;
}
+ emitAnalysis(Report(it) << "unvectorizable operation");
DEBUG(dbgs() << "LV: Found an unidentified PHI."<< *Phi <<"\n");
return false;
}// end of PHI handling
@@ -3409,14 +3610,29 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
// calls and we do handle certain intrinsic and libm functions.
CallInst *CI = dyn_cast<CallInst>(it);
if (CI && !getIntrinsicIDForCall(CI, TLI) && !isa<DbgInfoIntrinsic>(CI)) {
+ emitAnalysis(Report(it) << "call instruction cannot be vectorized");
DEBUG(dbgs() << "LV: Found a call site.\n");
return false;
}
+ // Intrinsics such as powi, cttz and ctlz are legal to vectorize if the
+ // second argument is the same (i.e. loop invariant).
+ if (CI &&
+ hasVectorInstrinsicScalarOpd(getIntrinsicIDForCall(CI, TLI), 1)) {
+ if (!SE->isLoopInvariant(SE->getSCEV(CI->getOperand(1)), TheLoop)) {
+ emitAnalysis(Report(it)
+ << "intrinsic instruction cannot be vectorized");
+ DEBUG(dbgs() << "LV: Found unvectorizable intrinsic " << *CI << "\n");
+ return false;
+ }
+ }
+
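The invariance requirement is easiest to see at the source level (illustrative
C++, not from the patch; __builtin_powif lowers to llvm.powi):

    // Accepted: the exponent k is the same on every iteration.
    for (int i = 0; i < n; ++i)
      out[i] = __builtin_powif(in[i], k);

    // Rejected by the check above: the exponent varies per iteration, and
    // the widened llvm.powi only takes a single scalar exponent.
    for (int i = 0; i < n; ++i)
      out[i] = __builtin_powif(in[i], e[i]);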
// Check that the instruction return type is vectorizable.
// Also, we can't vectorize extractelement instructions.
if ((!VectorType::isValidElementType(it->getType()) &&
!it->getType()->isVoidTy()) || isa<ExtractElementInst>(it)) {
+ emitAnalysis(Report(it)
+ << "instruction return type cannot be vectorized");
DEBUG(dbgs() << "LV: Found unvectorizable type.\n");
return false;
}
@@ -3424,8 +3640,10 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
// Check that the stored type is vectorizable.
if (StoreInst *ST = dyn_cast<StoreInst>(it)) {
Type *T = ST->getValueOperand()->getType();
- if (!VectorType::isValidElementType(T))
+ if (!VectorType::isValidElementType(T)) {
+ emitAnalysis(Report(ST) << "store instruction cannot be vectorized");
return false;
+ }
if (EnableMemAccessVersioning)
collectStridedAcccess(ST);
}
@@ -3436,8 +3654,10 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
// Reduction instructions are allowed to have exit users.
// All other instructions must not have external users.
- if (hasOutsideLoopUser(TheLoop, it, AllowedExit))
+ if (hasOutsideLoopUser(TheLoop, it, AllowedExit)) {
+ emitAnalysis(Report(it) << "value cannot be used outside the loop");
return false;
+ }
} // next instr.
@@ -3445,8 +3665,11 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
if (!Induction) {
DEBUG(dbgs() << "LV: Did not find one integer induction var.\n");
- if (Inductions.empty())
+ if (Inductions.empty()) {
+ emitAnalysis(Report()
+ << "loop induction variable could not be identified");
return false;
+ }
}
return true;
@@ -4353,8 +4576,9 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
continue;
LoadInst *Ld = dyn_cast<LoadInst>(it);
- if (!Ld) return false;
- if (!Ld->isSimple() && !IsAnnotatedParallel) {
+ if (!Ld || (!Ld->isSimple() && !IsAnnotatedParallel)) {
+ emitAnalysis(Report(Ld)
+ << "read with atomic ordering or volatile read");
DEBUG(dbgs() << "LV: Found a non-simple load.\n");
return false;
}
@@ -4367,8 +4591,13 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
// Save 'store' instructions. Abort if other instructions write to memory.
if (it->mayWriteToMemory()) {
StoreInst *St = dyn_cast<StoreInst>(it);
- if (!St) return false;
+ if (!St) {
+ emitAnalysis(Report(it) << "instruction cannot be vectorized");
+ return false;
+ }
if (!St->isSimple() && !IsAnnotatedParallel) {
+ emitAnalysis(Report(St)
+ << "write with atomic ordering or volatile write");
DEBUG(dbgs() << "LV: Found a non-simple store.\n");
return false;
}
@@ -4405,6 +4634,9 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
Value* Ptr = ST->getPointerOperand();
if (isUniform(Ptr)) {
+ emitAnalysis(
+ Report(ST)
+ << "write to a loop invariant address could not be vectorized");
DEBUG(dbgs() << "LV: We don't allow storing to uniform addresses\n");
return false;
}
@@ -4483,6 +4715,7 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
}
if (NeedRTCheck && !CanDoRT) {
+ emitAnalysis(Report() << "cannot identify array bounds");
DEBUG(dbgs() << "LV: We can't vectorize because we can't find " <<
"the array bounds.\n");
PtrRtCheck.reset();
@@ -4513,6 +4746,14 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
// Check that we did not collect too many pointers or find an unsizeable
// pointer.
if (!CanDoRT || NumComparisons > RuntimeMemoryCheckThreshold) {
+ if (!CanDoRT && NumComparisons > 0)
+ emitAnalysis(Report()
+ << "cannot check memory dependencies at runtime");
+ else
+ emitAnalysis(Report()
+ << NumComparisons << " exceeds limit of "
+ << RuntimeMemoryCheckThreshold
+ << " dependent memory operations checked at runtime");
DEBUG(dbgs() << "LV: Can't vectorize with memory checks\n");
PtrRtCheck.reset();
return false;
@@ -4522,6 +4763,9 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
}
}
+ if (!CanVecMem)
+ emitAnalysis(Report() << "unsafe dependent memory operations in loop");
+
DEBUG(dbgs() << "LV: We" << (NeedRTCheck ? "" : " don't") <<
" need a runtime memory check.\n");
@@ -5774,4 +6018,3 @@ Value *InnerLoopUnroller::getConsecutiveVector(Value* Val, int StartIdx,
Constant *C = ConstantInt::get(ITy, StartIdx, Negate);
return Builder.CreateAdd(Val, C, "induction");
}
-
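One note on the tail of InnerLoopUnroller::getConsecutiveVector above: when
unrolling with VF == 1 there is no vector to build, so the "consecutive
vector" for each unrolled part degenerates to a scalar add of that part's
start offset (a sketch, assuming an i64 induction; Negate is used for
reversed, negative-stride accesses):

    //   %induction   = add i64 %val, 0   ; part 0
    //   %induction.1 = add i64 %val, 1   ; part 1
    //   %induction.2 = add i64 %val, 2   ; part 2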
diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp
index e13ba95..53a43d9 100644
--- a/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -149,6 +149,48 @@ static bool isSplat(ArrayRef<Value *> VL) {
return true;
}
+///\returns the opcode that can be combined with \p Op to form an alternate
+/// sequence which can later be merged as a ShuffleVector instruction.
+static unsigned getAltOpcode(unsigned Op) {
+ switch (Op) {
+ case Instruction::FAdd:
+ return Instruction::FSub;
+ case Instruction::FSub:
+ return Instruction::FAdd;
+ case Instruction::Add:
+ return Instruction::Sub;
+ case Instruction::Sub:
+ return Instruction::Add;
+ default:
+ return 0;
+ }
+}
+
+///\returns true if opcode \p Op can be part of an alternate sequence
+/// which can later be merged as a ShuffleVector instruction.
+static bool canCombineAsAltInst(unsigned Op) {
+ if (Op == Instruction::FAdd || Op == Instruction::FSub ||
+ Op == Instruction::Sub || Op == Instruction::Add)
+ return true;
+ return false;
+}
+
+/// \returns ShuffleVector instruction if the instructions in \p VL form an
+/// alternating fadd/fsub, fsub/fadd, add/sub or sub/add sequence
+/// (e.g. opcodes fadd, fsub, fadd, fsub, ...), and 0 otherwise.
+static unsigned isAltInst(ArrayRef<Value *> VL) {
+ Instruction *I0 = cast<Instruction>(VL[0]); // getSameOpcode guarantees this.
+ unsigned Opcode = I0->getOpcode();
+ unsigned AltOpcode = getAltOpcode(Opcode);
+ for (int i = 1, e = VL.size(); i < e; i++) {
+ Instruction *I = dyn_cast<Instruction>(VL[i]);
+ if (!I || I->getOpcode() != ((i & 1) ? AltOpcode : Opcode))
+ return 0;
+ }
+ return Instruction::ShuffleVector;
+}
+
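A quick illustration of how these three helpers classify a bundle (opcodes
stand in for the actual instructions in VL):

    // VL = { fadd, fsub, fadd, fsub } -> isAltInst == ShuffleVector
    // VL = { add,  sub }              -> isAltInst == ShuffleVector
    // VL = { fadd, fsub, fsub, fadd } -> 0: index 2 is even, so it must
    //                                    match Opcode (fadd), not AltOpcode
    // VL = { mul,  add }              -> 0: getAltOpcode(mul) == 0

The (i & 1) test is what pins the strict even/odd alternation.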
/// \returns The opcode if all of the Instructions in \p VL have the same
/// opcode, or zero.
static unsigned getSameOpcode(ArrayRef<Value *> VL) {
@@ -158,8 +200,11 @@ static unsigned getSameOpcode(ArrayRef<Value *> VL) {
unsigned Opcode = I0->getOpcode();
for (int i = 1, e = VL.size(); i < e; i++) {
Instruction *I = dyn_cast<Instruction>(VL[i]);
- if (!I || Opcode != I->getOpcode())
+ if (!I || Opcode != I->getOpcode()) {
+ if (canCombineAsAltInst(Opcode) && i == 1)
+ return isAltInst(VL);
return 0;
+ }
}
return Opcode;
}
@@ -377,6 +422,7 @@ public:
/// \brief Perform LICM and CSE on the newly generated gather sequences.
void optimizeGatherSequence();
+
private:
struct TreeEntry;
@@ -594,6 +640,7 @@ void BoUpSLP::buildTree(ArrayRef<Value *> Roots,
void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
bool SameTy = getSameType(VL); (void)SameTy;
+ bool isAltShuffle = false;
assert(SameTy && "Invalid types!");
if (Depth == RecursionMaxDepth) {
@@ -615,10 +662,19 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
newTreeEntry(VL, false);
return;
}
+ unsigned Opcode = getSameOpcode(VL);
+
+ // Check that this shuffle vector refers to the alternate
+ // sequence of opcodes.
+ if (Opcode == Instruction::ShuffleVector) {
+ Instruction *I0 = cast<Instruction>(VL[0]); // non-null: Opcode != 0 here.
+ unsigned Op = I0->getOpcode();
+ if (Op != Instruction::ShuffleVector)
+ isAltShuffle = true;
+ }
// If all of the operands are identical or constant we have a simple solution.
- if (allConstant(VL) || isSplat(VL) || !getSameBlock(VL) ||
- !getSameOpcode(VL)) {
+ if (allConstant(VL) || isSplat(VL) || !getSameBlock(VL) || !Opcode) {
DEBUG(dbgs() << "SLP: Gathering due to C,S,B,O. \n");
newTreeEntry(VL, false);
return;
@@ -754,8 +810,6 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
DEBUG(dbgs() << "SLP: We are able to schedule this bundle.\n");
- unsigned Opcode = getSameOpcode(VL);
-
// Check if it is safe to sink the loads or the stores.
if (Opcode == Instruction::Load || Opcode == Instruction::Store) {
Instruction *Last = getLastInstruction(VL);
@@ -914,8 +968,20 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
if (isa<BinaryOperator>(VL0) && VL0->isCommutative()) {
ValueList Left, Right;
reorderInputsAccordingToOpcode(VL, Left, Right);
- buildTree_rec(Left, Depth + 1);
- buildTree_rec(Right, Depth + 1);
+ BasicBlock *LeftBB = getSameBlock(Left);
+ BasicBlock *RightBB = getSameBlock(Right);
+ // If we have common uses on separate paths in the tree, make sure we
+ // process the one with greater common depth first.
+ // We can use block numbering to determine the subtree traversal, as the
+ // earlier user has to come in between the common use and the later user.
+ if (LeftBB && RightBB && LeftBB == RightBB &&
+ getLastIndex(Right) > getLastIndex(Left)) {
+ buildTree_rec(Right, Depth + 1);
+ buildTree_rec(Left, Depth + 1);
+ } else {
+ buildTree_rec(Left, Depth + 1);
+ buildTree_rec(Right, Depth + 1);
+ }
return;
}
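A concrete scenario for the reordering above, with hypothetical indices: if
Left and Right both live in one block, getLastIndex(Left) == 10 and
getLastIndex(Right) == 14, and the two bundles share a sub-expression, then
Right's subtree is built first so the shared value is scheduled before its
later user:

    // LeftBB == RightBB && getLastIndex(Right) > getLastIndex(Left)
    //   -> buildTree_rec(Right), then buildTree_rec(Left)
    // otherwise keep the natural Left-then-Right order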
@@ -929,6 +995,51 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
}
return;
}
+ case Instruction::GetElementPtr: {
+ // We don't combine GEPs with complicated (nested) indexing.
+ for (unsigned j = 0; j < VL.size(); ++j) {
+ if (cast<Instruction>(VL[j])->getNumOperands() != 2) {
+ DEBUG(dbgs() << "SLP: not-vectorizable GEP (nested indexes).\n");
+ newTreeEntry(VL, false);
+ return;
+ }
+ }
+
+ // We can't combine several GEPs into one vector if they operate on
+ // different types.
+ Type *Ty0 = cast<Instruction>(VL0)->getOperand(0)->getType();
+ for (unsigned j = 0; j < VL.size(); ++j) {
+ Type *CurTy = cast<Instruction>(VL[j])->getOperand(0)->getType();
+ if (Ty0 != CurTy) {
+ DEBUG(dbgs() << "SLP: not-vectorizable GEP (different types).\n");
+ newTreeEntry(VL, false);
+ return;
+ }
+ }
+
+ // We don't combine GEPs with non-constant indexes.
+ for (unsigned j = 0; j < VL.size(); ++j) {
+ auto Op = cast<Instruction>(VL[j])->getOperand(1);
+ if (!isa<ConstantInt>(Op)) {
+ DEBUG(
+ dbgs() << "SLP: not-vectorizable GEP (non-constant indexes).\n");
+ newTreeEntry(VL, false);
+ return;
+ }
+ }
+
+ newTreeEntry(VL, true);
+ DEBUG(dbgs() << "SLP: added a vector of GEPs.\n");
+ for (unsigned i = 0, e = 2; i < e; ++i) {
+ ValueList Operands;
+ // Prepare the operand vector.
+ for (unsigned j = 0; j < VL.size(); ++j)
+ Operands.push_back(cast<Instruction>(VL[j])->getOperand(i));
+
+ buildTree_rec(Operands, Depth + 1);
+ }
+ return;
+ }
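Concretely, the three filters above only admit "flat" GEPs: two operands, a
common pointee type and constant indices. Illustrative IR in the typed-pointer
syntax of this LLVM version (not taken from the patch):

    //   %g0 = getelementptr double* %base, i64 4  ; bundles with %g1
    //   %g1 = getelementptr double* %base, i64 5
    //   %g2 = getelementptr [16 x double]* %a, i64 0, i64 4 ; nested indexing
    //   %g3 = getelementptr double* %base, i64 %i ; non-constant index

The matching GetElementPtr case added to getEntryCost below prices such a
bundle as a vector add of the pointer and index vectors.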
case Instruction::Store: {
// Check if the stores are consecutive or if we need to swizzle them.
for (unsigned i = 0, e = VL.size() - 1; i < e; ++i)
@@ -961,9 +1072,10 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
DEBUG(dbgs() << "SLP: Non-vectorizable call.\n");
return;
}
-
Function *Int = CI->getCalledFunction();
-
+ Value *A1I = nullptr;
+ if (hasVectorInstrinsicScalarOpd(ID, 1))
+ A1I = CI->getArgOperand(1);
for (unsigned i = 1, e = VL.size(); i != e; ++i) {
CallInst *CI2 = dyn_cast<CallInst>(VL[i]);
if (!CI2 || CI2->getCalledFunction() != Int ||
@@ -973,6 +1085,18 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
<< "\n");
return;
}
+ // ctlz, cttz and powi are special intrinsics whose second argument
+ // should be the same in order for them to be vectorized.
+ if (hasVectorInstrinsicScalarOpd(ID, 1)) {
+ Value *A1J = CI2->getArgOperand(1);
+ if (A1I != A1J) {
+ newTreeEntry(VL, false);
+ DEBUG(dbgs() << "SLP: mismatched arguments in call:" << *CI
+ << " argument "<< A1I<<"!=" << A1J
+ << "\n");
+ return;
+ }
+ }
}
newTreeEntry(VL, true);
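The A1I/A1J comparison above enforces per bundle what the loop vectorizer
enforces per loop: the scalar operand of powi, ctlz or cttz must be identical
across all scalars being packed. For example (illustrative IR):

    //   %c0 = call float @llvm.powi.f32(float %x0, i32 6)
    //   %c1 = call float @llvm.powi.f32(float %x1, i32 6) ; same exponent: OK
    //   %c2 = call float @llvm.powi.f32(float %x2, i32 7) ; 6 != 7: gathered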
@@ -987,6 +1111,26 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
}
return;
}
+ case Instruction::ShuffleVector: {
+ // If this is not an alternating sequence of opcodes like add-sub,
+ // then do not vectorize this instruction.
+ if (!isAltShuffle) {
+ newTreeEntry(VL, false);
+ DEBUG(dbgs() << "SLP: ShuffleVector are not vectorized.\n");
+ return;
+ }
+ newTreeEntry(VL, true);
+ DEBUG(dbgs() << "SLP: added a ShuffleVector op.\n");
+ for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) {
+ ValueList Operands;
+ // Prepare the operand vector.
+ for (unsigned j = 0; j < VL.size(); ++j)
+ Operands.push_back(cast<Instruction>(VL[j])->getOperand(i));
+
+ buildTree_rec(Operands, Depth + 1);
+ }
+ return;
+ }
default:
newTreeEntry(VL, false);
DEBUG(dbgs() << "SLP: Gathering unknown instruction.\n");
@@ -1010,11 +1154,9 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
}
return getGatherCost(E->Scalars);
}
-
- assert(getSameOpcode(VL) && getSameType(VL) && getSameBlock(VL) &&
- "Invalid VL");
+ unsigned Opcode = getSameOpcode(VL);
+ assert(Opcode && getSameType(VL) && getSameBlock(VL) && "Invalid VL");
Instruction *VL0 = cast<Instruction>(VL[0]);
- unsigned Opcode = VL0->getOpcode();
switch (Opcode) {
case Instruction::PHI: {
return 0;
@@ -1121,6 +1263,20 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
}
return VecCost - ScalarCost;
}
+ case Instruction::GetElementPtr: {
+ TargetTransformInfo::OperandValueKind Op1VK =
+ TargetTransformInfo::OK_AnyValue;
+ TargetTransformInfo::OperandValueKind Op2VK =
+ TargetTransformInfo::OK_UniformConstantValue;
+
+ int ScalarCost =
+ VecTy->getNumElements() *
+ TTI->getArithmeticInstrCost(Instruction::Add, ScalarTy, Op1VK, Op2VK);
+ int VecCost =
+ TTI->getArithmeticInstrCost(Instruction::Add, VecTy, Op1VK, Op2VK);
+
+ return VecCost - ScalarCost;
+ }
case Instruction::Load: {
// Cost of wide load - cost of scalar loads.
int ScalarLdCost = VecTy->getNumElements() *
@@ -1158,6 +1314,32 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
return VecCallCost - ScalarCallCost;
}
+ case Instruction::ShuffleVector: {
+ TargetTransformInfo::OperandValueKind Op1VK =
+ TargetTransformInfo::OK_AnyValue;
+ TargetTransformInfo::OperandValueKind Op2VK =
+ TargetTransformInfo::OK_AnyValue;
+ int ScalarCost = 0;
+ int VecCost = 0;
+ for (unsigned i = 0; i < VL.size(); ++i) {
+ // Use dyn_cast rather than cast so the null check below is meaningful.
+ Instruction *I = dyn_cast<Instruction>(VL[i]);
+ if (!I)
+ break;
+ ScalarCost +=
+ TTI->getArithmeticInstrCost(I->getOpcode(), ScalarTy, Op1VK, Op2VK);
+ }
+ // VecCost is equal to sum of the cost of creating 2 vectors
+ // and the cost of creating shuffle.
+ Instruction *I0 = cast<Instruction>(VL[0]);
+ VecCost =
+ TTI->getArithmeticInstrCost(I0->getOpcode(), VecTy, Op1VK, Op2VK);
+ Instruction *I1 = cast<Instruction>(VL[1]);
+ VecCost +=
+ TTI->getArithmeticInstrCost(I1->getOpcode(), VecTy, Op1VK, Op2VK);
+ VecCost +=
+ TTI->getShuffleCost(TargetTransformInfo::SK_Alternate, VecTy, 0);
+ return VecCost - ScalarCost;
+ }
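A worked example of the alternate-shuffle cost, with made-up unit costs (real
values come from the target's TTI hooks): take VL = { fadd, fsub, fadd, fsub }
on <4 x float> and assume every arithmetic instruction and the SK_Alternate
shuffle each cost 1. Then:

    //   ScalarCost = 4 scalar flops                             = 4
    //   VecCost    = 1 vector fadd + 1 vector fsub + 1 shuffle  = 3
    //   getEntryCost(E) = VecCost - ScalarCost                  = -1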
default:
llvm_unreachable("Unknown instruction");
}
@@ -1438,9 +1620,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
setInsertPointAfterBundle(E->Scalars);
return Gather(E->Scalars, VecTy);
}
-
- unsigned Opcode = VL0->getOpcode();
- assert(Opcode == getSameOpcode(E->Scalars) && "Invalid opcode");
+ unsigned Opcode = getSameOpcode(E->Scalars);
switch (Opcode) {
case Instruction::PHI: {
@@ -1649,12 +1829,52 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
E->VectorizedValue = S;
return propagateMetadata(S, E->Scalars);
}
+ case Instruction::GetElementPtr: {
+ setInsertPointAfterBundle(E->Scalars);
+
+ ValueList Op0VL;
+ for (int i = 0, e = E->Scalars.size(); i < e; ++i)
+ Op0VL.push_back(cast<GetElementPtrInst>(E->Scalars[i])->getOperand(0));
+
+ Value *Op0 = vectorizeTree(Op0VL);
+
+ std::vector<Value *> OpVecs;
+ for (int j = 1, e = cast<GetElementPtrInst>(VL0)->getNumOperands(); j < e;
+ ++j) {
+ ValueList OpVL;
+ for (int i = 0, e = E->Scalars.size(); i < e; ++i)
+ OpVL.push_back(cast<GetElementPtrInst>(E->Scalars[i])->getOperand(j));
+
+ Value *OpVec = vectorizeTree(OpVL);
+ OpVecs.push_back(OpVec);
+ }
+
+ Value *V = Builder.CreateGEP(Op0, OpVecs);
+ E->VectorizedValue = V;
+
+ if (Instruction *I = dyn_cast<Instruction>(V))
+ return propagateMetadata(I, E->Scalars);
+
+ return V;
+ }
case Instruction::Call: {
CallInst *CI = cast<CallInst>(VL0);
setInsertPointAfterBundle(E->Scalars);
+ Function *FI;
+ Intrinsic::ID IID = Intrinsic::not_intrinsic;
+ if (CI && (FI = CI->getCalledFunction())) {
+ IID = (Intrinsic::ID) FI->getIntrinsicID();
+ }
std::vector<Value *> OpVecs;
for (int j = 0, e = CI->getNumArgOperands(); j < e; ++j) {
ValueList OpVL;
+ // ctlz, cttz and powi are special intrinsics whose second argument is
+ // a scalar. This argument should not be vectorized.
+ if (hasVectorInstrinsicScalarOpd(IID, 1) && j == 1) {
+ CallInst *CEI = cast<CallInst>(E->Scalars[0]);
+ OpVecs.push_back(CEI->getArgOperand(j));
+ continue;
+ }
for (int i = 0, e = E->Scalars.size(); i < e; ++i) {
CallInst *CEI = cast<CallInst>(E->Scalars[i]);
OpVL.push_back(CEI->getArgOperand(j));
@@ -1673,6 +1893,49 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
E->VectorizedValue = V;
return V;
}
+ case Instruction::ShuffleVector: {
+ ValueList LHSVL, RHSVL;
+ for (int i = 0, e = E->Scalars.size(); i < e; ++i) {
+ LHSVL.push_back(cast<Instruction>(E->Scalars[i])->getOperand(0));
+ RHSVL.push_back(cast<Instruction>(E->Scalars[i])->getOperand(1));
+ }
+ setInsertPointAfterBundle(E->Scalars);
+
+ Value *LHS = vectorizeTree(LHSVL);
+ Value *RHS = vectorizeTree(RHSVL);
+
+ if (Value *V = alreadyVectorized(E->Scalars))
+ return V;
+
+ // Create a vector of LHS op1 RHS
+ BinaryOperator *BinOp0 = cast<BinaryOperator>(VL0);
+ Value *V0 = Builder.CreateBinOp(BinOp0->getOpcode(), LHS, RHS);
+
+ // Create a vector of LHS op2 RHS
+ Instruction *VL1 = cast<Instruction>(E->Scalars[1]);
+ BinaryOperator *BinOp1 = cast<BinaryOperator>(VL1);
+ Value *V1 = Builder.CreateBinOp(BinOp1->getOpcode(), LHS, RHS);
+
+ // Create the appropriate shuffle to pick the alternate operations from
+ // the two vectors.
+ std::vector<Constant *> Mask(E->Scalars.size());
+ unsigned e = E->Scalars.size();
+ for (unsigned i = 0; i < e; ++i) {
+ if (i & 1)
+ Mask[i] = Builder.getInt32(e + i);
+ else
+ Mask[i] = Builder.getInt32(i);
+ }
+
+ Value *ShuffleMask = ConstantVector::get(Mask);
+
+ Value *V = Builder.CreateShuffleVector(V0, V1, ShuffleMask);
+ E->VectorizedValue = V;
+ if (Instruction *I = dyn_cast<Instruction>(V))
+ return propagateMetadata(I, E->Scalars);
+
+ return V;
+ }
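For a bundle of four scalars the mask built above takes even lanes from V0
(the Opcode vector) and odd lanes from V1 (the AltOpcode vector), so the
result reproduces the original fadd/fsub interleaving. Sketch for e == 4:

    //   Mask = <i32 0, i32 5, i32 2, i32 7>
    //   %v = shufflevector <4 x float> %V0, <4 x float> %V1,
    //                      <4 x i32> <i32 0, i32 5, i32 2, i32 7>
    //   lanes: V0[0] (fadd), V1[1] (fsub), V0[2] (fadd), V1[3] (fsub)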
default:
llvm_unreachable("unknown inst");
}
@@ -1741,7 +2004,6 @@ Value *BoUpSLP::vectorizeTree() {
// For each lane:
for (int Lane = 0, LE = Entry->Scalars.size(); Lane != LE; ++Lane) {
Value *Scalar = Entry->Scalars[Lane];
-
// No need to handle users of gathered values.
if (Entry->NeedToGather)
continue;
@@ -1925,7 +2187,6 @@ struct SLPVectorizer : public FunctionPass {
for (po_iterator<BasicBlock*> it = po_begin(&F.getEntryBlock()),
e = po_end(&F.getEntryBlock()); it != e; ++it) {
BasicBlock *BB = *it;
-
// Vectorize trees that end at stores.
if (unsigned count = collectStores(BB, R)) {
(void)count;