aboutsummaryrefslogtreecommitdiffstats
path: root/lib/Target
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Target')
-rw-r--r--lib/Target/AArch64/AArch64.td16
-rw-r--r--lib/Target/AArch64/AArch64AsmPrinter.cpp27
-rw-r--r--lib/Target/AArch64/AArch64CollectLOH.cpp2
-rw-r--r--lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp9
-rw-r--r--lib/Target/AArch64/AArch64FastISel.cpp16
-rw-r--r--lib/Target/AArch64/AArch64FrameLowering.cpp182
-rw-r--r--lib/Target/AArch64/AArch64FrameLowering.h2
-rw-r--r--lib/Target/AArch64/AArch64ISelDAGToDAG.cpp2
-rw-r--r--lib/Target/AArch64/AArch64ISelLowering.cpp234
-rw-r--r--lib/Target/AArch64/AArch64ISelLowering.h5
-rw-r--r--lib/Target/AArch64/AArch64InstrFormats.td215
-rw-r--r--lib/Target/AArch64/AArch64InstrInfo.cpp6
-rw-r--r--lib/Target/AArch64/AArch64InstrInfo.td54
-rw-r--r--lib/Target/AArch64/AArch64RegisterInfo.cpp30
-rw-r--r--lib/Target/AArch64/AArch64RegisterInfo.h3
-rw-r--r--lib/Target/AArch64/AArch64SchedA57.td20
-rw-r--r--lib/Target/AArch64/AArch64Subtarget.cpp3
-rw-r--r--lib/Target/AArch64/AArch64Subtarget.h6
-rw-r--r--lib/Target/AArch64/AArch64TargetMachine.cpp9
-rw-r--r--lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp32
-rw-r--r--lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp13
-rw-r--r--lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp96
-rw-r--r--lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h131
-rw-r--r--lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp4
-rw-r--r--lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp8
-rw-r--r--lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp10
-rw-r--r--lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.h4
-rw-r--r--lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp2
-rw-r--r--lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp15
-rw-r--r--lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h9
-rw-r--r--lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp6
-rw-r--r--lib/Target/AArch64/Utils/AArch64BaseInfo.cpp1429
-rw-r--r--lib/Target/AArch64/Utils/AArch64BaseInfo.h78
-rw-r--r--lib/Target/ARM/ARM.td36
-rw-r--r--lib/Target/ARM/ARMAsmPrinter.cpp32
-rw-r--r--lib/Target/ARM/ARMBaseRegisterInfo.cpp21
-rw-r--r--lib/Target/ARM/ARMConstantPoolValue.h2
-rw-r--r--lib/Target/ARM/ARMFrameLowering.cpp3
-rw-r--r--lib/Target/ARM/ARMISelLowering.cpp27
-rw-r--r--lib/Target/ARM/ARMInstrInfo.td76
-rw-r--r--lib/Target/ARM/ARMInstrThumb.td18
-rw-r--r--lib/Target/ARM/ARMInstrThumb2.td43
-rw-r--r--lib/Target/ARM/ARMSubtarget.cpp6
-rw-r--r--lib/Target/ARM/ARMSubtarget.h8
-rw-r--r--lib/Target/ARM/ARMTargetMachine.cpp9
-rw-r--r--lib/Target/ARM/AsmParser/ARMAsmParser.cpp63
-rw-r--r--lib/Target/ARM/Disassembler/ARMDisassembler.cpp56
-rw-r--r--lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp729
-rw-r--r--lib/Target/ARM/InstPrinter/ARMInstPrinter.h223
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp16
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h2
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h2
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMAsmBackendELF.h2
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMAsmBackendWinCOFF.h2
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp12
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp22
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp2
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp13
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h27
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp12
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp5
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp7
-rw-r--r--lib/Target/BPF/BPFISelDAGToDAG.cpp2
-rw-r--r--lib/Target/BPF/BPFISelLowering.cpp15
-rw-r--r--lib/Target/BPF/BPFInstrInfo.td57
-rw-r--r--lib/Target/BPF/InstPrinter/BPFInstPrinter.cpp2
-rw-r--r--lib/Target/BPF/InstPrinter/BPFInstPrinter.h3
-rw-r--r--lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp4
-rw-r--r--lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp2
-rw-r--r--lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp5
-rw-r--r--lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp11
-rw-r--r--lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.h3
-rw-r--r--lib/Target/BPF/MCTargetDesc/LLVMBuild.txt2
-rw-r--r--lib/Target/CppBackend/CPPBackend.cpp24
-rw-r--r--lib/Target/CppBackend/CPPTargetMachine.h2
-rw-r--r--lib/Target/Hexagon/CMakeLists.txt1
-rw-r--r--lib/Target/Hexagon/HexagonAsmPrinter.cpp15
-rw-r--r--lib/Target/Hexagon/HexagonExpandCondsets.cpp1348
-rw-r--r--lib/Target/Hexagon/HexagonISelLowering.cpp11
-rw-r--r--lib/Target/Hexagon/HexagonInstrInfo.cpp5
-rw-r--r--lib/Target/Hexagon/HexagonInstrInfo.h4
-rw-r--r--lib/Target/Hexagon/HexagonSubtarget.cpp2
-rw-r--r--lib/Target/Hexagon/HexagonTargetMachine.cpp24
-rw-r--r--lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp2
-rw-r--r--lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp8
-rw-r--r--lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp5
-rw-r--r--lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.h3
-rw-r--r--lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp11
-rw-r--r--lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h5
-rw-r--r--lib/Target/MSP430/InstPrinter/MSP430InstPrinter.cpp2
-rw-r--r--lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h3
-rw-r--r--lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp5
-rw-r--r--lib/Target/MSP430/MSP430ISelLowering.cpp1
-rw-r--r--lib/Target/MSP430/MSP430ISelLowering.h6
-rw-r--r--lib/Target/MSP430/MSP430MCInstLower.cpp4
-rw-r--r--lib/Target/Mips/AsmParser/MipsAsmParser.cpp76
-rw-r--r--lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp2
-rw-r--r--lib/Target/Mips/InstPrinter/MipsInstPrinter.h3
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp3
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h2
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp185
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp13
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h9
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.h2
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsMCNaCl.h5
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp7
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h3
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp8
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp17
-rw-r--r--lib/Target/Mips/Mips16FrameLowering.cpp19
-rw-r--r--lib/Target/Mips/Mips16FrameLowering.h4
-rw-r--r--lib/Target/Mips/Mips16InstrInfo.cpp3
-rw-r--r--lib/Target/Mips/Mips16InstrInfo.h2
-rw-r--r--lib/Target/Mips/MipsAsmPrinter.cpp31
-rw-r--r--lib/Target/Mips/MipsFastISel.cpp2
-rw-r--r--lib/Target/Mips/MipsFrameLowering.cpp17
-rw-r--r--lib/Target/Mips/MipsFrameLowering.h5
-rw-r--r--lib/Target/Mips/MipsISelLowering.cpp1
-rw-r--r--lib/Target/Mips/MipsInstrInfo.h4
-rw-r--r--lib/Target/Mips/MipsInstrInfo.td10
-rw-r--r--lib/Target/Mips/MipsMachineFunction.cpp22
-rw-r--r--lib/Target/Mips/MipsMachineFunction.h5
-rw-r--r--lib/Target/Mips/MipsOptionRecord.h2
-rw-r--r--lib/Target/Mips/MipsSEFrameLowering.cpp20
-rw-r--r--lib/Target/Mips/MipsSEFrameLowering.h4
-rw-r--r--lib/Target/Mips/MipsSEISelDAGToDAG.cpp15
-rw-r--r--lib/Target/Mips/MipsSEInstrInfo.cpp3
-rw-r--r--lib/Target/Mips/MipsSEInstrInfo.h2
-rw-r--r--lib/Target/Mips/MipsTargetStreamer.h3
-rw-r--r--lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.cpp10
-rw-r--r--lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.h5
-rw-r--r--lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp7
-rw-r--r--lib/Target/NVPTX/NVPTX.td22
-rw-r--r--lib/Target/NVPTX/NVPTXAsmPrinter.cpp77
-rw-r--r--lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp5
-rw-r--r--lib/Target/NVPTX/NVPTXGenericToNVVM.cpp1
-rw-r--r--lib/Target/NVPTX/NVPTXISelLowering.cpp2
-rw-r--r--lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp14
-rw-r--r--lib/Target/NVPTX/NVPTXTargetMachine.h2
-rw-r--r--lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp70
-rw-r--r--lib/Target/NVPTX/NVPTXTargetTransformInfo.h2
-rw-r--r--lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp52
-rw-r--r--lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp2
-rw-r--r--lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp2
-rw-r--r--lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h3
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp5
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp2
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp2
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp8
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h43
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp4
-rw-r--r--lib/Target/PowerPC/PPC.td77
-rw-r--r--lib/Target/PowerPC/PPCAsmPrinter.cpp19
-rw-r--r--lib/Target/PowerPC/PPCFastISel.cpp7
-rw-r--r--lib/Target/PowerPC/PPCISelDAGToDAG.cpp31
-rw-r--r--lib/Target/PowerPC/PPCISelLowering.cpp150
-rw-r--r--lib/Target/PowerPC/PPCISelLowering.h17
-rw-r--r--lib/Target/PowerPC/PPCInstr64Bit.td36
-rw-r--r--lib/Target/PowerPC/PPCInstrFormats.td6
-rw-r--r--lib/Target/PowerPC/PPCInstrInfo.td64
-rw-r--r--lib/Target/PowerPC/PPCInstrVSX.td25
-rw-r--r--lib/Target/PowerPC/PPCLoopDataPrefetch.cpp9
-rw-r--r--lib/Target/PowerPC/PPCLoopPreIncPrep.cpp39
-rw-r--r--lib/Target/PowerPC/PPCMCInstLower.cpp2
-rw-r--r--lib/Target/PowerPC/PPCSubtarget.cpp9
-rw-r--r--lib/Target/PowerPC/PPCSubtarget.h6
-rw-r--r--lib/Target/PowerPC/PPCTargetStreamer.h2
-rw-r--r--lib/Target/PowerPC/README.txt19
-rw-r--r--lib/Target/PowerPC/README_ALTIVEC.txt2
-rw-r--r--lib/Target/R600/AMDGPU.td24
-rw-r--r--lib/Target/R600/AMDGPUAsmPrinter.cpp22
-rw-r--r--lib/Target/R600/AMDGPUAsmPrinter.h4
-rw-r--r--lib/Target/R600/AMDGPUISelDAGToDAG.cpp2
-rw-r--r--lib/Target/R600/AMDGPUISelLowering.cpp12
-rw-r--r--lib/Target/R600/AMDGPUInstructions.td4
-rw-r--r--lib/Target/R600/AMDGPUMCInstLower.cpp3
-rw-r--r--lib/Target/R600/AMDGPUPromoteAlloca.cpp4
-rw-r--r--lib/Target/R600/AMDGPUSubtarget.cpp1
-rw-r--r--lib/Target/R600/AMDGPUSubtarget.h3
-rw-r--r--lib/Target/R600/AMDILCFGStructurizer.cpp2
-rw-r--r--lib/Target/R600/AsmParser/AMDGPUAsmParser.cpp1094
-rw-r--r--lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp19
-rw-r--r--lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h5
-rw-r--r--lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp4
-rw-r--r--lib/Target/R600/MCTargetDesc/AMDGPUELFObjectWriter.cpp2
-rw-r--r--lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.cpp5
-rw-r--r--lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.h3
-rw-r--r--lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp2
-rw-r--r--lib/Target/R600/R600ISelLowering.cpp4
-rw-r--r--lib/Target/R600/R600Instructions.td5
-rw-r--r--lib/Target/R600/R600TextureIntrinsicsReplacer.cpp4
-rw-r--r--lib/Target/R600/SIAnnotateControlFlow.cpp19
-rw-r--r--lib/Target/R600/SIISelLowering.cpp37
-rw-r--r--lib/Target/R600/SIISelLowering.h4
-rw-r--r--lib/Target/R600/SIInstrFormats.td31
-rw-r--r--lib/Target/R600/SIInstrInfo.td274
-rw-r--r--lib/Target/R600/SIInstructions.td43
-rw-r--r--lib/Target/R600/SIRegisterInfo.td47
-rw-r--r--lib/Target/R600/SITypeRewriter.cpp2
-rw-r--r--lib/Target/Sparc/InstPrinter/SparcInstPrinter.cpp46
-rw-r--r--lib/Target/Sparc/InstPrinter/SparcInstPrinter.h40
-rw-r--r--lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp2
-rw-r--r--lib/Target/Sparc/MCTargetDesc/SparcELFObjectWriter.cpp8
-rw-r--r--lib/Target/Sparc/MCTargetDesc/SparcMCCodeEmitter.cpp2
-rw-r--r--lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp13
-rw-r--r--lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h4
-rw-r--r--lib/Target/Sparc/Sparc.td7
-rw-r--r--lib/Target/Sparc/SparcAsmPrinter.cpp18
-rw-r--r--lib/Target/Sparc/SparcISelLowering.cpp3
-rw-r--r--lib/Target/Sparc/SparcSelectionDAGInfo.h2
-rw-r--r--lib/Target/SystemZ/CMakeLists.txt1
-rw-r--r--lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp2
-rw-r--r--lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp3
-rw-r--r--lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h3
-rw-r--r--lib/Target/SystemZ/LLVMBuild.txt2
-rw-r--r--lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp2
-rw-r--r--lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp2
-rw-r--r--lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp4
-rw-r--r--lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp5
-rw-r--r--lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h3
-rw-r--r--lib/Target/SystemZ/SystemZ.h12
-rw-r--r--lib/Target/SystemZ/SystemZAsmPrinter.cpp23
-rw-r--r--lib/Target/SystemZ/SystemZAsmPrinter.h1
-rw-r--r--lib/Target/SystemZ/SystemZISelDAGToDAG.cpp11
-rw-r--r--lib/Target/SystemZ/SystemZISelLowering.cpp250
-rw-r--r--lib/Target/SystemZ/SystemZISelLowering.h20
-rw-r--r--lib/Target/SystemZ/SystemZInstrFormats.td11
-rw-r--r--lib/Target/SystemZ/SystemZInstrInfo.cpp34
-rw-r--r--lib/Target/SystemZ/SystemZInstrInfo.td65
-rw-r--r--lib/Target/SystemZ/SystemZOperators.td13
-rw-r--r--lib/Target/SystemZ/SystemZProcessors.td30
-rw-r--r--lib/Target/SystemZ/SystemZSubtarget.cpp9
-rw-r--r--lib/Target/SystemZ/SystemZSubtarget.h18
-rw-r--r--lib/Target/SystemZ/SystemZTargetMachine.cpp7
-rw-r--r--lib/Target/SystemZ/SystemZTargetMachine.h1
-rw-r--r--lib/Target/SystemZ/SystemZTargetTransformInfo.cpp240
-rw-r--r--lib/Target/SystemZ/SystemZTargetTransformInfo.h70
-rw-r--r--lib/Target/TargetLoweringObjectFile.cpp2
-rw-r--r--lib/Target/TargetMachine.cpp2
-rw-r--r--lib/Target/TargetMachineC.cpp20
-rw-r--r--lib/Target/X86/AsmParser/X86AsmParser.cpp2
-rw-r--r--lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp49
-rw-r--r--lib/Target/X86/InstPrinter/X86ATTInstPrinter.h11
-rw-r--r--lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp3
-rw-r--r--lib/Target/X86/InstPrinter/X86IntelInstPrinter.h3
-rw-r--r--lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp12
-rw-r--r--lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp8
-rw-r--r--lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp2
-rw-r--r--lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp9
-rw-r--r--lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h34
-rw-r--r--lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp5
-rw-r--r--lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp4
-rw-r--r--lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp8
-rw-r--r--lib/Target/X86/X86.td20
-rw-r--r--lib/Target/X86/X86AsmPrinter.cpp23
-rw-r--r--lib/Target/X86/X86FastISel.cpp2
-rw-r--r--lib/Target/X86/X86ISelDAGToDAG.cpp7
-rw-r--r--lib/Target/X86/X86ISelLowering.cpp138
-rw-r--r--lib/Target/X86/X86ISelLowering.h3
-rw-r--r--lib/Target/X86/X86InstrAVX512.td155
-rw-r--r--lib/Target/X86/X86InstrArithmetic.td8
-rw-r--r--lib/Target/X86/X86InstrCompiler.td11
-rw-r--r--lib/Target/X86/X86InstrFragmentsSIMD.td28
-rw-r--r--lib/Target/X86/X86InstrInfo.cpp32
-rw-r--r--lib/Target/X86/X86InstrSSE.td63
-rw-r--r--lib/Target/X86/X86IntrinsicsInfo.h14
-rw-r--r--lib/Target/X86/X86SelectionDAGInfo.cpp5
-rw-r--r--lib/Target/XCore/InstPrinter/XCoreInstPrinter.cpp2
-rw-r--r--lib/Target/XCore/InstPrinter/XCoreInstPrinter.h3
-rw-r--r--lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp5
-rw-r--r--lib/Target/XCore/XCoreISelLowering.cpp8
-rw-r--r--lib/Target/XCore/XCoreLowerThreadLocal.cpp8
-rw-r--r--lib/Target/XCore/XCoreTargetStreamer.h2
273 files changed, 7754 insertions, 2749 deletions
diff --git a/lib/Target/AArch64/AArch64.td b/lib/Target/AArch64/AArch64.td
index bb3db4b..9a7d6c8 100644
--- a/lib/Target/AArch64/AArch64.td
+++ b/lib/Target/AArch64/AArch64.td
@@ -32,9 +32,6 @@ def FeatureCrypto : SubtargetFeature<"crypto", "HasCrypto", "true",
def FeatureCRC : SubtargetFeature<"crc", "HasCRC", "true",
"Enable ARMv8 CRC-32 checksum instructions">;
-def FeatureV8_1a : SubtargetFeature<"v8.1a", "HasV8_1a", "true",
- "Enable ARMv8.1a extensions", [FeatureCRC]>;
-
/// Cyclone has register move instructions which are "free".
def FeatureZCRegMove : SubtargetFeature<"zcm", "HasZeroCycleRegMove", "true",
"Has zero-cycle register moves">;
@@ -44,6 +41,13 @@ def FeatureZCZeroing : SubtargetFeature<"zcz", "HasZeroCycleZeroing", "true",
"Has zero-cycle zeroing instructions">;
//===----------------------------------------------------------------------===//
+// Architectures.
+//
+
+def HasV8_1aOps : SubtargetFeature<"v8.1a", "HasV8_1aOps", "true",
+ "Support ARM v8.1a instructions", [FeatureCRC]>;
+
+//===----------------------------------------------------------------------===//
// Register File Description
//===----------------------------------------------------------------------===//
@@ -92,10 +96,6 @@ def : ProcessorModel<"generic", NoSchedModel, [FeatureFPARMv8,
FeatureNEON,
FeatureCRC]>;
-def : ProcessorModel<"generic-armv8.1-a", NoSchedModel, [FeatureV8_1a,
- FeatureNEON,
- FeatureCrypto]>;
-
def : ProcessorModel<"cortex-a53", CortexA53Model, [ProcA53]>;
def : ProcessorModel<"cortex-a57", CortexA57Model, [ProcA57]>;
// FIXME: Cortex-A72 is currently modelled as an Cortex-A57.
@@ -123,12 +123,14 @@ def AppleAsmParserVariant : AsmParserVariant {
// AsmWriter bits get associated with the correct class.
def GenericAsmWriter : AsmWriter {
string AsmWriterClassName = "InstPrinter";
+ int PassSubtarget = 1;
int Variant = 0;
bit isMCAsmWriter = 1;
}
def AppleAsmWriter : AsmWriter {
let AsmWriterClassName = "AppleInstPrinter";
+ int PassSubtarget = 1;
int Variant = 1;
int isMCAsmWriter = 1;
}
diff --git a/lib/Target/AArch64/AArch64AsmPrinter.cpp b/lib/Target/AArch64/AArch64AsmPrinter.cpp
index 1b4483a..0821cff 100644
--- a/lib/Target/AArch64/AArch64AsmPrinter.cpp
+++ b/lib/Target/AArch64/AArch64AsmPrinter.cpp
@@ -131,29 +131,6 @@ void AArch64AsmPrinter::EmitEndOfAsmFile(Module &M) {
OutStreamer.EmitAssemblerFlag(MCAF_SubsectionsViaSymbols);
SM.serializeToStackMapSection();
}
-
- // Emit a .data.rel section containing any stubs that were created.
- if (TT.isOSBinFormatELF()) {
- const TargetLoweringObjectFileELF &TLOFELF =
- static_cast<const TargetLoweringObjectFileELF &>(getObjFileLowering());
-
- MachineModuleInfoELF &MMIELF = MMI->getObjFileInfo<MachineModuleInfoELF>();
-
- // Output stubs for external and common global variables.
- MachineModuleInfoELF::SymbolListTy Stubs = MMIELF.GetGVStubList();
- if (!Stubs.empty()) {
- OutStreamer.SwitchSection(TLOFELF.getDataRelSection());
- const DataLayout *TD = TM.getDataLayout();
-
- for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
- OutStreamer.EmitLabel(Stubs[i].first);
- OutStreamer.EmitSymbolValue(Stubs[i].second.getPointer(),
- TD->getPointerSize(0));
- }
- Stubs.clear();
- }
- }
-
}
MachineLocation
@@ -371,8 +348,8 @@ void AArch64AsmPrinter::PrintDebugValueComment(const MachineInstr *MI,
assert(NOps == 4);
OS << '\t' << MAI->getCommentString() << "DEBUG_VALUE: ";
// cast away const; DIetc do not take const operands for some reason.
- DIVariable V(const_cast<MDNode *>(MI->getOperand(NOps - 1).getMetadata()));
- OS << V.getName();
+ OS << cast<MDLocalVariable>(MI->getOperand(NOps - 2).getMetadata())
+ ->getName();
OS << " <- ";
// Frame address. Currently handles register +- offset only.
assert(MI->getOperand(0).isReg() && MI->getOperand(1).isImm());
diff --git a/lib/Target/AArch64/AArch64CollectLOH.cpp b/lib/Target/AArch64/AArch64CollectLOH.cpp
index 568f258..efdb2e3 100644
--- a/lib/Target/AArch64/AArch64CollectLOH.cpp
+++ b/lib/Target/AArch64/AArch64CollectLOH.cpp
@@ -328,7 +328,7 @@ static void initReachingDef(const MachineFunction &MF,
const uint32_t *PreservedRegs = MO.getRegMask();
// Set generated regs.
- for (const auto Entry : RegToId) {
+ for (const auto &Entry : RegToId) {
unsigned Reg = Entry.second;
// Use the global register ID when querying APIs external to this
// pass.
diff --git a/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp b/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
index 41b1132..c2470f7 100644
--- a/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
+++ b/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
@@ -698,12 +698,15 @@ bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
return expandMOVImm(MBB, MBBI, 32);
case AArch64::MOVi64imm:
return expandMOVImm(MBB, MBBI, 64);
- case AArch64::RET_ReallyLR:
- BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::RET))
- .addReg(AArch64::LR);
+ case AArch64::RET_ReallyLR: {
+ MachineInstrBuilder MIB =
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::RET))
+ .addReg(AArch64::LR);
+ transferImpOps(MI, MIB, MIB);
MI.eraseFromParent();
return true;
}
+ }
return false;
}
diff --git a/lib/Target/AArch64/AArch64FastISel.cpp b/lib/Target/AArch64/AArch64FastISel.cpp
index 99cb641..c3f6859 100644
--- a/lib/Target/AArch64/AArch64FastISel.cpp
+++ b/lib/Target/AArch64/AArch64FastISel.cpp
@@ -1917,7 +1917,8 @@ bool AArch64FastISel::selectLoad(const Instruction *I) {
// could select it. Emit a copy to subreg if necessary. FastISel will remove
// it when it selects the integer extend.
unsigned Reg = lookUpRegForValue(IntExtVal);
- if (!Reg) {
+ auto *MI = MRI.getUniqueVRegDef(Reg);
+ if (!MI) {
if (RetVT == MVT::i64 && VT <= MVT::i32) {
if (WantZExt) {
// Delete the last emitted instruction from emitLoad (SUBREG_TO_REG).
@@ -1935,10 +1936,7 @@ bool AArch64FastISel::selectLoad(const Instruction *I) {
// The integer extend has already been emitted - delete all the instructions
// that have been emitted by the integer extend lowering code and use the
// result from the load instruction directly.
- while (Reg) {
- auto *MI = MRI.getUniqueVRegDef(Reg);
- if (!MI)
- break;
+ while (MI) {
Reg = 0;
for (auto &Opnd : MI->uses()) {
if (Opnd.isReg()) {
@@ -1947,6 +1945,9 @@ bool AArch64FastISel::selectLoad(const Instruction *I) {
}
}
MI->eraseFromParent();
+ MI = nullptr;
+ if (Reg)
+ MI = MRI.getUniqueVRegDef(Reg);
}
updateValueMap(IntExtVal, ResultReg);
return true;
@@ -3034,6 +3035,11 @@ bool AArch64FastISel::finishCall(CallLoweringInfo &CLI, MVT RetVT,
// Copy all of the result registers out of their specified physreg.
MVT CopyVT = RVLocs[0].getValVT();
+
+ // TODO: Handle big-endian results
+ if (CopyVT.isVector() && !Subtarget->isLittleEndian())
+ return false;
+
unsigned ResultReg = createResultReg(TLI.getRegClassFor(CopyVT));
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::COPY), ResultReg)
diff --git a/lib/Target/AArch64/AArch64FrameLowering.cpp b/lib/Target/AArch64/AArch64FrameLowering.cpp
index 84bf317..bd2af16 100644
--- a/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -9,6 +9,82 @@
//
// This file contains the AArch64 implementation of TargetFrameLowering class.
//
+// On AArch64, stack frames are structured as follows:
+//
+// The stack grows downward.
+//
+// All of the individual frame areas on the frame below are optional, i.e. it's
+// possible to create a function so that the particular area isn't present
+// in the frame.
+//
+// At function entry, the "frame" looks as follows:
+//
+// | | Higher address
+// |-----------------------------------|
+// | |
+// | arguments passed on the stack |
+// | |
+// |-----------------------------------| <- sp
+// | | Lower address
+//
+//
+// After the prologue has run, the frame has the following general structure.
+// Note that this doesn't depict the case where a red-zone is used. Also,
+// technically the last frame area (VLAs) isn't created until the main
+// function body runs, after the prologue has finished. However, it's
+// depicted here for completeness.
+//
+// | | Higher address
+// |-----------------------------------|
+// | |
+// | arguments passed on the stack |
+// | |
+// |-----------------------------------|
+// | |
+// | prev_fp, prev_lr |
+// | (a.k.a. "frame record") |
+// |-----------------------------------| <- fp(=x29)
+// | |
+// | other callee-saved registers |
+// | |
+// |-----------------------------------|
+// |.empty.space.to.make.part.below....|
+// |.aligned.in.case.it.needs.more.than| (size of this area is unknown at
+// |.the.standard.16-byte.alignment....| compile time; if present)
+// |-----------------------------------|
+// | |
+// | local variables of fixed size |
+// | including spill slots |
+// |-----------------------------------| <- bp(not defined by ABI,
+// |.variable-sized.local.variables....| LLVM chooses X19)
+// |.(VLAs)............................| (size of this area is unknown at
+// |...................................| compile time)
+// |-----------------------------------| <- sp
+// | | Lower address
+//
+//
+// To access data in a frame, the offset from one of the pointers (fp, bp,
+// sp) to that data must be a constant computable at compile time. The size
+// of the areas with a dotted background cannot be computed at compile time
+// if they are present, so all three of fp, bp and sp must be set up in
+// order to access all contents of the frame areas, assuming all of the
+// frame areas are non-empty.
+//
+// For most functions, some of the frame areas are empty. For those functions,
+// it may not be necessary to set up fp or bp:
+// * A base pointer is definitely needed when there are both VLAs and local
+// variables with more-than-default alignment requirements.
+// * A frame pointer is definitely needed when there are local variables with
+// more-than-default alignment requirements.
+//
+// In some cases when a base pointer is not strictly needed, it is generated
+// anyway when offsets from the frame pointer to access local variables become
+// so large that the offset can't be encoded in the immediate fields of loads
+// or stores.
+//
+// FIXME: also explain the redzone concept.
+// FIXME: also explain the concept of reserved call frames.
+//
//===----------------------------------------------------------------------===//
#include "AArch64FrameLowering.h"
@@ -39,26 +115,6 @@ static cl::opt<bool> EnableRedZone("aarch64-redzone",
STATISTIC(NumRedZoneFunctions, "Number of functions using red zone");
-static unsigned estimateStackSize(MachineFunction &MF) {
- const MachineFrameInfo *FFI = MF.getFrameInfo();
- int Offset = 0;
- for (int i = FFI->getObjectIndexBegin(); i != 0; ++i) {
- int FixedOff = -FFI->getObjectOffset(i);
- if (FixedOff > Offset)
- Offset = FixedOff;
- }
- for (unsigned i = 0, e = FFI->getObjectIndexEnd(); i != e; ++i) {
- if (FFI->isDeadObjectIndex(i))
- continue;
- Offset += FFI->getObjectSize(i);
- unsigned Align = FFI->getObjectAlignment(i);
- // Adjust to alignment boundary
- Offset = (Offset + Align - 1) / Align * Align;
- }
- // This does not include the 16 bytes used for fp and lr.
- return (unsigned)Offset;
-}
-
bool AArch64FrameLowering::canUseRedZone(const MachineFunction &MF) const {
if (!EnableRedZone)
return false;
@@ -83,16 +139,10 @@ bool AArch64FrameLowering::canUseRedZone(const MachineFunction &MF) const {
/// pointer register.
bool AArch64FrameLowering::hasFP(const MachineFunction &MF) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
-
-#ifndef NDEBUG
const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
- assert(!RegInfo->needsStackRealignment(MF) &&
- "No stack realignment on AArch64!");
-#endif
-
return (MFI->hasCalls() || MFI->hasVarSizedObjects() ||
MFI->isFrameAddressTaken() || MFI->hasStackMap() ||
- MFI->hasPatchPoint());
+ MFI->hasPatchPoint() || RegInfo->needsStackRealignment(MF));
}
/// hasReservedCallFrame - Under normal circumstances, when a frame pointer is
@@ -288,11 +338,48 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF) const {
AFI->setLocalStackSize(NumBytes);
// Allocate space for the rest of the frame.
- if (NumBytes) {
- // If we're a leaf function, try using the red zone.
- if (!canUseRedZone(MF))
- emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, -NumBytes, TII,
- MachineInstr::FrameSetup);
+
+ const unsigned Alignment = MFI->getMaxAlignment();
+ const bool NeedsRealignment = (Alignment > 16);
+ unsigned scratchSPReg = AArch64::SP;
+ if (NeedsRealignment) {
+ // Use the temporary register X9 as a scratch register
+ assert(MF.getRegInfo().isPhysRegUsed(AArch64::X9) &&
+ "No scratch register to align SP!");
+ scratchSPReg = AArch64::X9;
+ }
+
+ // If we're a leaf function, try using the red zone.
+ if (NumBytes && !canUseRedZone(MF))
+ // FIXME: in the case of dynamic re-alignment, NumBytes doesn't have
+ // the correct value here, as NumBytes also includes padding bytes,
+ // which shouldn't be counted here.
+ emitFrameOffset(MBB, MBBI, DL, scratchSPReg, AArch64::SP, -NumBytes, TII,
+ MachineInstr::FrameSetup);
+
+ assert(!(NeedsRealignment && NumBytes==0) &&
+ "NumBytes should never be 0 when realignment is needed");
+
+ if (NumBytes && NeedsRealignment) {
+ const unsigned NrBitsToZero = countTrailingZeros(Alignment);
+ assert(NrBitsToZero > 1);
+ assert(scratchSPReg != AArch64::SP);
+
+ // SUB X9, SP, NumBytes
+ // -- X9 is temporary register, so shouldn't contain any live data here,
+ // -- free to use. This is already produced by emitFrameOffset above.
+ // AND SP, X9, 0b11111...0000
+ // The logical immediates have a non-trivial encoding. The following
+ // formula computes the encoded immediate with all ones but
+ // NrBitsToZero zero bits as least significant bits.
+ uint32_t andMaskEncoded =
+ (1 <<12) // = N
+ | ((64-NrBitsToZero) << 6) // immr
+ | ((64-NrBitsToZero-1) << 0) // imms
+ ;
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::ANDXri), AArch64::SP)
+ .addReg(scratchSPReg, RegState::Kill)
+ .addImm(andMaskEncoded);
}
// If we need a base pointer, set it up here. It's whatever the value of the
@@ -302,15 +389,15 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF) const {
// FIXME: Clarify FrameSetup flags here.
// Note: Use emitFrameOffset() like above for FP if the FrameSetup flag is
// needed.
- //
- if (RegInfo->hasBasePointer(MF))
- TII->copyPhysReg(MBB, MBBI, DL, AArch64::X19, AArch64::SP, false);
+ if (RegInfo->hasBasePointer(MF)) {
+ TII->copyPhysReg(MBB, MBBI, DL, RegInfo->getBaseRegister(), AArch64::SP,
+ false);
+ }
if (needsFrameMoves) {
const DataLayout *TD = MF.getTarget().getDataLayout();
const int StackGrowth = -TD->getPointerSize(0);
unsigned FramePtr = RegInfo->getFrameRegister(MF);
-
// An example of the prologue:
//
// .globl __foo
@@ -460,7 +547,7 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
if (MF.getFunction()->getCallingConv() == CallingConv::GHC)
return;
- // Initial and residual are named for consitency with the prologue. Note that
+ // Initial and residual are named for consistency with the prologue. Note that
// in the epilogue, the residual adjustment is executed first.
uint64_t ArgumentPopSize = 0;
if (RetOpcode == AArch64::TCRETURNdi || RetOpcode == AArch64::TCRETURNri) {
@@ -571,9 +658,9 @@ int AArch64FrameLowering::resolveFrameIndexReference(const MachineFunction &MF,
bool isFixed = MFI->isFixedObjectIndex(FI);
// Use frame pointer to reference fixed objects. Use it for locals if
- // there are VLAs (and thus the SP isn't reliable as a base).
- // Make sure useFPForScavengingIndex() does the right thing for the emergency
- // spill slot.
+ // there are VLAs or a dynamically realigned SP (and thus the SP isn't
+ // reliable as a base). Make sure useFPForScavengingIndex() does the
+ // right thing for the emergency spill slot.
bool UseFP = false;
if (AFI->hasStackFrame()) {
// Note: Keeping the following as multiple 'if' statements rather than
@@ -582,7 +669,8 @@ int AArch64FrameLowering::resolveFrameIndexReference(const MachineFunction &MF,
// Argument access should always use the FP.
if (isFixed) {
UseFP = hasFP(MF);
- } else if (hasFP(MF) && !RegInfo->hasBasePointer(MF)) {
+ } else if (hasFP(MF) && !RegInfo->hasBasePointer(MF) &&
+ !RegInfo->needsStackRealignment(MF)) {
// Use SP or FP, whichever gives us the best chance of the offset
// being in range for direct access. If the FPOffset is positive,
// that'll always be best, as the SP will be even further away.
@@ -598,6 +686,10 @@ int AArch64FrameLowering::resolveFrameIndexReference(const MachineFunction &MF,
}
}
+ assert((isFixed || !RegInfo->needsStackRealignment(MF) || !UseFP) &&
+ "In the presence of dynamic stack pointer realignment, "
+ "non-argument objects cannot be accessed through the frame pointer");
+
if (UseFP) {
FrameReg = RegInfo->getFrameRegister(MF);
return FPOffset;
@@ -695,6 +787,8 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters(
if (StrOpc == AArch64::STPDpre || StrOpc == AArch64::STPXpre)
MIB.addReg(AArch64::SP, RegState::Define);
+ MBB.addLiveIn(Reg1);
+ MBB.addLiveIn(Reg2);
MIB.addReg(Reg2, getPrologueDeath(MF, Reg2))
.addReg(Reg1, getPrologueDeath(MF, Reg1))
.addReg(AArch64::SP)
@@ -794,6 +888,9 @@ void AArch64FrameLowering::processFunctionBeforeCalleeSavedScan(
if (RegInfo->hasBasePointer(MF))
MRI->setPhysRegUsed(RegInfo->getBaseRegister());
+ if (RegInfo->needsStackRealignment(MF) && !RegInfo->hasBasePointer(MF))
+ MRI->setPhysRegUsed(AArch64::X9);
+
// If any callee-saved registers are used, the frame cannot be eliminated.
unsigned NumGPRSpilled = 0;
unsigned NumFPRSpilled = 0;
@@ -867,7 +964,8 @@ void AArch64FrameLowering::processFunctionBeforeCalleeSavedScan(
// The CSR spill slots have not been allocated yet, so estimateStackSize
// won't include them.
MachineFrameInfo *MFI = MF.getFrameInfo();
- unsigned CFSize = estimateStackSize(MF) + 8 * (NumGPRSpilled + NumFPRSpilled);
+ unsigned CFSize =
+ MFI->estimateStackSize(MF) + 8 * (NumGPRSpilled + NumFPRSpilled);
DEBUG(dbgs() << "Estimated stack frame size: " << CFSize << " bytes.\n");
bool BigStack = (CFSize >= 256);
if (BigStack || !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF))
diff --git a/lib/Target/AArch64/AArch64FrameLowering.h b/lib/Target/AArch64/AArch64FrameLowering.h
index df3875f..1439bf3 100644
--- a/lib/Target/AArch64/AArch64FrameLowering.h
+++ b/lib/Target/AArch64/AArch64FrameLowering.h
@@ -22,7 +22,7 @@ class AArch64FrameLowering : public TargetFrameLowering {
public:
explicit AArch64FrameLowering()
: TargetFrameLowering(StackGrowsDown, 16, 0, 16,
- false /*StackRealignable*/) {}
+ true /*StackRealignable*/) {}
void emitCalleeSavedFrameMoves(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
diff --git a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index 0a47dcb..f75700d 100644
--- a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -848,7 +848,7 @@ bool AArch64DAGToDAGISel::SelectAddrModeXRO(SDValue N, unsigned Size,
// MOV X0, WideImmediate
// LDR X2, [BaseReg, X0]
if (isa<ConstantSDNode>(RHS)) {
- int64_t ImmOff = (int64_t)dyn_cast<ConstantSDNode>(RHS)->getZExtValue();
+ int64_t ImmOff = (int64_t)cast<ConstantSDNode>(RHS)->getZExtValue();
unsigned Scale = Log2_32(Size);
// Skip the immediate can be seleced by load/store addressing mode.
// Also skip the immediate can be encoded by a single ADD (SUB is also
diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp
index 0c0e856..90a5e5e 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -281,14 +281,39 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
- // f16 is storage-only, so we promote operations to f32 if we know this is
- // valid, and ignore them otherwise. The operations not mentioned here will
- // fail to select, but this is not a major problem as no source language
- // should be emitting native f16 operations yet.
- setOperationAction(ISD::FADD, MVT::f16, Promote);
- setOperationAction(ISD::FDIV, MVT::f16, Promote);
- setOperationAction(ISD::FMUL, MVT::f16, Promote);
- setOperationAction(ISD::FSUB, MVT::f16, Promote);
+ // f16 is a storage-only type, always promote it to f32.
+ setOperationAction(ISD::SETCC, MVT::f16, Promote);
+ setOperationAction(ISD::BR_CC, MVT::f16, Promote);
+ setOperationAction(ISD::SELECT_CC, MVT::f16, Promote);
+ setOperationAction(ISD::SELECT, MVT::f16, Promote);
+ setOperationAction(ISD::FADD, MVT::f16, Promote);
+ setOperationAction(ISD::FSUB, MVT::f16, Promote);
+ setOperationAction(ISD::FMUL, MVT::f16, Promote);
+ setOperationAction(ISD::FDIV, MVT::f16, Promote);
+ setOperationAction(ISD::FREM, MVT::f16, Promote);
+ setOperationAction(ISD::FMA, MVT::f16, Promote);
+ setOperationAction(ISD::FNEG, MVT::f16, Promote);
+ setOperationAction(ISD::FABS, MVT::f16, Promote);
+ setOperationAction(ISD::FCEIL, MVT::f16, Promote);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f16, Promote);
+ setOperationAction(ISD::FCOS, MVT::f16, Promote);
+ setOperationAction(ISD::FFLOOR, MVT::f16, Promote);
+ setOperationAction(ISD::FNEARBYINT, MVT::f16, Promote);
+ setOperationAction(ISD::FPOW, MVT::f16, Promote);
+ setOperationAction(ISD::FPOWI, MVT::f16, Promote);
+ setOperationAction(ISD::FRINT, MVT::f16, Promote);
+ setOperationAction(ISD::FSIN, MVT::f16, Promote);
+ setOperationAction(ISD::FSINCOS, MVT::f16, Promote);
+ setOperationAction(ISD::FSQRT, MVT::f16, Promote);
+ setOperationAction(ISD::FEXP, MVT::f16, Promote);
+ setOperationAction(ISD::FEXP2, MVT::f16, Promote);
+ setOperationAction(ISD::FLOG, MVT::f16, Promote);
+ setOperationAction(ISD::FLOG2, MVT::f16, Promote);
+ setOperationAction(ISD::FLOG10, MVT::f16, Promote);
+ setOperationAction(ISD::FROUND, MVT::f16, Promote);
+ setOperationAction(ISD::FTRUNC, MVT::f16, Promote);
+ setOperationAction(ISD::FMINNUM, MVT::f16, Promote);
+ setOperationAction(ISD::FMAXNUM, MVT::f16, Promote);
// v4f16 is also a storage-only type, so promote it to v4f32 when that is
// known to be safe.
@@ -481,6 +506,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
// Enable TBZ/TBNZ
MaskAndBranchFoldingIsLegal = true;
+ EnableExtLdPromotion = true;
setMinFunctionAlignment(2);
@@ -1557,6 +1583,14 @@ SDValue AArch64TargetLowering::LowerFP_TO_INT(SDValue Op,
if (Op.getOperand(0).getValueType().isVector())
return LowerVectorFP_TO_INT(Op, DAG);
+ // f16 conversions are promoted to f32.
+ if (Op.getOperand(0).getValueType() == MVT::f16) {
+ SDLoc dl(Op);
+ return DAG.getNode(
+ Op.getOpcode(), dl, Op.getValueType(),
+ DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, Op.getOperand(0)));
+ }
+
if (Op.getOperand(0).getValueType() != MVT::f128) {
// It's legal except when f128 is involved
return Op;
@@ -1606,6 +1640,15 @@ SDValue AArch64TargetLowering::LowerINT_TO_FP(SDValue Op,
if (Op.getValueType().isVector())
return LowerVectorINT_TO_FP(Op, DAG);
+ // f16 conversions are promoted to f32.
+ if (Op.getValueType() == MVT::f16) {
+ SDLoc dl(Op);
+ return DAG.getNode(
+ ISD::FP_ROUND, dl, MVT::f16,
+ DAG.getNode(Op.getOpcode(), dl, MVT::f32, Op.getOperand(0)),
+ DAG.getIntPtrConstant(0));
+ }
+
// i128 conversions are libcalls.
if (Op.getOperand(0).getValueType() == MVT::i128)
return SDValue();
@@ -2701,8 +2744,9 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
DAG.getConstant(Outs[i].Flags.getByValSize(), MVT::i64);
SDValue Cpy = DAG.getMemcpy(
Chain, DL, DstAddr, Arg, SizeNode, Outs[i].Flags.getByValAlign(),
- /*isVol = */ false,
- /*AlwaysInline = */ false, DstInfo, MachinePointerInfo());
+ /*isVol = */ false, /*AlwaysInline = */ false,
+ /*isTailCall = */ false,
+ DstInfo, MachinePointerInfo());
MemOpChains.push_back(Cpy);
} else {
@@ -3514,49 +3558,10 @@ static bool selectCCOpsAreFMaxCompatible(SDValue Cmp, SDValue Result) {
return Result->getOpcode() == ISD::FP_EXTEND && Result->getOperand(0) == Cmp;
}
-SDValue AArch64TargetLowering::LowerSELECT(SDValue Op,
- SelectionDAG &DAG) const {
- SDValue CC = Op->getOperand(0);
- SDValue TVal = Op->getOperand(1);
- SDValue FVal = Op->getOperand(2);
- SDLoc DL(Op);
-
- unsigned Opc = CC.getOpcode();
- // Optimize {s|u}{add|sub|mul}.with.overflow feeding into a select
- // instruction.
- if (CC.getResNo() == 1 &&
- (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
- Opc == ISD::USUBO || Opc == ISD::SMULO || Opc == ISD::UMULO)) {
- // Only lower legal XALUO ops.
- if (!DAG.getTargetLoweringInfo().isTypeLegal(CC->getValueType(0)))
- return SDValue();
-
- AArch64CC::CondCode OFCC;
- SDValue Value, Overflow;
- std::tie(Value, Overflow) = getAArch64XALUOOp(OFCC, CC.getValue(0), DAG);
- SDValue CCVal = DAG.getConstant(OFCC, MVT::i32);
-
- return DAG.getNode(AArch64ISD::CSEL, DL, Op.getValueType(), TVal, FVal,
- CCVal, Overflow);
- }
-
- if (CC.getOpcode() == ISD::SETCC)
- return DAG.getSelectCC(DL, CC.getOperand(0), CC.getOperand(1), TVal, FVal,
- cast<CondCodeSDNode>(CC.getOperand(2))->get());
- else
- return DAG.getSelectCC(DL, CC, DAG.getConstant(0, CC.getValueType()), TVal,
- FVal, ISD::SETNE);
-}
-
-SDValue AArch64TargetLowering::LowerSELECT_CC(SDValue Op,
+SDValue AArch64TargetLowering::LowerSELECT_CC(ISD::CondCode CC, SDValue LHS,
+ SDValue RHS, SDValue TVal,
+ SDValue FVal, SDLoc dl,
SelectionDAG &DAG) const {
- ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
- SDValue LHS = Op.getOperand(0);
- SDValue RHS = Op.getOperand(1);
- SDValue TVal = Op.getOperand(2);
- SDValue FVal = Op.getOperand(3);
- SDLoc dl(Op);
-
// Handle f128 first, because it will result in a comparison of some RTLIB
// call result against zero.
if (LHS.getValueType() == MVT::f128) {
@@ -3664,14 +3669,14 @@ SDValue AArch64TargetLowering::LowerSELECT_CC(SDValue Op,
SDValue CCVal;
SDValue Cmp = getAArch64Cmp(LHS, RHS, CC, CCVal, DAG, dl);
- EVT VT = Op.getValueType();
+ EVT VT = TVal.getValueType();
return DAG.getNode(Opcode, dl, VT, TVal, FVal, CCVal, Cmp);
}
// Now we know we're dealing with FP values.
assert(LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64);
assert(LHS.getValueType() == RHS.getValueType());
- EVT VT = Op.getValueType();
+ EVT VT = TVal.getValueType();
// Try to match this select into a max/min operation, which have dedicated
// opcode in the instruction set.
@@ -3732,6 +3737,58 @@ SDValue AArch64TargetLowering::LowerSELECT_CC(SDValue Op,
return CS1;
}
+SDValue AArch64TargetLowering::LowerSELECT_CC(SDValue Op,
+                                              SelectionDAG &DAG) const {
+  // Node-based entry point: unpack the SELECT_CC operands and defer to the
+  // operand-based overload. Operand order as read here: 0/1 are the compared
+  // values (LHS/RHS), 2/3 the true/false results (TVal/FVal), 4 the
+  // condition code.
+  const ISD::CondCode Pred = cast<CondCodeSDNode>(Op.getOperand(4))->get();
+  return LowerSELECT_CC(Pred, Op.getOperand(0), Op.getOperand(1),
+                        Op.getOperand(2), Op.getOperand(3), SDLoc(Op), DAG);
+}
+
+SDValue AArch64TargetLowering::LowerSELECT(SDValue Op,
+                                           SelectionDAG &DAG) const {
+  // Operand 0 is the condition being selected on; operands 1 and 2 are the
+  // values produced when it is true and false respectively.
+  SDValue CCVal = Op->getOperand(0);
+  SDValue TVal = Op->getOperand(1);
+  SDValue FVal = Op->getOperand(2);
+  SDLoc DL(Op);
+
+  unsigned Opc = CCVal.getOpcode();
+  // Optimize {s|u}{add|sub|mul}.with.overflow feeding into a select
+  // instruction.
+  if (CCVal.getResNo() == 1 &&
+      (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
+       Opc == ISD::USUBO || Opc == ISD::SMULO || Opc == ISD::UMULO)) {
+    // Only lower legal XALUO ops.
+    if (!DAG.getTargetLoweringInfo().isTypeLegal(CCVal->getValueType(0)))
+      return SDValue();
+
+    AArch64CC::CondCode OFCC;
+    SDValue Value, Overflow;
+    std::tie(Value, Overflow) = getAArch64XALUOOp(OFCC, CCVal.getValue(0), DAG);
+    // Deliberately a distinct name: the condition-code constant must not
+    // shadow CCVal, the select's condition operand used just above.
+    SDValue OFCCVal = DAG.getConstant(OFCC, MVT::i32);
+
+    return DAG.getNode(AArch64ISD::CSEL, DL, Op.getValueType(), TVal, FVal,
+                       OFCCVal, Overflow);
+  }
+
+  // Lower it the same way as we would lower a SELECT_CC node.
+  ISD::CondCode CC;
+  SDValue LHS, RHS;
+  if (Opc == ISD::SETCC) {
+    // The condition is itself a comparison: select directly on its operands
+    // and predicate.
+    LHS = CCVal.getOperand(0);
+    RHS = CCVal.getOperand(1);
+    CC = cast<CondCodeSDNode>(CCVal->getOperand(2))->get();
+  } else {
+    // Any other condition value is treated as "CCVal != 0".
+    LHS = CCVal;
+    RHS = DAG.getConstant(0, CCVal.getValueType());
+    CC = ISD::SETNE;
+  }
+  return LowerSELECT_CC(CC, LHS, RHS, TVal, FVal, DL, DAG);
+}
+
SDValue AArch64TargetLowering::LowerJumpTable(SDValue Op,
SelectionDAG &DAG) const {
// Jump table entries as PC relative offsets. No additional tweaking
@@ -3920,7 +3977,7 @@ SDValue AArch64TargetLowering::LowerVACOPY(SDValue Op,
return DAG.getMemcpy(Op.getOperand(0), SDLoc(Op), Op.getOperand(1),
Op.getOperand(2), DAG.getConstant(VaListSize, MVT::i32),
- 8, false, false, MachinePointerInfo(DestSV),
+ 8, false, false, false, MachinePointerInfo(DestSV),
MachinePointerInfo(SrcSV));
}
@@ -4989,7 +5046,7 @@ static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
unsigned Opcode;
if (EltTy == MVT::i8)
Opcode = AArch64ISD::DUPLANE8;
- else if (EltTy == MVT::i16)
+ else if (EltTy == MVT::i16 || EltTy == MVT::f16)
Opcode = AArch64ISD::DUPLANE16;
else if (EltTy == MVT::i32 || EltTy == MVT::f32)
Opcode = AArch64ISD::DUPLANE32;
@@ -6554,6 +6611,59 @@ bool AArch64TargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
VT1.getSizeInBits() <= 32);
}
+bool AArch64TargetLowering::isExtFreeImpl(const Instruction *Ext) const {
+ if (isa<FPExtInst>(Ext))
+ return false; // Floating-point extensions are never reported as free here.
+
+ // Vector types are not free.
+ if (Ext->getType()->isVectorTy())
+ return false;
+
+ for (const Use &U : Ext->uses()) {
+ // Ext is reported free only if every one of its uses can absorb it. The
+ // uses accepted below are a constant left shift, a scaled GEP index, and a no-op trunc.
+
+ // Every user is expected to be an instruction (cast<> asserts otherwise).
+ const Instruction *Instr = cast<Instruction>(U.getUser());
+
+ // Classify the user; any opcode not handled below makes Ext non-free.
+ switch (Instr->getOpcode()) {
+ case Instruction::Shl:
+ if (!isa<ConstantInt>(Instr->getOperand(1)))
+ return false;
+ break;
+ case Instruction::GetElementPtr: {
+ gep_type_iterator GTI = gep_type_begin(Instr);
+ std::advance(GTI, U.getOperandNo());
+ Type *IdxTy = *GTI;
+ // This extension will end up with a shift because of the scaling factor.
+ // 8-bit sized types have a scaling factor of 1, thus a shift amount of 0.
+ // Get the shift amount based on the scaling factor:
+ // log2(store size of IdxTy in bits) - log2(8).
+ uint64_t ShiftAmt =
+ countTrailingZeros(getDataLayout()->getTypeStoreSizeInBits(IdxTy)) - 3;
+ // Is the constant foldable in the shift of the addressing mode?
+ // I.e., shift amount is between 1 and 4 inclusive.
+ if (ShiftAmt == 0 || ShiftAmt > 4)
+ return false;
+ break;
+ }
+ case Instruction::Trunc:
+ // Check if this is a noop, i.e. the trunc restores Ext's source type:
+ // trunc(sext ty1 to ty2) to ty1.
+ if (Instr->getType() == Ext->getOperand(0)->getType())
+ continue;
+ // FALL THROUGH.
+ default:
+ return false;
+ }
+
+ // At this point we can use the bfm family, so this extension is free
+ // for that use.
+ }
+ return true;
+}
+
bool AArch64TargetLowering::hasPairedLoad(Type *LoadedType,
unsigned &RequiredAligment) const {
if (!LoadedType->isIntegerTy() && !LoadedType->isFloatTy())
@@ -6597,7 +6707,17 @@ EVT AArch64TargetLowering::getOptimalMemOpType(uint64_t Size, unsigned DstAlign,
(allowsMisalignedMemoryAccesses(MVT::f128, 0, 1, &Fast) && Fast)))
return MVT::f128;
- return Size >= 8 ? MVT::i64 : MVT::i32;
+ if (Size >= 8 &&
+ (memOpAlign(SrcAlign, DstAlign, 8) ||
+ (allowsMisalignedMemoryAccesses(MVT::i64, 0, 1, &Fast) && Fast)))
+ return MVT::i64;
+
+ if (Size >= 4 &&
+ (memOpAlign(SrcAlign, DstAlign, 4) ||
+ (allowsMisalignedMemoryAccesses(MVT::i32, 0, 1, &Fast) && Fast)))
+ return MVT::i32;
+
+ return MVT::Other;
}
// 12-bit optionally shifted immediates are legal for adds.
diff --git a/lib/Target/AArch64/AArch64ISelLowering.h b/lib/Target/AArch64/AArch64ISelLowering.h
index 5ff11e8..820613b 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/lib/Target/AArch64/AArch64ISelLowering.h
@@ -355,6 +355,8 @@ public:
getPreferredVectorAction(EVT VT) const override;
private:
+ bool isExtFreeImpl(const Instruction *Ext) const override;
+
/// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
/// make the right decision when generating code for different targets.
const AArch64Subtarget *Subtarget;
@@ -418,6 +420,9 @@ private:
SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSELECT_CC(ISD::CondCode CC, SDValue LHS, SDValue RHS,
+ SDValue TVal, SDValue FVal, SDLoc dl,
+ SelectionDAG &DAG) const;
SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
diff --git a/lib/Target/AArch64/AArch64InstrFormats.td b/lib/Target/AArch64/AArch64InstrFormats.td
index d295c02..0c0efaf 100644
--- a/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/lib/Target/AArch64/AArch64InstrFormats.td
@@ -1637,10 +1637,16 @@ multiclass AddSub<bit isSub, string mnemonic,
SDPatternOperator OpNode = null_frag> {
let hasSideEffects = 0, isReMaterializable = 1, isAsCheapAsAMove = 1 in {
// Add/Subtract immediate
+ // Increase the weight of the immediate variant to try to match it before
+ // the extended register variant.
+ // We used to match the register variant before the immediate when the
+ // register argument could be implicitly zero-extended.
+ let AddedComplexity = 6 in
def Wri : BaseAddSubImm<isSub, 0, GPR32sp, GPR32sp, addsub_shifted_imm32,
mnemonic, OpNode> {
let Inst{31} = 0;
}
+ let AddedComplexity = 6 in
def Xri : BaseAddSubImm<isSub, 0, GPR64sp, GPR64sp, addsub_shifted_imm64,
mnemonic, OpNode> {
let Inst{31} = 1;
@@ -3282,6 +3288,10 @@ class LoadStoreExclusiveSimple<bits<2> sz, bit o2, bit L, bit o1, bit o0,
: BaseLoadStoreExclusive<sz, o2, L, o1, o0, oops, iops, asm, operands> {
bits<5> Rt;
bits<5> Rn;
+ let Inst{20-16} = 0b11111;
+ let Unpredictable{20-16} = 0b11111;
+ let Inst{14-10} = 0b11111;
+ let Unpredictable{14-10} = 0b11111;
let Inst{9-5} = Rn;
let Inst{4-0} = Rt;
@@ -5298,6 +5308,27 @@ class BaseSIMDThreeScalar<bit U, bits<2> size, bits<5> opcode,
let Inst{4-0} = Rd;
}
+let mayStore = 0, mayLoad = 0, hasSideEffects = 0 in
+class BaseSIMDThreeScalarTied<bit U, bits<2> size, bit R, bits<5> opcode,
+ dag oops, dag iops, string asm,
+ list<dag> pattern>
+ : I<oops, iops, asm, "\t$Rd, $Rn, $Rm", "$Rd = $dst", pattern>,
+ Sched<[WriteV]> {
+ bits<5> Rd;
+ bits<5> Rn;
+ bits<5> Rm;
+ let Inst{31-30} = 0b01;
+ let Inst{29} = U;
+ let Inst{28-24} = 0b11110;
+ let Inst{23-22} = size;
+ let Inst{21} = R;
+ let Inst{20-16} = Rm;
+ let Inst{15-11} = opcode;
+ let Inst{10} = 1;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rd;
+}
+
multiclass SIMDThreeScalarD<bit U, bits<5> opc, string asm,
SDPatternOperator OpNode> {
def v1i64 : BaseSIMDThreeScalar<U, 0b11, opc, FPR64, asm,
@@ -5325,6 +5356,16 @@ multiclass SIMDThreeScalarHS<bit U, bits<5> opc, string asm,
def v1i16 : BaseSIMDThreeScalar<U, 0b01, opc, FPR16, asm, []>;
}
+multiclass SIMDThreeScalarHSTied<bit U, bit R, bits<5> opc, string asm,
+ SDPatternOperator OpNode = null_frag> {
+ def v1i32: BaseSIMDThreeScalarTied<U, 0b10, R, opc, (outs FPR32:$dst),
+ (ins FPR32:$Rd, FPR32:$Rn, FPR32:$Rm),
+ asm, []>;
+ def v1i16: BaseSIMDThreeScalarTied<U, 0b01, R, opc, (outs FPR16:$dst),
+ (ins FPR16:$Rd, FPR16:$Rn, FPR16:$Rm),
+ asm, []>;
+}
+
multiclass SIMDThreeScalarSD<bit U, bit S, bits<5> opc, string asm,
SDPatternOperator OpNode = null_frag> {
let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in {
@@ -5885,7 +5926,7 @@ multiclass SIMDIns {
let Inst{20-18} = idx;
let Inst{17-16} = 0b10;
let Inst{14-12} = idx2;
- let Inst{11} = 0;
+ let Inst{11} = {?};
}
def vi32lane : SIMDInsFromElement<".s", v4i32, i32, VectorIndexS> {
bits<2> idx;
@@ -5893,7 +5934,7 @@ multiclass SIMDIns {
let Inst{20-19} = idx;
let Inst{18-16} = 0b100;
let Inst{14-13} = idx2;
- let Inst{12-11} = 0;
+ let Inst{12-11} = {?,?};
}
def vi64lane : SIMDInsFromElement<".d", v2i64, i64, VectorIndexD> {
bits<1> idx;
@@ -5901,7 +5942,7 @@ multiclass SIMDIns {
let Inst{20} = idx;
let Inst{19-16} = 0b1000;
let Inst{14} = idx2;
- let Inst{13-11} = 0;
+ let Inst{13-11} = {?,?,?};
}
// For all forms of the INS instruction, the "mov" mnemonic is the
@@ -8517,6 +8558,174 @@ multiclass SIMDLdSt4SingleAliases<string asm> {
} // end of 'let Predicates = [HasNEON]'
//----------------------------------------------------------------------------
+// AdvSIMD v8.1 Rounding Double Multiply Add/Subtract
+//----------------------------------------------------------------------------
+
+let Predicates = [HasNEON, HasV8_1a] in {
+
+class BaseSIMDThreeSameVectorTiedR0<bit Q, bit U, bits<2> size, bits<5> opcode,
+ RegisterOperand regtype, string asm,
+ string kind, list<dag> pattern>
+ : BaseSIMDThreeSameVectorTied<Q, U, size, opcode, regtype, asm, kind,
+ pattern> {
+ let Inst{21}=0;
+}
+multiclass SIMDThreeSameVectorSQRDMLxHTiedHS<bit U, bits<5> opc, string asm,
+ SDPatternOperator Accum> {
+ def v4i16 : BaseSIMDThreeSameVectorTiedR0<0, U, 0b01, opc, V64, asm, ".4h",
+ [(set (v4i16 V64:$dst),
+ (Accum (v4i16 V64:$Rd),
+ (v4i16 (int_aarch64_neon_sqrdmulh (v4i16 V64:$Rn),
+ (v4i16 V64:$Rm)))))]>;
+ def v8i16 : BaseSIMDThreeSameVectorTiedR0<1, U, 0b01, opc, V128, asm, ".8h",
+ [(set (v8i16 V128:$dst),
+ (Accum (v8i16 V128:$Rd),
+ (v8i16 (int_aarch64_neon_sqrdmulh (v8i16 V128:$Rn),
+ (v8i16 V128:$Rm)))))]>;
+ def v2i32 : BaseSIMDThreeSameVectorTiedR0<0, U, 0b10, opc, V64, asm, ".2s",
+ [(set (v2i32 V64:$dst),
+ (Accum (v2i32 V64:$Rd),
+ (v2i32 (int_aarch64_neon_sqrdmulh (v2i32 V64:$Rn),
+ (v2i32 V64:$Rm)))))]>;
+ def v4i32 : BaseSIMDThreeSameVectorTiedR0<1, U, 0b10, opc, V128, asm, ".4s",
+ [(set (v4i32 V128:$dst),
+ (Accum (v4i32 V128:$Rd),
+ (v4i32 (int_aarch64_neon_sqrdmulh (v4i32 V128:$Rn),
+ (v4i32 V128:$Rm)))))]>;
+}
+
+multiclass SIMDIndexedSQRDMLxHSDTied<bit U, bits<4> opc, string asm,
+ SDPatternOperator Accum> {
+ def v4i16_indexed : BaseSIMDIndexedTied<0, U, 0, 0b01, opc,
+ V64, V64, V128_lo, VectorIndexH,
+ asm, ".4h", ".4h", ".4h", ".h",
+ [(set (v4i16 V64:$dst),
+ (Accum (v4i16 V64:$Rd),
+ (v4i16 (int_aarch64_neon_sqrdmulh
+ (v4i16 V64:$Rn),
+ (v4i16 (AArch64duplane16 (v8i16 V128_lo:$Rm),
+ VectorIndexH:$idx))))))]> {
+ bits<3> idx;
+ let Inst{11} = idx{2};
+ let Inst{21} = idx{1};
+ let Inst{20} = idx{0};
+ }
+
+ def v8i16_indexed : BaseSIMDIndexedTied<1, U, 0, 0b01, opc,
+ V128, V128, V128_lo, VectorIndexH,
+ asm, ".8h", ".8h", ".8h", ".h",
+ [(set (v8i16 V128:$dst),
+ (Accum (v8i16 V128:$Rd),
+ (v8i16 (int_aarch64_neon_sqrdmulh
+ (v8i16 V128:$Rn),
+ (v8i16 (AArch64duplane16 (v8i16 V128_lo:$Rm),
+ VectorIndexH:$idx))))))]> {
+ bits<3> idx;
+ let Inst{11} = idx{2};
+ let Inst{21} = idx{1};
+ let Inst{20} = idx{0};
+ }
+
+ def v2i32_indexed : BaseSIMDIndexedTied<0, U, 0, 0b10, opc,
+ V64, V64, V128, VectorIndexS,
+ asm, ".2s", ".2s", ".2s", ".s",
+ [(set (v2i32 V64:$dst),
+ (Accum (v2i32 V64:$Rd),
+ (v2i32 (int_aarch64_neon_sqrdmulh
+ (v2i32 V64:$Rn),
+ (v2i32 (AArch64duplane32 (v4i32 V128:$Rm),
+ VectorIndexS:$idx))))))]> {
+ bits<2> idx;
+ let Inst{11} = idx{1};
+ let Inst{21} = idx{0};
+ }
+
+ // FIXME: it would be nice to use the scalar (v1i32) instruction here, but
+ // an intermediate EXTRACT_SUBREG would be untyped.
+ // FIXME: direct EXTRACT_SUBREG from v2i32 to i32 is illegal, which is why it
+ // is lowered here as (i32 vector_extract (v4i32 insert_subvector(..)))
+ def : Pat<(i32 (Accum (i32 FPR32Op:$Rd),
+ (i32 (vector_extract
+ (v4i32 (insert_subvector
+ (undef),
+ (v2i32 (int_aarch64_neon_sqrdmulh
+ (v2i32 V64:$Rn),
+ (v2i32 (AArch64duplane32
+ (v4i32 V128:$Rm),
+ VectorIndexS:$idx)))),
+ (i32 0))),
+ (i64 0))))),
+ (EXTRACT_SUBREG
+ (v2i32 (!cast<Instruction>(NAME # v2i32_indexed)
+ (v2i32 (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)),
+ FPR32Op:$Rd,
+ ssub)),
+ V64:$Rn,
+ V128:$Rm,
+ VectorIndexS:$idx)),
+ ssub)>;
+
+ def v4i32_indexed : BaseSIMDIndexedTied<1, U, 0, 0b10, opc,
+ V128, V128, V128, VectorIndexS,
+ asm, ".4s", ".4s", ".4s", ".s",
+ [(set (v4i32 V128:$dst),
+ (Accum (v4i32 V128:$Rd),
+ (v4i32 (int_aarch64_neon_sqrdmulh
+ (v4i32 V128:$Rn),
+ (v4i32 (AArch64duplane32 (v4i32 V128:$Rm),
+ VectorIndexS:$idx))))))]> {
+ bits<2> idx;
+ let Inst{11} = idx{1};
+ let Inst{21} = idx{0};
+ }
+
+ // FIXME: it would be nice to use the scalar (v1i32) instruction here, but
+ // an intermediate EXTRACT_SUBREG would be untyped.
+ def : Pat<(i32 (Accum (i32 FPR32Op:$Rd),
+ (i32 (vector_extract
+ (v4i32 (int_aarch64_neon_sqrdmulh
+ (v4i32 V128:$Rn),
+ (v4i32 (AArch64duplane32
+ (v4i32 V128:$Rm),
+ VectorIndexS:$idx)))),
+ (i64 0))))),
+ (EXTRACT_SUBREG
+ (v4i32 (!cast<Instruction>(NAME # v4i32_indexed)
+ (v4i32 (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
+ FPR32Op:$Rd,
+ ssub)),
+ V128:$Rn,
+ V128:$Rm,
+ VectorIndexS:$idx)),
+ ssub)>;
+
+ def i16_indexed : BaseSIMDIndexedTied<1, U, 1, 0b01, opc,
+ FPR16Op, FPR16Op, V128_lo,
+ VectorIndexH, asm, ".h", "", "", ".h",
+ []> {
+ bits<3> idx;
+ let Inst{11} = idx{2};
+ let Inst{21} = idx{1};
+ let Inst{20} = idx{0};
+ }
+
+ def i32_indexed : BaseSIMDIndexedTied<1, U, 1, 0b10, opc,
+ FPR32Op, FPR32Op, V128, VectorIndexS,
+ asm, ".s", "", "", ".s",
+ [(set (i32 FPR32Op:$dst),
+ (Accum (i32 FPR32Op:$Rd),
+ (i32 (int_aarch64_neon_sqrdmulh
+ (i32 FPR32Op:$Rn),
+ (i32 (vector_extract (v4i32 V128:$Rm),
+ VectorIndexS:$idx))))))]> {
+ bits<2> idx;
+ let Inst{11} = idx{1};
+ let Inst{21} = idx{0};
+ }
+}
+} // let Predicates = [HasNEON, HasV8_1a]
+
+//----------------------------------------------------------------------------
// Crypto extensions
//----------------------------------------------------------------------------
diff --git a/lib/Target/AArch64/AArch64InstrInfo.cpp b/lib/Target/AArch64/AArch64InstrInfo.cpp
index 8e0af2d..db231c4 100644
--- a/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -1526,7 +1526,7 @@ void AArch64InstrInfo::copyPhysRegTuple(
}
for (; SubReg != End; SubReg += Incr) {
- const MachineInstrBuilder &MIB = BuildMI(MBB, I, DL, get(Opcode));
+ const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode));
AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI);
AddSubReg(MIB, SrcReg, Indices[SubReg], 0, TRI);
AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI);
@@ -1904,7 +1904,7 @@ void AArch64InstrInfo::storeRegToStackSlot(
}
assert(Opc && "Unknown register class");
- const MachineInstrBuilder &MI = BuildMI(MBB, MBBI, DL, get(Opc))
+ const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DL, get(Opc))
.addReg(SrcReg, getKillRegState(isKill))
.addFrameIndex(FI);
@@ -2002,7 +2002,7 @@ void AArch64InstrInfo::loadRegFromStackSlot(
}
assert(Opc && "Unknown register class");
- const MachineInstrBuilder &MI = BuildMI(MBB, MBBI, DL, get(Opc))
+ const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DL, get(Opc))
.addReg(DestReg, getDefRegState(true))
.addFrameIndex(FI);
if (Offset)
diff --git a/lib/Target/AArch64/AArch64InstrInfo.td b/lib/Target/AArch64/AArch64InstrInfo.td
index ec6fa5c..f7db50a 100644
--- a/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/lib/Target/AArch64/AArch64InstrInfo.td
@@ -14,6 +14,8 @@
//===----------------------------------------------------------------------===//
// ARM Instruction Predicate Definitions.
//
+def HasV8_1a : Predicate<"Subtarget->hasV8_1aOps()">,
+ AssemblerPredicate<"HasV8_1aOps", "armv8.1a">;
def HasFPARMv8 : Predicate<"Subtarget->hasFPARMv8()">,
AssemblerPredicate<"FeatureFPARMv8", "fp-armv8">;
def HasNEON : Predicate<"Subtarget->hasNEON()">,
@@ -22,8 +24,6 @@ def HasCrypto : Predicate<"Subtarget->hasCrypto()">,
AssemblerPredicate<"FeatureCrypto", "crypto">;
def HasCRC : Predicate<"Subtarget->hasCRC()">,
AssemblerPredicate<"FeatureCRC", "crc">;
-def HasV8_1a : Predicate<"Subtarget->hasV8_1a()">,
- AssemblerPredicate<"FeatureV8_1a", "v8.1a">;
def IsLE : Predicate<"Subtarget->isLittleEndian()">;
def IsBE : Predicate<"!Subtarget->isLittleEndian()">;
def IsCyclone : Predicate<"Subtarget->isCyclone()">;
@@ -2314,6 +2314,20 @@ def STLXPX : StoreExclusivePair<0b11, 0, 0, 1, 1, GPR64, "stlxp">;
def STXPW : StoreExclusivePair<0b10, 0, 0, 1, 0, GPR32, "stxp">;
def STXPX : StoreExclusivePair<0b11, 0, 0, 1, 0, GPR64, "stxp">;
+let Predicates = [HasV8_1a] in {
+ // v8.1a "Limited Order Region" extension load-acquire instructions
+ def LDLARW : LoadAcquire <0b10, 1, 1, 0, 0, GPR32, "ldlar">;
+ def LDLARX : LoadAcquire <0b11, 1, 1, 0, 0, GPR64, "ldlar">;
+ def LDLARB : LoadAcquire <0b00, 1, 1, 0, 0, GPR32, "ldlarb">;
+ def LDLARH : LoadAcquire <0b01, 1, 1, 0, 0, GPR32, "ldlarh">;
+
+ // v8.1a "Limited Order Region" extension store-release instructions
+ def STLLRW : StoreRelease <0b10, 1, 0, 0, 0, GPR32, "stllr">;
+ def STLLRX : StoreRelease <0b11, 1, 0, 0, 0, GPR64, "stllr">;
+ def STLLRB : StoreRelease <0b00, 1, 0, 0, 0, GPR32, "stllrb">;
+ def STLLRH : StoreRelease <0b01, 1, 0, 0, 0, GPR32, "stllrh">;
+}
+
//===----------------------------------------------------------------------===//
// Scaled floating point to integer conversion instructions.
//===----------------------------------------------------------------------===//
@@ -2778,6 +2792,10 @@ defm UQSUB : SIMDThreeSameVector<1,0b00101,"uqsub", int_aarch64_neon_uqsub>;
defm URHADD : SIMDThreeSameVectorBHS<1,0b00010,"urhadd", int_aarch64_neon_urhadd>;
defm URSHL : SIMDThreeSameVector<1,0b01010,"urshl", int_aarch64_neon_urshl>;
defm USHL : SIMDThreeSameVector<1,0b01000,"ushl", int_aarch64_neon_ushl>;
+defm SQRDMLAH : SIMDThreeSameVectorSQRDMLxHTiedHS<1,0b10000,"sqrdmlah",
+ int_aarch64_neon_sqadd>;
+defm SQRDMLSH : SIMDThreeSameVectorSQRDMLxHTiedHS<1,0b10001,"sqrdmlsh",
+ int_aarch64_neon_sqsub>;
defm AND : SIMDLogicalThreeVector<0, 0b00, "and", and>;
defm BIC : SIMDLogicalThreeVector<0, 0b01, "bic",
@@ -2994,6 +3012,20 @@ defm UQSHL : SIMDThreeScalarBHSD<1, 0b01001, "uqshl", int_aarch64_neon_uqshl>
defm UQSUB : SIMDThreeScalarBHSD<1, 0b00101, "uqsub", int_aarch64_neon_uqsub>;
defm URSHL : SIMDThreeScalarD< 1, 0b01010, "urshl", int_aarch64_neon_urshl>;
defm USHL : SIMDThreeScalarD< 1, 0b01000, "ushl", int_aarch64_neon_ushl>;
+let Predicates = [HasV8_1a] in {
+ defm SQRDMLAH : SIMDThreeScalarHSTied<1, 0, 0b10000, "sqrdmlah">;
+ defm SQRDMLSH : SIMDThreeScalarHSTied<1, 0, 0b10001, "sqrdmlsh">;
+ def : Pat<(i32 (int_aarch64_neon_sqadd
+ (i32 FPR32:$Rd),
+ (i32 (int_aarch64_neon_sqrdmulh (i32 FPR32:$Rn),
+ (i32 FPR32:$Rm))))),
+ (SQRDMLAHv1i32 FPR32:$Rd, FPR32:$Rn, FPR32:$Rm)>;
+ def : Pat<(i32 (int_aarch64_neon_sqsub
+ (i32 FPR32:$Rd),
+ (i32 (int_aarch64_neon_sqrdmulh (i32 FPR32:$Rn),
+ (i32 FPR32:$Rm))))),
+ (SQRDMLSHv1i32 FPR32:$Rd, FPR32:$Rn, FPR32:$Rm)>;
+}
def : InstAlias<"cmls $dst, $src1, $src2",
(CMHSv1i64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
@@ -3478,13 +3510,13 @@ def : Pat<(f64 (int_aarch64_neon_fminv (v2f64 V128:$Rn))),
// AdvSIMD INS/DUP instructions
//----------------------------------------------------------------------------
-def DUPv8i8gpr : SIMDDupFromMain<0, 0b00001, ".8b", v8i8, V64, GPR32>;
-def DUPv16i8gpr : SIMDDupFromMain<1, 0b00001, ".16b", v16i8, V128, GPR32>;
-def DUPv4i16gpr : SIMDDupFromMain<0, 0b00010, ".4h", v4i16, V64, GPR32>;
-def DUPv8i16gpr : SIMDDupFromMain<1, 0b00010, ".8h", v8i16, V128, GPR32>;
-def DUPv2i32gpr : SIMDDupFromMain<0, 0b00100, ".2s", v2i32, V64, GPR32>;
-def DUPv4i32gpr : SIMDDupFromMain<1, 0b00100, ".4s", v4i32, V128, GPR32>;
-def DUPv2i64gpr : SIMDDupFromMain<1, 0b01000, ".2d", v2i64, V128, GPR64>;
+def DUPv8i8gpr : SIMDDupFromMain<0, {?,?,?,?,1}, ".8b", v8i8, V64, GPR32>;
+def DUPv16i8gpr : SIMDDupFromMain<1, {?,?,?,?,1}, ".16b", v16i8, V128, GPR32>;
+def DUPv4i16gpr : SIMDDupFromMain<0, {?,?,?,1,0}, ".4h", v4i16, V64, GPR32>;
+def DUPv8i16gpr : SIMDDupFromMain<1, {?,?,?,1,0}, ".8h", v8i16, V128, GPR32>;
+def DUPv2i32gpr : SIMDDupFromMain<0, {?,?,1,0,0}, ".2s", v2i32, V64, GPR32>;
+def DUPv4i32gpr : SIMDDupFromMain<1, {?,?,1,0,0}, ".4s", v4i32, V128, GPR32>;
+def DUPv2i64gpr : SIMDDupFromMain<1, {?,1,0,0,0}, ".2d", v2i64, V128, GPR64>;
def DUPv2i64lane : SIMDDup64FromElement;
def DUPv2i32lane : SIMDDup32FromElement<0, ".2s", v2i32, V64>;
@@ -4324,6 +4356,10 @@ defm SQDMLAL : SIMDIndexedLongSQDMLXSDTied<0, 0b0011, "sqdmlal",
int_aarch64_neon_sqadd>;
defm SQDMLSL : SIMDIndexedLongSQDMLXSDTied<0, 0b0111, "sqdmlsl",
int_aarch64_neon_sqsub>;
+defm SQRDMLAH : SIMDIndexedSQRDMLxHSDTied<1, 0b1101, "sqrdmlah",
+ int_aarch64_neon_sqadd>;
+defm SQRDMLSH : SIMDIndexedSQRDMLxHSDTied<1, 0b1111, "sqrdmlsh",
+ int_aarch64_neon_sqsub>;
defm SQDMULL : SIMDIndexedLongSD<0, 0b1011, "sqdmull", int_aarch64_neon_sqdmull>;
defm UMLAL : SIMDVectorIndexedLongSDTied<1, 0b0010, "umlal",
TriOpFrag<(add node:$LHS, (int_aarch64_neon_umull node:$MHS, node:$RHS))>>;
diff --git a/lib/Target/AArch64/AArch64RegisterInfo.cpp b/lib/Target/AArch64/AArch64RegisterInfo.cpp
index 33c11fe..1836682 100644
--- a/lib/Target/AArch64/AArch64RegisterInfo.cpp
+++ b/lib/Target/AArch64/AArch64RegisterInfo.cpp
@@ -165,7 +165,12 @@ bool AArch64RegisterInfo::hasBasePointer(const MachineFunction &MF) const {
// large enough that referencing from the FP won't result in things being
// in range relatively often, we can use a base pointer to allow access
// from the other direction like the SP normally works.
+ // Furthermore, if variable sized objects are present and the stack
+ // needs to be dynamically re-aligned, the base pointer is the only
+ // reliable way to reference the locals.
if (MFI->hasVarSizedObjects()) {
+ if (needsStackRealignment(MF))
+ return true;
// Conservatively estimate whether the negative offset from the frame
// pointer will be sufficient to reach. If a function has a smallish
// frame, it's less likely to have lots of spills and callee saved
@@ -181,6 +186,31 @@ bool AArch64RegisterInfo::hasBasePointer(const MachineFunction &MF) const {
return false;
}
+bool AArch64RegisterInfo::canRealignStack(const MachineFunction &MF) const {
+
+ if (MF.getFunction()->hasFnAttribute("no-realign-stack"))
+ return false;
+
+ return true;
+}
+
+// FIXME: Share this with the other backends that have an identical implementation?
+bool
+AArch64RegisterInfo::needsStackRealignment(const MachineFunction &MF) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ const Function *F = MF.getFunction();
+ unsigned StackAlign = MF.getTarget()
+ .getSubtargetImpl(*MF.getFunction())
+ ->getFrameLowering()
+ ->getStackAlignment();
+ bool requiresRealignment =
+ ((MFI->getMaxAlignment() > StackAlign) ||
+ F->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::StackAlignment));
+
+ return requiresRealignment && canRealignStack(MF);
+}
+
unsigned
AArch64RegisterInfo::getFrameRegister(const MachineFunction &MF) const {
const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
diff --git a/lib/Target/AArch64/AArch64RegisterInfo.h b/lib/Target/AArch64/AArch64RegisterInfo.h
index c01bfa5..8c379d9 100644
--- a/lib/Target/AArch64/AArch64RegisterInfo.h
+++ b/lib/Target/AArch64/AArch64RegisterInfo.h
@@ -93,6 +93,9 @@ public:
unsigned getRegPressureLimit(const TargetRegisterClass *RC,
MachineFunction &MF) const override;
+ // Base pointer (stack realignment) support.
+ bool canRealignStack(const MachineFunction &MF) const;
+ bool needsStackRealignment(const MachineFunction &MF) const override;
};
} // end namespace llvm
diff --git a/lib/Target/AArch64/AArch64SchedA57.td b/lib/Target/AArch64/AArch64SchedA57.td
index 3ec4157..ca4457a 100644
--- a/lib/Target/AArch64/AArch64SchedA57.td
+++ b/lib/Target/AArch64/AArch64SchedA57.td
@@ -60,7 +60,12 @@ include "AArch64SchedA57WriteRes.td"
// Cortex-A57. The Cortex-A57 types are directly associated with resources, so
// defining the aliases precludes the need for mapping them using WriteRes. The
// aliases are sufficient for creating a coarse, working model. As the model
-// evolves, InstRWs will be used to override these SchedAliases.
+// evolves, InstRWs will be used to override some of these SchedAliases.
+//
+// WARNING: Using SchedAliases is convenient and works well for latency and
+// resource lookup for instructions. However, this creates an entry in
+// AArch64WriteLatencyTable with a WriteResourceID of 0, breaking
+// any SchedReadAdvance since the lookup will fail.
def : SchedAlias<WriteImm, A57Write_1cyc_1I>;
def : SchedAlias<WriteI, A57Write_1cyc_1I>;
@@ -70,8 +75,8 @@ def : SchedAlias<WriteExtr, A57Write_1cyc_1I>;
def : SchedAlias<WriteIS, A57Write_1cyc_1I>;
def : SchedAlias<WriteID32, A57Write_19cyc_1M>;
def : SchedAlias<WriteID64, A57Write_35cyc_1M>;
-def : SchedAlias<WriteIM32, A57Write_3cyc_1M>;
-def : SchedAlias<WriteIM64, A57Write_5cyc_1M>;
+def : WriteRes<WriteIM32, [A57UnitM]> { let Latency = 3; }
+def : WriteRes<WriteIM64, [A57UnitM]> { let Latency = 5; }
def : SchedAlias<WriteBr, A57Write_1cyc_1B>;
def : SchedAlias<WriteBrReg, A57Write_1cyc_1B>;
def : SchedAlias<WriteLD, A57Write_4cyc_1L>;
@@ -127,6 +132,15 @@ def : InstRW<[A57Write_1cyc_1B_1I], (instrs BL)>;
def : InstRW<[A57Write_2cyc_1B_1I], (instrs BLR)>;
+// Shifted Register with Shift == 0
+// ----------------------------------------------------------------------------
+
+def A57WriteISReg : SchedWriteVariant<[
+ SchedVar<RegShiftedPred, [WriteISReg]>,
+ SchedVar<NoSchedPred, [WriteI]>]>;
+def : InstRW<[A57WriteISReg], (instregex ".*rs$")>;
+
+
// Divide and Multiply Instructions
// -----------------------------------------------------------------------------
diff --git a/lib/Target/AArch64/AArch64Subtarget.cpp b/lib/Target/AArch64/AArch64Subtarget.cpp
index 221d70d..0b97af8 100644
--- a/lib/Target/AArch64/AArch64Subtarget.cpp
+++ b/lib/Target/AArch64/AArch64Subtarget.cpp
@@ -47,8 +47,9 @@ AArch64Subtarget::AArch64Subtarget(const std::string &TT,
const std::string &FS,
const TargetMachine &TM, bool LittleEndian)
: AArch64GenSubtargetInfo(TT, CPU, FS), ARMProcFamily(Others),
+ HasV8_1aOps(false),
HasFPARMv8(false), HasNEON(false), HasCrypto(false), HasCRC(false),
- HasV8_1a(false), HasZeroCycleRegMove(false), HasZeroCycleZeroing(false),
+ HasZeroCycleRegMove(false), HasZeroCycleZeroing(false),
IsLittle(LittleEndian), CPUString(CPU), TargetTriple(TT), FrameLowering(),
InstrInfo(initializeSubtargetDependencies(FS)),
TSInfo(TM.getDataLayout()), TLInfo(TM, *this) {}
diff --git a/lib/Target/AArch64/AArch64Subtarget.h b/lib/Target/AArch64/AArch64Subtarget.h
index bcab97d..5454b20 100644
--- a/lib/Target/AArch64/AArch64Subtarget.h
+++ b/lib/Target/AArch64/AArch64Subtarget.h
@@ -37,11 +37,12 @@ protected:
/// ARMProcFamily - ARM processor family: Cortex-A53, Cortex-A57, and others.
ARMProcFamilyEnum ARMProcFamily;
+ bool HasV8_1aOps;
+
bool HasFPARMv8;
bool HasNEON;
bool HasCrypto;
bool HasCRC;
- bool HasV8_1a;
// HasZeroCycleRegMove - Has zero-cycle register mov instructions.
bool HasZeroCycleRegMove;
@@ -93,6 +94,8 @@ public:
return isCortexA53() || isCortexA57();
}
+ bool hasV8_1aOps() const { return HasV8_1aOps; }
+
bool hasZeroCycleRegMove() const { return HasZeroCycleRegMove; }
bool hasZeroCycleZeroing() const { return HasZeroCycleZeroing; }
@@ -101,7 +104,6 @@ public:
bool hasNEON() const { return HasNEON; }
bool hasCrypto() const { return HasCrypto; }
bool hasCRC() const { return HasCRC; }
- bool hasV8_1a() const { return HasV8_1a; }
bool isLittleEndian() const { return IsLittle; }
diff --git a/lib/Target/AArch64/AArch64TargetMachine.cpp b/lib/Target/AArch64/AArch64TargetMachine.cpp
index f902f64..ab28a16 100644
--- a/lib/Target/AArch64/AArch64TargetMachine.cpp
+++ b/lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -87,6 +87,11 @@ EnableGEPOpt("aarch64-gep-opt", cl::Hidden,
cl::desc("Enable optimizations on complex GEPs"),
cl::init(true));
+// FIXME: Unify control over GlobalMerge.
+static cl::opt<cl::boolOrDefault>
+EnableGlobalMerge("aarch64-global-merge", cl::Hidden,
+ cl::desc("Enable the global merge pass"));
+
extern "C" void LLVMInitializeAArch64Target() {
// Register the target.
RegisterTargetMachine<AArch64leTargetMachine> X(TheAArch64leTarget);
@@ -245,7 +250,9 @@ bool AArch64PassConfig::addPreISel() {
// FIXME: On AArch64, this depends on the type.
// Basically, the addressable offsets are up to 4095 * Ty.getSizeInBytes().
// and the offset has to be a multiple of the related size in bytes.
- if (TM->getOptLevel() == CodeGenOpt::Aggressive)
+ if ((TM->getOptLevel() == CodeGenOpt::Aggressive &&
+ EnableGlobalMerge == cl::BOU_UNSET) ||
+ EnableGlobalMerge == cl::BOU_TRUE)
addPass(createGlobalMergePass(TM, 4095));
if (TM->getOptLevel() != CodeGenOpt::None)
addPass(createAArch64AddressTypePromotionPass());
diff --git a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
index 1219ffc..063c714 100644
--- a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
+++ b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
@@ -1972,7 +1972,8 @@ AArch64AsmParser::tryParsePrefetch(OperandVector &Operands) {
bool Valid;
auto Mapper = AArch64PRFM::PRFMMapper();
- StringRef Name = Mapper.toString(MCE->getValue(), Valid);
+ StringRef Name =
+ Mapper.toString(MCE->getValue(), STI.getFeatureBits(), Valid);
Operands.push_back(AArch64Operand::CreatePrefetch(prfop, Name,
S, getContext()));
return MatchOperand_Success;
@@ -1985,7 +1986,8 @@ AArch64AsmParser::tryParsePrefetch(OperandVector &Operands) {
bool Valid;
auto Mapper = AArch64PRFM::PRFMMapper();
- unsigned prfop = Mapper.fromString(Tok.getString(), Valid);
+ unsigned prfop =
+ Mapper.fromString(Tok.getString(), STI.getFeatureBits(), Valid);
if (!Valid) {
TokError("pre-fetch hint expected");
return MatchOperand_ParseFail;
@@ -2090,15 +2092,16 @@ AArch64AsmParser::tryParseFPImm(OperandVector &Operands) {
const AsmToken &Tok = Parser.getTok();
if (Tok.is(AsmToken::Real)) {
APFloat RealVal(APFloat::IEEEdouble, Tok.getString());
+ if (isNegative)
+ RealVal.changeSign();
+
uint64_t IntVal = RealVal.bitcastToAPInt().getZExtValue();
- // If we had a '-' in front, toggle the sign bit.
- IntVal ^= (uint64_t)isNegative << 63;
int Val = AArch64_AM::getFP64Imm(APInt(64, IntVal));
Parser.Lex(); // Eat the token.
// Check for out of range values. As an exception, we let Zero through,
// as we handle that special case in post-processing before matching in
// order to use the zero register for it.
- if (Val == -1 && !RealVal.isZero()) {
+ if (Val == -1 && !RealVal.isPosZero()) {
TokError("expected compatible register or floating-point constant");
return MatchOperand_ParseFail;
}
@@ -2597,7 +2600,8 @@ AArch64AsmParser::tryParseBarrierOperand(OperandVector &Operands) {
}
bool Valid;
auto Mapper = AArch64DB::DBarrierMapper();
- StringRef Name = Mapper.toString(MCE->getValue(), Valid);
+ StringRef Name =
+ Mapper.toString(MCE->getValue(), STI.getFeatureBits(), Valid);
Operands.push_back( AArch64Operand::CreateBarrier(MCE->getValue(), Name,
ExprLoc, getContext()));
return MatchOperand_Success;
@@ -2610,7 +2614,8 @@ AArch64AsmParser::tryParseBarrierOperand(OperandVector &Operands) {
bool Valid;
auto Mapper = AArch64DB::DBarrierMapper();
- unsigned Opt = Mapper.fromString(Tok.getString(), Valid);
+ unsigned Opt =
+ Mapper.fromString(Tok.getString(), STI.getFeatureBits(), Valid);
if (!Valid) {
TokError("invalid barrier option name");
return MatchOperand_ParseFail;
@@ -2638,18 +2643,21 @@ AArch64AsmParser::tryParseSysReg(OperandVector &Operands) {
return MatchOperand_NoMatch;
bool IsKnown;
- auto MRSMapper = AArch64SysReg::MRSMapper(STI.getFeatureBits());
- uint32_t MRSReg = MRSMapper.fromString(Tok.getString(), IsKnown);
+ auto MRSMapper = AArch64SysReg::MRSMapper();
+ uint32_t MRSReg = MRSMapper.fromString(Tok.getString(), STI.getFeatureBits(),
+ IsKnown);
assert(IsKnown == (MRSReg != -1U) &&
"register should be -1 if and only if it's unknown");
- auto MSRMapper = AArch64SysReg::MSRMapper(STI.getFeatureBits());
- uint32_t MSRReg = MSRMapper.fromString(Tok.getString(), IsKnown);
+ auto MSRMapper = AArch64SysReg::MSRMapper();
+ uint32_t MSRReg = MSRMapper.fromString(Tok.getString(), STI.getFeatureBits(),
+ IsKnown);
assert(IsKnown == (MSRReg != -1U) &&
"register should be -1 if and only if it's unknown");
auto PStateMapper = AArch64PState::PStateMapper();
- uint32_t PStateField = PStateMapper.fromString(Tok.getString(), IsKnown);
+ uint32_t PStateField =
+ PStateMapper.fromString(Tok.getString(), STI.getFeatureBits(), IsKnown);
assert(IsKnown == (PStateField != -1U) &&
"register should be -1 if and only if it's unknown");
diff --git a/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp b/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
index fb25089..1c8c0a66 100644
--- a/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
+++ b/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
@@ -1102,6 +1102,12 @@ static DecodeStatus DecodeExclusiveLdStInstruction(llvm::MCInst &Inst,
case AArch64::STLRW:
case AArch64::STLRB:
case AArch64::STLRH:
+ case AArch64::STLLRW:
+ case AArch64::STLLRB:
+ case AArch64::STLLRH:
+ case AArch64::LDLARW:
+ case AArch64::LDLARB:
+ case AArch64::LDLARH:
DecodeGPR32RegisterClass(Inst, Rt, Addr, Decoder);
break;
case AArch64::STLXRX:
@@ -1112,6 +1118,8 @@ static DecodeStatus DecodeExclusiveLdStInstruction(llvm::MCInst &Inst,
case AArch64::LDAXRX:
case AArch64::LDXRX:
case AArch64::STLRX:
+ case AArch64::LDLARX:
+ case AArch64::STLLRX:
DecodeGPR64RegisterClass(Inst, Rt, Addr, Decoder);
break;
case AArch64::STLXPW:
@@ -1504,7 +1512,10 @@ static DecodeStatus DecodeSystemPStateInstruction(llvm::MCInst &Inst,
Inst.addOperand(MCOperand::CreateImm(crm));
bool ValidNamed;
- (void)AArch64PState::PStateMapper().toString(pstate_field, ValidNamed);
+ const AArch64Disassembler *Dis =
+ static_cast<const AArch64Disassembler *>(Decoder);
+ (void)AArch64PState::PStateMapper().toString(pstate_field,
+ Dis->getSubtargetInfo().getFeatureBits(), ValidNamed);
return ValidNamed ? Success : Fail;
}
diff --git a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp
index 46a1d79..febd332 100644
--- a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp
+++ b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp
@@ -34,18 +34,13 @@ using namespace llvm;
AArch64InstPrinter::AArch64InstPrinter(const MCAsmInfo &MAI,
const MCInstrInfo &MII,
- const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI)
- : MCInstPrinter(MAI, MII, MRI) {
- // Initialize the set of available features.
- setAvailableFeatures(STI.getFeatureBits());
-}
+ const MCRegisterInfo &MRI)
+ : MCInstPrinter(MAI, MII, MRI) {}
AArch64AppleInstPrinter::AArch64AppleInstPrinter(const MCAsmInfo &MAI,
const MCInstrInfo &MII,
- const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI)
- : AArch64InstPrinter(MAI, MII, MRI, STI) {}
+ const MCRegisterInfo &MRI)
+ : AArch64InstPrinter(MAI, MII, MRI) {}
void AArch64InstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
// This is for .cfi directives.
@@ -53,7 +48,8 @@ void AArch64InstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
}
void AArch64InstPrinter::printInst(const MCInst *MI, raw_ostream &O,
- StringRef Annot) {
+ StringRef Annot,
+ const MCSubtargetInfo &STI) {
// Check for special encodings and print the canonical alias instead.
unsigned Opcode = MI->getOpcode();
@@ -210,8 +206,8 @@ void AArch64InstPrinter::printInst(const MCInst *MI, raw_ostream &O,
return;
}
- if (!printAliasInstr(MI, O))
- printInstruction(MI, O);
+ if (!printAliasInstr(MI, STI, O))
+ printInstruction(MI, STI, O);
printAnnotation(O, Annot);
}
@@ -614,7 +610,8 @@ static LdStNInstrDesc *getLdStNInstrDesc(unsigned Opcode) {
}
void AArch64AppleInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
- StringRef Annot) {
+ StringRef Annot,
+ const MCSubtargetInfo &STI) {
unsigned Opcode = MI->getOpcode();
StringRef Layout, Mnemonic;
@@ -624,7 +621,7 @@ void AArch64AppleInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
<< getRegisterName(MI->getOperand(0).getReg(), AArch64::vreg) << ", ";
unsigned ListOpNum = IsTbx ? 2 : 1;
- printVectorList(MI, ListOpNum, O, "");
+ printVectorList(MI, ListOpNum, STI, O, "");
O << ", "
<< getRegisterName(MI->getOperand(ListOpNum + 1).getReg(), AArch64::vreg);
@@ -638,7 +635,7 @@ void AArch64AppleInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
// Now onto the operands: first a vector list with possible lane
// specifier. E.g. { v0 }[2]
int OpNum = LdStDesc->ListOperand;
- printVectorList(MI, OpNum++, O, "");
+ printVectorList(MI, OpNum++, STI, O, "");
if (LdStDesc->HasLane)
O << '[' << MI->getOperand(OpNum++).getImm() << ']';
@@ -662,7 +659,7 @@ void AArch64AppleInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
return;
}
- AArch64InstPrinter::printInst(MI, O, Annot);
+ AArch64InstPrinter::printInst(MI, O, Annot, STI);
}
bool AArch64InstPrinter::printSysAlias(const MCInst *MI, raw_ostream &O) {
@@ -889,6 +886,7 @@ bool AArch64InstPrinter::printSysAlias(const MCInst *MI, raw_ostream &O) {
}
void AArch64InstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
const MCOperand &Op = MI->getOperand(OpNo);
if (Op.isReg()) {
@@ -903,6 +901,7 @@ void AArch64InstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
}
void AArch64InstPrinter::printHexImm(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
const MCOperand &Op = MI->getOperand(OpNo);
O << format("#%#llx", Op.getImm());
@@ -922,6 +921,7 @@ void AArch64InstPrinter::printPostIncOperand(const MCInst *MI, unsigned OpNo,
}
void AArch64InstPrinter::printVRegOperand(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
const MCOperand &Op = MI->getOperand(OpNo);
assert(Op.isReg() && "Non-register vreg operand!");
@@ -930,6 +930,7 @@ void AArch64InstPrinter::printVRegOperand(const MCInst *MI, unsigned OpNo,
}
void AArch64InstPrinter::printSysCROperand(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
const MCOperand &Op = MI->getOperand(OpNo);
assert(Op.isImm() && "System instruction C[nm] operands must be immediates!");
@@ -937,6 +938,7 @@ void AArch64InstPrinter::printSysCROperand(const MCInst *MI, unsigned OpNo,
}
void AArch64InstPrinter::printAddSubImm(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
const MCOperand &MO = MI->getOperand(OpNum);
if (MO.isImm()) {
@@ -946,18 +948,19 @@ void AArch64InstPrinter::printAddSubImm(const MCInst *MI, unsigned OpNum,
AArch64_AM::getShiftValue(MI->getOperand(OpNum + 1).getImm());
O << '#' << Val;
if (Shift != 0)
- printShifter(MI, OpNum + 1, O);
+ printShifter(MI, OpNum + 1, STI, O);
if (CommentStream)
*CommentStream << '=' << (Val << Shift) << '\n';
} else {
assert(MO.isExpr() && "Unexpected operand type!");
O << *MO.getExpr();
- printShifter(MI, OpNum + 1, O);
+ printShifter(MI, OpNum + 1, STI, O);
}
}
void AArch64InstPrinter::printLogicalImm32(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
uint64_t Val = MI->getOperand(OpNum).getImm();
O << "#0x";
@@ -965,6 +968,7 @@ void AArch64InstPrinter::printLogicalImm32(const MCInst *MI, unsigned OpNum,
}
void AArch64InstPrinter::printLogicalImm64(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
uint64_t Val = MI->getOperand(OpNum).getImm();
O << "#0x";
@@ -972,6 +976,7 @@ void AArch64InstPrinter::printLogicalImm64(const MCInst *MI, unsigned OpNum,
}
void AArch64InstPrinter::printShifter(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
unsigned Val = MI->getOperand(OpNum).getImm();
// LSL #0 should not be printed.
@@ -983,18 +988,21 @@ void AArch64InstPrinter::printShifter(const MCInst *MI, unsigned OpNum,
}
void AArch64InstPrinter::printShiftedRegister(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
O << getRegisterName(MI->getOperand(OpNum).getReg());
- printShifter(MI, OpNum + 1, O);
+ printShifter(MI, OpNum + 1, STI, O);
}
void AArch64InstPrinter::printExtendedRegister(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
O << getRegisterName(MI->getOperand(OpNum).getReg());
- printArithExtend(MI, OpNum + 1, O);
+ printArithExtend(MI, OpNum + 1, STI, O);
}
void AArch64InstPrinter::printArithExtend(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
unsigned Val = MI->getOperand(OpNum).getImm();
AArch64_AM::ShiftExtendType ExtType = AArch64_AM::getArithExtendType(Val);
@@ -1038,24 +1046,28 @@ void AArch64InstPrinter::printMemExtend(const MCInst *MI, unsigned OpNum,
}
void AArch64InstPrinter::printCondCode(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
AArch64CC::CondCode CC = (AArch64CC::CondCode)MI->getOperand(OpNum).getImm();
O << AArch64CC::getCondCodeName(CC);
}
void AArch64InstPrinter::printInverseCondCode(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
AArch64CC::CondCode CC = (AArch64CC::CondCode)MI->getOperand(OpNum).getImm();
O << AArch64CC::getCondCodeName(AArch64CC::getInvertedCondCode(CC));
}
void AArch64InstPrinter::printAMNoIndex(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
O << '[' << getRegisterName(MI->getOperand(OpNum).getReg()) << ']';
}
template<int Scale>
void AArch64InstPrinter::printImmScale(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
O << '#' << Scale * MI->getOperand(OpNum).getImm();
}
@@ -1085,10 +1097,12 @@ void AArch64InstPrinter::printAMIndexedWB(const MCInst *MI, unsigned OpNum,
}
void AArch64InstPrinter::printPrefetchOp(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
unsigned prfop = MI->getOperand(OpNum).getImm();
bool Valid;
- StringRef Name = AArch64PRFM::PRFMMapper().toString(prfop, Valid);
+ StringRef Name =
+ AArch64PRFM::PRFMMapper().toString(prfop, STI.getFeatureBits(), Valid);
if (Valid)
O << Name;
else
@@ -1096,6 +1110,7 @@ void AArch64InstPrinter::printPrefetchOp(const MCInst *MI, unsigned OpNum,
}
void AArch64InstPrinter::printFPImmOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
const MCOperand &MO = MI->getOperand(OpNum);
float FPImm =
@@ -1151,6 +1166,7 @@ static unsigned getNextVectorRegister(unsigned Reg, unsigned Stride = 1) {
}
void AArch64InstPrinter::printVectorList(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O,
StringRef LayoutSuffix) {
unsigned Reg = MI->getOperand(OpNum).getReg();
@@ -1193,14 +1209,17 @@ void AArch64InstPrinter::printVectorList(const MCInst *MI, unsigned OpNum,
O << " }";
}
-void AArch64InstPrinter::printImplicitlyTypedVectorList(const MCInst *MI,
- unsigned OpNum,
- raw_ostream &O) {
- printVectorList(MI, OpNum, O, "");
+void
+AArch64InstPrinter::printImplicitlyTypedVectorList(const MCInst *MI,
+ unsigned OpNum,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ printVectorList(MI, OpNum, STI, O, "");
}
template <unsigned NumLanes, char LaneKind>
void AArch64InstPrinter::printTypedVectorList(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
std::string Suffix(".");
if (NumLanes)
@@ -1208,15 +1227,17 @@ void AArch64InstPrinter::printTypedVectorList(const MCInst *MI, unsigned OpNum,
else
Suffix += LaneKind;
- printVectorList(MI, OpNum, O, Suffix);
+ printVectorList(MI, OpNum, STI, O, Suffix);
}
void AArch64InstPrinter::printVectorIndex(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
O << "[" << MI->getOperand(OpNum).getImm() << "]";
}
void AArch64InstPrinter::printAlignedLabel(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
const MCOperand &Op = MI->getOperand(OpNum);
@@ -1241,6 +1262,7 @@ void AArch64InstPrinter::printAlignedLabel(const MCInst *MI, unsigned OpNum,
}
void AArch64InstPrinter::printAdrpLabel(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
const MCOperand &Op = MI->getOperand(OpNum);
@@ -1256,6 +1278,7 @@ void AArch64InstPrinter::printAdrpLabel(const MCInst *MI, unsigned OpNum,
}
void AArch64InstPrinter::printBarrierOption(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
unsigned Val = MI->getOperand(OpNo).getImm();
unsigned Opcode = MI->getOpcode();
@@ -1263,9 +1286,11 @@ void AArch64InstPrinter::printBarrierOption(const MCInst *MI, unsigned OpNo,
bool Valid;
StringRef Name;
if (Opcode == AArch64::ISB)
- Name = AArch64ISB::ISBMapper().toString(Val, Valid);
+ Name = AArch64ISB::ISBMapper().toString(Val, STI.getFeatureBits(),
+ Valid);
else
- Name = AArch64DB::DBarrierMapper().toString(Val, Valid);
+ Name = AArch64DB::DBarrierMapper().toString(Val, STI.getFeatureBits(),
+ Valid);
if (Valid)
O << Name;
else
@@ -1273,31 +1298,35 @@ void AArch64InstPrinter::printBarrierOption(const MCInst *MI, unsigned OpNo,
}
void AArch64InstPrinter::printMRSSystemRegister(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
unsigned Val = MI->getOperand(OpNo).getImm();
- auto Mapper = AArch64SysReg::MRSMapper(getAvailableFeatures());
- std::string Name = Mapper.toString(Val);
+ auto Mapper = AArch64SysReg::MRSMapper();
+ std::string Name = Mapper.toString(Val, STI.getFeatureBits());
O << StringRef(Name).upper();
}
void AArch64InstPrinter::printMSRSystemRegister(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
unsigned Val = MI->getOperand(OpNo).getImm();
- auto Mapper = AArch64SysReg::MSRMapper(getAvailableFeatures());
- std::string Name = Mapper.toString(Val);
+ auto Mapper = AArch64SysReg::MSRMapper();
+ std::string Name = Mapper.toString(Val, STI.getFeatureBits());
O << StringRef(Name).upper();
}
void AArch64InstPrinter::printSystemPStateField(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
unsigned Val = MI->getOperand(OpNo).getImm();
bool Valid;
- StringRef Name = AArch64PState::PStateMapper().toString(Val, Valid);
+ StringRef Name =
+ AArch64PState::PStateMapper().toString(Val, STI.getFeatureBits(), Valid);
if (Valid)
O << StringRef(Name.str()).upper();
else
@@ -1305,6 +1334,7 @@ void AArch64InstPrinter::printSystemPStateField(const MCInst *MI, unsigned OpNo,
}
void AArch64InstPrinter::printSIMDType10Operand(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
unsigned RawVal = MI->getOperand(OpNo).getImm();
uint64_t Val = AArch64_AM::decodeAdvSIMDModImmType10(RawVal);
diff --git a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h
index 5f51621..c2077a0 100644
--- a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h
+++ b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h
@@ -26,16 +26,21 @@ class MCOperand;
class AArch64InstPrinter : public MCInstPrinter {
public:
AArch64InstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
- const MCRegisterInfo &MRI, const MCSubtargetInfo &STI);
+ const MCRegisterInfo &MRI);
- void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot) override;
+ void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot,
+ const MCSubtargetInfo &STI) override;
void printRegName(raw_ostream &OS, unsigned RegNo) const override;
// Autogenerated by tblgen.
- virtual void printInstruction(const MCInst *MI, raw_ostream &O);
- virtual bool printAliasInstr(const MCInst *MI, raw_ostream &O);
+ virtual void printInstruction(const MCInst *MI, const MCSubtargetInfo &STI,
+ raw_ostream &O);
+ virtual bool printAliasInstr(const MCInst *MI, const MCSubtargetInfo &STI,
+ raw_ostream &O);
virtual void printCustomAliasOperand(const MCInst *MI, unsigned OpIdx,
- unsigned PrintMethodIdx, raw_ostream &O);
+ unsigned PrintMethodIdx,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O);
virtual StringRef getRegName(unsigned RegNo) const {
return getRegisterName(RegNo);
}
@@ -45,90 +50,126 @@ public:
protected:
bool printSysAlias(const MCInst *MI, raw_ostream &O);
// Operand printers
- void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- void printHexImm(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printOperand(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
+ raw_ostream &O);
+ void printHexImm(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
+ raw_ostream &O);
void printPostIncOperand(const MCInst *MI, unsigned OpNo, unsigned Imm,
raw_ostream &O);
- template<int Amount>
- void printPostIncOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+ template <int Amount>
+ void printPostIncOperand(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O) {
printPostIncOperand(MI, OpNo, Amount, O);
}
- void printVRegOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- void printSysCROperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- void printAddSubImm(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printLogicalImm32(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printLogicalImm64(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printShifter(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printShiftedRegister(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printExtendedRegister(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printArithExtend(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printVRegOperand(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printSysCROperand(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printAddSubImm(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printLogicalImm32(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printLogicalImm64(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printShifter(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printShiftedRegister(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printExtendedRegister(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printArithExtend(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
void printMemExtend(const MCInst *MI, unsigned OpNum, raw_ostream &O,
char SrcRegKind, unsigned Width);
template <char SrcRegKind, unsigned Width>
- void printMemExtend(const MCInst *MI, unsigned OpNum, raw_ostream &O) {
+ void printMemExtend(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O) {
printMemExtend(MI, OpNum, O, SrcRegKind, Width);
}
- void printCondCode(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printInverseCondCode(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printAlignedLabel(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printCondCode(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printInverseCondCode(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printAlignedLabel(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
void printUImm12Offset(const MCInst *MI, unsigned OpNum, unsigned Scale,
raw_ostream &O);
void printAMIndexedWB(const MCInst *MI, unsigned OpNum, unsigned Scale,
raw_ostream &O);
- template<int Scale>
- void printUImm12Offset(const MCInst *MI, unsigned OpNum, raw_ostream &O) {
+ template <int Scale>
+ void printUImm12Offset(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O) {
printUImm12Offset(MI, OpNum, Scale, O);
}
- template<int BitWidth>
- void printAMIndexedWB(const MCInst *MI, unsigned OpNum, raw_ostream &O) {
+ template <int BitWidth>
+ void printAMIndexedWB(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O) {
printAMIndexedWB(MI, OpNum, BitWidth / 8, O);
}
- void printAMNoIndex(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printAMNoIndex(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
- template<int Scale>
- void printImmScale(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ template <int Scale>
+ void printImmScale(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
- void printPrefetchOp(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printPrefetchOp(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
- void printFPImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printFPImmOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
- void printVectorList(const MCInst *MI, unsigned OpNum, raw_ostream &O,
+ void printVectorList(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O,
StringRef LayoutSuffix);
/// Print a list of vector registers where the type suffix is implicit
/// (i.e. attached to the instruction rather than the registers).
void printImplicitlyTypedVectorList(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O);
template <unsigned NumLanes, char LaneKind>
- void printTypedVectorList(const MCInst *MI, unsigned OpNum, raw_ostream &O);
-
- void printVectorIndex(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printAdrpLabel(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printBarrierOption(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printMSRSystemRegister(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printMRSSystemRegister(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printSystemPStateField(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printSIMDType10Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printTypedVectorList(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+
+ void printVectorIndex(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printAdrpLabel(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printBarrierOption(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printMSRSystemRegister(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printMRSSystemRegister(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printSystemPStateField(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printSIMDType10Operand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
};
class AArch64AppleInstPrinter : public AArch64InstPrinter {
public:
AArch64AppleInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
- const MCRegisterInfo &MRI, const MCSubtargetInfo &STI);
+ const MCRegisterInfo &MRI);
- void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot) override;
+ void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot,
+ const MCSubtargetInfo &STI) override;
- void printInstruction(const MCInst *MI, raw_ostream &O) override;
- bool printAliasInstr(const MCInst *MI, raw_ostream &O) override;
+ void printInstruction(const MCInst *MI, const MCSubtargetInfo &STI,
+ raw_ostream &O) override;
+ bool printAliasInstr(const MCInst *MI, const MCSubtargetInfo &STI,
+ raw_ostream &O) override;
void printCustomAliasOperand(const MCInst *MI, unsigned OpIdx,
unsigned PrintMethodIdx,
+ const MCSubtargetInfo &STI,
raw_ostream &O) override;
StringRef getRegName(unsigned RegNo) const override {
return getRegisterName(RegNo);
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
index 84b63a0..e5eb90c 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
@@ -313,7 +313,7 @@ public:
DarwinAArch64AsmBackend(const Target &T, const MCRegisterInfo &MRI)
: AArch64AsmBackend(T), MRI(MRI) {}
- MCObjectWriter *createObjectWriter(raw_ostream &OS) const override {
+ MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override {
return createAArch64MachObjectWriter(OS, MachO::CPU_TYPE_ARM64,
MachO::CPU_SUBTYPE_ARM64_ALL);
}
@@ -461,7 +461,7 @@ public:
ELFAArch64AsmBackend(const Target &T, uint8_t OSABI, bool IsLittleEndian)
: AArch64AsmBackend(T), OSABI(OSABI), IsLittleEndian(IsLittleEndian) {}
- MCObjectWriter *createObjectWriter(raw_ostream &OS) const override {
+ MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override {
return createAArch64ELFObjectWriter(OS, OSABI, IsLittleEndian);
}
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp
index 5ea49c3..1f516d1 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp
@@ -26,7 +26,7 @@ class AArch64ELFObjectWriter : public MCELFObjectTargetWriter {
public:
AArch64ELFObjectWriter(uint8_t OSABI, bool IsLittleEndian);
- virtual ~AArch64ELFObjectWriter();
+ ~AArch64ELFObjectWriter() override;
protected:
unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup,
@@ -248,9 +248,9 @@ unsigned AArch64ELFObjectWriter::GetRelocType(const MCValue &Target,
llvm_unreachable("Unimplemented fixup -> relocation");
}
-MCObjectWriter *llvm::createAArch64ELFObjectWriter(raw_ostream &OS,
- uint8_t OSABI,
- bool IsLittleEndian) {
+MCObjectWriter *llvm::createAArch64ELFObjectWriter(raw_pwrite_stream &OS,
+ uint8_t OSABI,
+ bool IsLittleEndian) {
MCELFObjectTargetWriter *MOTW =
new AArch64ELFObjectWriter(OSABI, IsLittleEndian);
return createELFObjectWriter(MOTW, OS, IsLittleEndian);
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
index 8f780d2..540d1fc 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
@@ -89,12 +89,12 @@ class AArch64ELFStreamer : public MCELFStreamer {
public:
friend class AArch64TargetELFStreamer;
- AArch64ELFStreamer(MCContext &Context, MCAsmBackend &TAB, raw_ostream &OS,
- MCCodeEmitter *Emitter)
+ AArch64ELFStreamer(MCContext &Context, MCAsmBackend &TAB,
+ raw_pwrite_stream &OS, MCCodeEmitter *Emitter)
: MCELFStreamer(Context, TAB, OS, Emitter), MappingSymbolCounter(0),
LastEMS(EMS_None) {}
- ~AArch64ELFStreamer() {}
+ ~AArch64ELFStreamer() override {}
void ChangeSection(const MCSection *Section,
const MCExpr *Subsection) override {
@@ -211,8 +211,8 @@ MCTargetStreamer *createAArch64AsmTargetStreamer(MCStreamer &S,
}
MCELFStreamer *createAArch64ELFStreamer(MCContext &Context, MCAsmBackend &TAB,
- raw_ostream &OS, MCCodeEmitter *Emitter,
- bool RelaxAll) {
+ raw_pwrite_stream &OS,
+ MCCodeEmitter *Emitter, bool RelaxAll) {
AArch64ELFStreamer *S = new AArch64ELFStreamer(Context, TAB, OS, Emitter);
if (RelaxAll)
S->getAssembler().setRelaxAll(true);
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.h b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.h
index 71b05cc..ef48203 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.h
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.h
@@ -19,8 +19,8 @@
namespace llvm {
MCELFStreamer *createAArch64ELFStreamer(MCContext &Context, MCAsmBackend &TAB,
- raw_ostream &OS, MCCodeEmitter *Emitter,
- bool RelaxAll);
+ raw_pwrite_stream &OS,
+ MCCodeEmitter *Emitter, bool RelaxAll);
}
#endif
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp
index 9ea49f0..fd4dc47 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp
@@ -40,7 +40,7 @@ class AArch64MCCodeEmitter : public MCCodeEmitter {
public:
AArch64MCCodeEmitter(const MCInstrInfo &mcii, MCContext &ctx) : Ctx(ctx) {}
- ~AArch64MCCodeEmitter() {}
+ ~AArch64MCCodeEmitter() override {}
// getBinaryCodeForInstr - TableGen'erated function for getting the
// binary encoding for an instruction.
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp
index 38b399d..afad674 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp
@@ -109,29 +109,28 @@ static MCCodeGenInfo *createAArch64MCCodeGenInfo(StringRef TT, Reloc::Model RM,
return X;
}
-static MCInstPrinter *createAArch64MCInstPrinter(const Target &T,
+static MCInstPrinter *createAArch64MCInstPrinter(const Triple &T,
unsigned SyntaxVariant,
const MCAsmInfo &MAI,
const MCInstrInfo &MII,
- const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI) {
+ const MCRegisterInfo &MRI) {
if (SyntaxVariant == 0)
- return new AArch64InstPrinter(MAI, MII, MRI, STI);
+ return new AArch64InstPrinter(MAI, MII, MRI);
if (SyntaxVariant == 1)
- return new AArch64AppleInstPrinter(MAI, MII, MRI, STI);
+ return new AArch64AppleInstPrinter(MAI, MII, MRI);
return nullptr;
}
static MCStreamer *createELFStreamer(const Triple &T, MCContext &Ctx,
- MCAsmBackend &TAB, raw_ostream &OS,
+ MCAsmBackend &TAB, raw_pwrite_stream &OS,
MCCodeEmitter *Emitter, bool RelaxAll) {
return createAArch64ELFStreamer(Ctx, TAB, OS, Emitter, RelaxAll);
}
static MCStreamer *createMachOStreamer(MCContext &Ctx, MCAsmBackend &TAB,
- raw_ostream &OS, MCCodeEmitter *Emitter,
- bool RelaxAll,
+ raw_pwrite_stream &OS,
+ MCCodeEmitter *Emitter, bool RelaxAll,
bool DWARFMustBeAtTheEnd) {
return createMachOStreamer(Ctx, TAB, OS, Emitter, RelaxAll,
DWARFMustBeAtTheEnd,
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h
index 7ce303b..4705bdf 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h
@@ -33,6 +33,7 @@ class StringRef;
class Target;
class Triple;
class raw_ostream;
+class raw_pwrite_stream;
extern Target TheAArch64leTarget;
extern Target TheAArch64beTarget;
@@ -48,11 +49,13 @@ MCAsmBackend *createAArch64beAsmBackend(const Target &T,
const MCRegisterInfo &MRI, StringRef TT,
StringRef CPU);
-MCObjectWriter *createAArch64ELFObjectWriter(raw_ostream &OS, uint8_t OSABI,
+MCObjectWriter *createAArch64ELFObjectWriter(raw_pwrite_stream &OS,
+ uint8_t OSABI,
bool IsLittleEndian);
-MCObjectWriter *createAArch64MachObjectWriter(raw_ostream &OS, uint32_t CPUType,
- uint32_t CPUSubtype);
+MCObjectWriter *createAArch64MachObjectWriter(raw_pwrite_stream &OS,
+ uint32_t CPUType,
+ uint32_t CPUSubtype);
MCTargetStreamer *createAArch64AsmTargetStreamer(MCStreamer &S,
formatted_raw_ostream &OS,
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp
index 0d9385d..61649c4 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp
@@ -413,9 +413,9 @@ void AArch64MachObjectWriter::RecordRelocation(
Writer->addRelocation(RelSymbol, Fragment->getParent(), MRE);
}
-MCObjectWriter *llvm::createAArch64MachObjectWriter(raw_ostream &OS,
- uint32_t CPUType,
- uint32_t CPUSubtype) {
+MCObjectWriter *llvm::createAArch64MachObjectWriter(raw_pwrite_stream &OS,
+ uint32_t CPUType,
+ uint32_t CPUSubtype) {
return createMachObjectWriter(
new AArch64MachObjectWriter(CPUType, CPUSubtype), OS,
/*IsLittleEndian=*/true);
diff --git a/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp b/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp
index 160c1c5..8696163 100644
--- a/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp
+++ b/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp
@@ -18,9 +18,10 @@
using namespace llvm;
-StringRef AArch64NamedImmMapper::toString(uint32_t Value, bool &Valid) const {
+StringRef AArch64NamedImmMapper::toString(uint32_t Value, uint64_t FeatureBits,
+ bool &Valid) const {
for (unsigned i = 0; i < NumMappings; ++i) {
- if (Mappings[i].Value == Value) {
+ if (Mappings[i].isValueEqual(Value, FeatureBits)) {
Valid = true;
return Mappings[i].Name;
}
@@ -30,10 +31,11 @@ StringRef AArch64NamedImmMapper::toString(uint32_t Value, bool &Valid) const {
return StringRef();
}
-uint32_t AArch64NamedImmMapper::fromString(StringRef Name, bool &Valid) const {
+uint32_t AArch64NamedImmMapper::fromString(StringRef Name, uint64_t FeatureBits,
+ bool &Valid) const {
std::string LowerCaseName = Name.lower();
for (unsigned i = 0; i < NumMappings; ++i) {
- if (Mappings[i].Name == LowerCaseName) {
+ if (Mappings[i].isNameEqual(LowerCaseName, FeatureBits)) {
Valid = true;
return Mappings[i].Value;
}
@@ -48,744 +50,776 @@ bool AArch64NamedImmMapper::validImm(uint32_t Value) const {
}
const AArch64NamedImmMapper::Mapping AArch64AT::ATMapper::ATMappings[] = {
- {"s1e1r", S1E1R},
- {"s1e2r", S1E2R},
- {"s1e3r", S1E3R},
- {"s1e1w", S1E1W},
- {"s1e2w", S1E2W},
- {"s1e3w", S1E3W},
- {"s1e0r", S1E0R},
- {"s1e0w", S1E0W},
- {"s12e1r", S12E1R},
- {"s12e1w", S12E1W},
- {"s12e0r", S12E0R},
- {"s12e0w", S12E0W},
+ {"s1e1r", S1E1R, 0},
+ {"s1e2r", S1E2R, 0},
+ {"s1e3r", S1E3R, 0},
+ {"s1e1w", S1E1W, 0},
+ {"s1e2w", S1E2W, 0},
+ {"s1e3w", S1E3W, 0},
+ {"s1e0r", S1E0R, 0},
+ {"s1e0w", S1E0W, 0},
+ {"s12e1r", S12E1R, 0},
+ {"s12e1w", S12E1W, 0},
+ {"s12e0r", S12E0R, 0},
+ {"s12e0w", S12E0W, 0},
};
AArch64AT::ATMapper::ATMapper()
: AArch64NamedImmMapper(ATMappings, 0) {}
const AArch64NamedImmMapper::Mapping AArch64DB::DBarrierMapper::DBarrierMappings[] = {
- {"oshld", OSHLD},
- {"oshst", OSHST},
- {"osh", OSH},
- {"nshld", NSHLD},
- {"nshst", NSHST},
- {"nsh", NSH},
- {"ishld", ISHLD},
- {"ishst", ISHST},
- {"ish", ISH},
- {"ld", LD},
- {"st", ST},
- {"sy", SY}
+ {"oshld", OSHLD, 0},
+ {"oshst", OSHST, 0},
+ {"osh", OSH, 0},
+ {"nshld", NSHLD, 0},
+ {"nshst", NSHST, 0},
+ {"nsh", NSH, 0},
+ {"ishld", ISHLD, 0},
+ {"ishst", ISHST, 0},
+ {"ish", ISH, 0},
+ {"ld", LD, 0},
+ {"st", ST, 0},
+ {"sy", SY, 0}
};
AArch64DB::DBarrierMapper::DBarrierMapper()
: AArch64NamedImmMapper(DBarrierMappings, 16u) {}
const AArch64NamedImmMapper::Mapping AArch64DC::DCMapper::DCMappings[] = {
- {"zva", ZVA},
- {"ivac", IVAC},
- {"isw", ISW},
- {"cvac", CVAC},
- {"csw", CSW},
- {"cvau", CVAU},
- {"civac", CIVAC},
- {"cisw", CISW}
+ {"zva", ZVA, 0},
+ {"ivac", IVAC, 0},
+ {"isw", ISW, 0},
+ {"cvac", CVAC, 0},
+ {"csw", CSW, 0},
+ {"cvau", CVAU, 0},
+ {"civac", CIVAC, 0},
+ {"cisw", CISW, 0}
};
AArch64DC::DCMapper::DCMapper()
: AArch64NamedImmMapper(DCMappings, 0) {}
const AArch64NamedImmMapper::Mapping AArch64IC::ICMapper::ICMappings[] = {
- {"ialluis", IALLUIS},
- {"iallu", IALLU},
- {"ivau", IVAU}
+ {"ialluis", IALLUIS, 0},
+ {"iallu", IALLU, 0},
+ {"ivau", IVAU, 0}
};
AArch64IC::ICMapper::ICMapper()
: AArch64NamedImmMapper(ICMappings, 0) {}
const AArch64NamedImmMapper::Mapping AArch64ISB::ISBMapper::ISBMappings[] = {
- {"sy", SY},
+ {"sy", SY, 0},
};
AArch64ISB::ISBMapper::ISBMapper()
: AArch64NamedImmMapper(ISBMappings, 16) {}
const AArch64NamedImmMapper::Mapping AArch64PRFM::PRFMMapper::PRFMMappings[] = {
- {"pldl1keep", PLDL1KEEP},
- {"pldl1strm", PLDL1STRM},
- {"pldl2keep", PLDL2KEEP},
- {"pldl2strm", PLDL2STRM},
- {"pldl3keep", PLDL3KEEP},
- {"pldl3strm", PLDL3STRM},
- {"plil1keep", PLIL1KEEP},
- {"plil1strm", PLIL1STRM},
- {"plil2keep", PLIL2KEEP},
- {"plil2strm", PLIL2STRM},
- {"plil3keep", PLIL3KEEP},
- {"plil3strm", PLIL3STRM},
- {"pstl1keep", PSTL1KEEP},
- {"pstl1strm", PSTL1STRM},
- {"pstl2keep", PSTL2KEEP},
- {"pstl2strm", PSTL2STRM},
- {"pstl3keep", PSTL3KEEP},
- {"pstl3strm", PSTL3STRM}
+ {"pldl1keep", PLDL1KEEP, 0},
+ {"pldl1strm", PLDL1STRM, 0},
+ {"pldl2keep", PLDL2KEEP, 0},
+ {"pldl2strm", PLDL2STRM, 0},
+ {"pldl3keep", PLDL3KEEP, 0},
+ {"pldl3strm", PLDL3STRM, 0},
+ {"plil1keep", PLIL1KEEP, 0},
+ {"plil1strm", PLIL1STRM, 0},
+ {"plil2keep", PLIL2KEEP, 0},
+ {"plil2strm", PLIL2STRM, 0},
+ {"plil3keep", PLIL3KEEP, 0},
+ {"plil3strm", PLIL3STRM, 0},
+ {"pstl1keep", PSTL1KEEP, 0},
+ {"pstl1strm", PSTL1STRM, 0},
+ {"pstl2keep", PSTL2KEEP, 0},
+ {"pstl2strm", PSTL2STRM, 0},
+ {"pstl3keep", PSTL3KEEP, 0},
+ {"pstl3strm", PSTL3STRM, 0}
};
AArch64PRFM::PRFMMapper::PRFMMapper()
: AArch64NamedImmMapper(PRFMMappings, 32) {}
const AArch64NamedImmMapper::Mapping AArch64PState::PStateMapper::PStateMappings[] = {
- {"spsel", SPSel},
- {"daifset", DAIFSet},
- {"daifclr", DAIFClr}
+ {"spsel", SPSel, 0},
+ {"daifset", DAIFSet, 0},
+ {"daifclr", DAIFClr, 0},
+
+ // v8.1a "Privileged Access Never" extension-specific PStates
+ {"pan", PAN, AArch64::HasV8_1aOps},
};
AArch64PState::PStateMapper::PStateMapper()
: AArch64NamedImmMapper(PStateMappings, 0) {}
const AArch64NamedImmMapper::Mapping AArch64SysReg::MRSMapper::MRSMappings[] = {
- {"mdccsr_el0", MDCCSR_EL0},
- {"dbgdtrrx_el0", DBGDTRRX_EL0},
- {"mdrar_el1", MDRAR_EL1},
- {"oslsr_el1", OSLSR_EL1},
- {"dbgauthstatus_el1", DBGAUTHSTATUS_EL1},
- {"pmceid0_el0", PMCEID0_EL0},
- {"pmceid1_el0", PMCEID1_EL0},
- {"midr_el1", MIDR_EL1},
- {"ccsidr_el1", CCSIDR_EL1},
- {"clidr_el1", CLIDR_EL1},
- {"ctr_el0", CTR_EL0},
- {"mpidr_el1", MPIDR_EL1},
- {"revidr_el1", REVIDR_EL1},
- {"aidr_el1", AIDR_EL1},
- {"dczid_el0", DCZID_EL0},
- {"id_pfr0_el1", ID_PFR0_EL1},
- {"id_pfr1_el1", ID_PFR1_EL1},
- {"id_dfr0_el1", ID_DFR0_EL1},
- {"id_afr0_el1", ID_AFR0_EL1},
- {"id_mmfr0_el1", ID_MMFR0_EL1},
- {"id_mmfr1_el1", ID_MMFR1_EL1},
- {"id_mmfr2_el1", ID_MMFR2_EL1},
- {"id_mmfr3_el1", ID_MMFR3_EL1},
- {"id_isar0_el1", ID_ISAR0_EL1},
- {"id_isar1_el1", ID_ISAR1_EL1},
- {"id_isar2_el1", ID_ISAR2_EL1},
- {"id_isar3_el1", ID_ISAR3_EL1},
- {"id_isar4_el1", ID_ISAR4_EL1},
- {"id_isar5_el1", ID_ISAR5_EL1},
- {"id_aa64pfr0_el1", ID_A64PFR0_EL1},
- {"id_aa64pfr1_el1", ID_A64PFR1_EL1},
- {"id_aa64dfr0_el1", ID_A64DFR0_EL1},
- {"id_aa64dfr1_el1", ID_A64DFR1_EL1},
- {"id_aa64afr0_el1", ID_A64AFR0_EL1},
- {"id_aa64afr1_el1", ID_A64AFR1_EL1},
- {"id_aa64isar0_el1", ID_A64ISAR0_EL1},
- {"id_aa64isar1_el1", ID_A64ISAR1_EL1},
- {"id_aa64mmfr0_el1", ID_A64MMFR0_EL1},
- {"id_aa64mmfr1_el1", ID_A64MMFR1_EL1},
- {"mvfr0_el1", MVFR0_EL1},
- {"mvfr1_el1", MVFR1_EL1},
- {"mvfr2_el1", MVFR2_EL1},
- {"rvbar_el1", RVBAR_EL1},
- {"rvbar_el2", RVBAR_EL2},
- {"rvbar_el3", RVBAR_EL3},
- {"isr_el1", ISR_EL1},
- {"cntpct_el0", CNTPCT_EL0},
- {"cntvct_el0", CNTVCT_EL0},
+ {"mdccsr_el0", MDCCSR_EL0, 0},
+ {"dbgdtrrx_el0", DBGDTRRX_EL0, 0},
+ {"mdrar_el1", MDRAR_EL1, 0},
+ {"oslsr_el1", OSLSR_EL1, 0},
+ {"dbgauthstatus_el1", DBGAUTHSTATUS_EL1, 0},
+ {"pmceid0_el0", PMCEID0_EL0, 0},
+ {"pmceid1_el0", PMCEID1_EL0, 0},
+ {"midr_el1", MIDR_EL1, 0},
+ {"ccsidr_el1", CCSIDR_EL1, 0},
+ {"clidr_el1", CLIDR_EL1, 0},
+ {"ctr_el0", CTR_EL0, 0},
+ {"mpidr_el1", MPIDR_EL1, 0},
+ {"revidr_el1", REVIDR_EL1, 0},
+ {"aidr_el1", AIDR_EL1, 0},
+ {"dczid_el0", DCZID_EL0, 0},
+ {"id_pfr0_el1", ID_PFR0_EL1, 0},
+ {"id_pfr1_el1", ID_PFR1_EL1, 0},
+ {"id_dfr0_el1", ID_DFR0_EL1, 0},
+ {"id_afr0_el1", ID_AFR0_EL1, 0},
+ {"id_mmfr0_el1", ID_MMFR0_EL1, 0},
+ {"id_mmfr1_el1", ID_MMFR1_EL1, 0},
+ {"id_mmfr2_el1", ID_MMFR2_EL1, 0},
+ {"id_mmfr3_el1", ID_MMFR3_EL1, 0},
+ {"id_isar0_el1", ID_ISAR0_EL1, 0},
+ {"id_isar1_el1", ID_ISAR1_EL1, 0},
+ {"id_isar2_el1", ID_ISAR2_EL1, 0},
+ {"id_isar3_el1", ID_ISAR3_EL1, 0},
+ {"id_isar4_el1", ID_ISAR4_EL1, 0},
+ {"id_isar5_el1", ID_ISAR5_EL1, 0},
+ {"id_aa64pfr0_el1", ID_A64PFR0_EL1, 0},
+ {"id_aa64pfr1_el1", ID_A64PFR1_EL1, 0},
+ {"id_aa64dfr0_el1", ID_A64DFR0_EL1, 0},
+ {"id_aa64dfr1_el1", ID_A64DFR1_EL1, 0},
+ {"id_aa64afr0_el1", ID_A64AFR0_EL1, 0},
+ {"id_aa64afr1_el1", ID_A64AFR1_EL1, 0},
+ {"id_aa64isar0_el1", ID_A64ISAR0_EL1, 0},
+ {"id_aa64isar1_el1", ID_A64ISAR1_EL1, 0},
+ {"id_aa64mmfr0_el1", ID_A64MMFR0_EL1, 0},
+ {"id_aa64mmfr1_el1", ID_A64MMFR1_EL1, 0},
+ {"mvfr0_el1", MVFR0_EL1, 0},
+ {"mvfr1_el1", MVFR1_EL1, 0},
+ {"mvfr2_el1", MVFR2_EL1, 0},
+ {"rvbar_el1", RVBAR_EL1, 0},
+ {"rvbar_el2", RVBAR_EL2, 0},
+ {"rvbar_el3", RVBAR_EL3, 0},
+ {"isr_el1", ISR_EL1, 0},
+ {"cntpct_el0", CNTPCT_EL0, 0},
+ {"cntvct_el0", CNTVCT_EL0, 0},
// Trace registers
- {"trcstatr", TRCSTATR},
- {"trcidr8", TRCIDR8},
- {"trcidr9", TRCIDR9},
- {"trcidr10", TRCIDR10},
- {"trcidr11", TRCIDR11},
- {"trcidr12", TRCIDR12},
- {"trcidr13", TRCIDR13},
- {"trcidr0", TRCIDR0},
- {"trcidr1", TRCIDR1},
- {"trcidr2", TRCIDR2},
- {"trcidr3", TRCIDR3},
- {"trcidr4", TRCIDR4},
- {"trcidr5", TRCIDR5},
- {"trcidr6", TRCIDR6},
- {"trcidr7", TRCIDR7},
- {"trcoslsr", TRCOSLSR},
- {"trcpdsr", TRCPDSR},
- {"trcdevaff0", TRCDEVAFF0},
- {"trcdevaff1", TRCDEVAFF1},
- {"trclsr", TRCLSR},
- {"trcauthstatus", TRCAUTHSTATUS},
- {"trcdevarch", TRCDEVARCH},
- {"trcdevid", TRCDEVID},
- {"trcdevtype", TRCDEVTYPE},
- {"trcpidr4", TRCPIDR4},
- {"trcpidr5", TRCPIDR5},
- {"trcpidr6", TRCPIDR6},
- {"trcpidr7", TRCPIDR7},
- {"trcpidr0", TRCPIDR0},
- {"trcpidr1", TRCPIDR1},
- {"trcpidr2", TRCPIDR2},
- {"trcpidr3", TRCPIDR3},
- {"trccidr0", TRCCIDR0},
- {"trccidr1", TRCCIDR1},
- {"trccidr2", TRCCIDR2},
- {"trccidr3", TRCCIDR3},
+ {"trcstatr", TRCSTATR, 0},
+ {"trcidr8", TRCIDR8, 0},
+ {"trcidr9", TRCIDR9, 0},
+ {"trcidr10", TRCIDR10, 0},
+ {"trcidr11", TRCIDR11, 0},
+ {"trcidr12", TRCIDR12, 0},
+ {"trcidr13", TRCIDR13, 0},
+ {"trcidr0", TRCIDR0, 0},
+ {"trcidr1", TRCIDR1, 0},
+ {"trcidr2", TRCIDR2, 0},
+ {"trcidr3", TRCIDR3, 0},
+ {"trcidr4", TRCIDR4, 0},
+ {"trcidr5", TRCIDR5, 0},
+ {"trcidr6", TRCIDR6, 0},
+ {"trcidr7", TRCIDR7, 0},
+ {"trcoslsr", TRCOSLSR, 0},
+ {"trcpdsr", TRCPDSR, 0},
+ {"trcdevaff0", TRCDEVAFF0, 0},
+ {"trcdevaff1", TRCDEVAFF1, 0},
+ {"trclsr", TRCLSR, 0},
+ {"trcauthstatus", TRCAUTHSTATUS, 0},
+ {"trcdevarch", TRCDEVARCH, 0},
+ {"trcdevid", TRCDEVID, 0},
+ {"trcdevtype", TRCDEVTYPE, 0},
+ {"trcpidr4", TRCPIDR4, 0},
+ {"trcpidr5", TRCPIDR5, 0},
+ {"trcpidr6", TRCPIDR6, 0},
+ {"trcpidr7", TRCPIDR7, 0},
+ {"trcpidr0", TRCPIDR0, 0},
+ {"trcpidr1", TRCPIDR1, 0},
+ {"trcpidr2", TRCPIDR2, 0},
+ {"trcpidr3", TRCPIDR3, 0},
+ {"trccidr0", TRCCIDR0, 0},
+ {"trccidr1", TRCCIDR1, 0},
+ {"trccidr2", TRCCIDR2, 0},
+ {"trccidr3", TRCCIDR3, 0},
// GICv3 registers
- {"icc_iar1_el1", ICC_IAR1_EL1},
- {"icc_iar0_el1", ICC_IAR0_EL1},
- {"icc_hppir1_el1", ICC_HPPIR1_EL1},
- {"icc_hppir0_el1", ICC_HPPIR0_EL1},
- {"icc_rpr_el1", ICC_RPR_EL1},
- {"ich_vtr_el2", ICH_VTR_EL2},
- {"ich_eisr_el2", ICH_EISR_EL2},
- {"ich_elsr_el2", ICH_ELSR_EL2}
+ {"icc_iar1_el1", ICC_IAR1_EL1, 0},
+ {"icc_iar0_el1", ICC_IAR0_EL1, 0},
+ {"icc_hppir1_el1", ICC_HPPIR1_EL1, 0},
+ {"icc_hppir0_el1", ICC_HPPIR0_EL1, 0},
+ {"icc_rpr_el1", ICC_RPR_EL1, 0},
+ {"ich_vtr_el2", ICH_VTR_EL2, 0},
+ {"ich_eisr_el2", ICH_EISR_EL2, 0},
+ {"ich_elsr_el2", ICH_ELSR_EL2, 0}
};
-AArch64SysReg::MRSMapper::MRSMapper(uint64_t FeatureBits)
- : SysRegMapper(FeatureBits) {
+AArch64SysReg::MRSMapper::MRSMapper() {
InstMappings = &MRSMappings[0];
NumInstMappings = llvm::array_lengthof(MRSMappings);
}
const AArch64NamedImmMapper::Mapping AArch64SysReg::MSRMapper::MSRMappings[] = {
- {"dbgdtrtx_el0", DBGDTRTX_EL0},
- {"oslar_el1", OSLAR_EL1},
- {"pmswinc_el0", PMSWINC_EL0},
+ {"dbgdtrtx_el0", DBGDTRTX_EL0, 0},
+ {"oslar_el1", OSLAR_EL1, 0},
+ {"pmswinc_el0", PMSWINC_EL0, 0},
// Trace registers
- {"trcoslar", TRCOSLAR},
- {"trclar", TRCLAR},
+ {"trcoslar", TRCOSLAR, 0},
+ {"trclar", TRCLAR, 0},
// GICv3 registers
- {"icc_eoir1_el1", ICC_EOIR1_EL1},
- {"icc_eoir0_el1", ICC_EOIR0_EL1},
- {"icc_dir_el1", ICC_DIR_EL1},
- {"icc_sgi1r_el1", ICC_SGI1R_EL1},
- {"icc_asgi1r_el1", ICC_ASGI1R_EL1},
- {"icc_sgi0r_el1", ICC_SGI0R_EL1}
+ {"icc_eoir1_el1", ICC_EOIR1_EL1, 0},
+ {"icc_eoir0_el1", ICC_EOIR0_EL1, 0},
+ {"icc_dir_el1", ICC_DIR_EL1, 0},
+ {"icc_sgi1r_el1", ICC_SGI1R_EL1, 0},
+ {"icc_asgi1r_el1", ICC_ASGI1R_EL1, 0},
+ {"icc_sgi0r_el1", ICC_SGI0R_EL1, 0},
+
+ // v8.1a "Privileged Access Never" extension-specific system registers
+ {"pan", PAN, AArch64::HasV8_1aOps},
};
-AArch64SysReg::MSRMapper::MSRMapper(uint64_t FeatureBits)
- : SysRegMapper(FeatureBits) {
+AArch64SysReg::MSRMapper::MSRMapper() {
InstMappings = &MSRMappings[0];
NumInstMappings = llvm::array_lengthof(MSRMappings);
}
const AArch64NamedImmMapper::Mapping AArch64SysReg::SysRegMapper::SysRegMappings[] = {
- {"osdtrrx_el1", OSDTRRX_EL1},
- {"osdtrtx_el1", OSDTRTX_EL1},
- {"teecr32_el1", TEECR32_EL1},
- {"mdccint_el1", MDCCINT_EL1},
- {"mdscr_el1", MDSCR_EL1},
- {"dbgdtr_el0", DBGDTR_EL0},
- {"oseccr_el1", OSECCR_EL1},
- {"dbgvcr32_el2", DBGVCR32_EL2},
- {"dbgbvr0_el1", DBGBVR0_EL1},
- {"dbgbvr1_el1", DBGBVR1_EL1},
- {"dbgbvr2_el1", DBGBVR2_EL1},
- {"dbgbvr3_el1", DBGBVR3_EL1},
- {"dbgbvr4_el1", DBGBVR4_EL1},
- {"dbgbvr5_el1", DBGBVR5_EL1},
- {"dbgbvr6_el1", DBGBVR6_EL1},
- {"dbgbvr7_el1", DBGBVR7_EL1},
- {"dbgbvr8_el1", DBGBVR8_EL1},
- {"dbgbvr9_el1", DBGBVR9_EL1},
- {"dbgbvr10_el1", DBGBVR10_EL1},
- {"dbgbvr11_el1", DBGBVR11_EL1},
- {"dbgbvr12_el1", DBGBVR12_EL1},
- {"dbgbvr13_el1", DBGBVR13_EL1},
- {"dbgbvr14_el1", DBGBVR14_EL1},
- {"dbgbvr15_el1", DBGBVR15_EL1},
- {"dbgbcr0_el1", DBGBCR0_EL1},
- {"dbgbcr1_el1", DBGBCR1_EL1},
- {"dbgbcr2_el1", DBGBCR2_EL1},
- {"dbgbcr3_el1", DBGBCR3_EL1},
- {"dbgbcr4_el1", DBGBCR4_EL1},
- {"dbgbcr5_el1", DBGBCR5_EL1},
- {"dbgbcr6_el1", DBGBCR6_EL1},
- {"dbgbcr7_el1", DBGBCR7_EL1},
- {"dbgbcr8_el1", DBGBCR8_EL1},
- {"dbgbcr9_el1", DBGBCR9_EL1},
- {"dbgbcr10_el1", DBGBCR10_EL1},
- {"dbgbcr11_el1", DBGBCR11_EL1},
- {"dbgbcr12_el1", DBGBCR12_EL1},
- {"dbgbcr13_el1", DBGBCR13_EL1},
- {"dbgbcr14_el1", DBGBCR14_EL1},
- {"dbgbcr15_el1", DBGBCR15_EL1},
- {"dbgwvr0_el1", DBGWVR0_EL1},
- {"dbgwvr1_el1", DBGWVR1_EL1},
- {"dbgwvr2_el1", DBGWVR2_EL1},
- {"dbgwvr3_el1", DBGWVR3_EL1},
- {"dbgwvr4_el1", DBGWVR4_EL1},
- {"dbgwvr5_el1", DBGWVR5_EL1},
- {"dbgwvr6_el1", DBGWVR6_EL1},
- {"dbgwvr7_el1", DBGWVR7_EL1},
- {"dbgwvr8_el1", DBGWVR8_EL1},
- {"dbgwvr9_el1", DBGWVR9_EL1},
- {"dbgwvr10_el1", DBGWVR10_EL1},
- {"dbgwvr11_el1", DBGWVR11_EL1},
- {"dbgwvr12_el1", DBGWVR12_EL1},
- {"dbgwvr13_el1", DBGWVR13_EL1},
- {"dbgwvr14_el1", DBGWVR14_EL1},
- {"dbgwvr15_el1", DBGWVR15_EL1},
- {"dbgwcr0_el1", DBGWCR0_EL1},
- {"dbgwcr1_el1", DBGWCR1_EL1},
- {"dbgwcr2_el1", DBGWCR2_EL1},
- {"dbgwcr3_el1", DBGWCR3_EL1},
- {"dbgwcr4_el1", DBGWCR4_EL1},
- {"dbgwcr5_el1", DBGWCR5_EL1},
- {"dbgwcr6_el1", DBGWCR6_EL1},
- {"dbgwcr7_el1", DBGWCR7_EL1},
- {"dbgwcr8_el1", DBGWCR8_EL1},
- {"dbgwcr9_el1", DBGWCR9_EL1},
- {"dbgwcr10_el1", DBGWCR10_EL1},
- {"dbgwcr11_el1", DBGWCR11_EL1},
- {"dbgwcr12_el1", DBGWCR12_EL1},
- {"dbgwcr13_el1", DBGWCR13_EL1},
- {"dbgwcr14_el1", DBGWCR14_EL1},
- {"dbgwcr15_el1", DBGWCR15_EL1},
- {"teehbr32_el1", TEEHBR32_EL1},
- {"osdlr_el1", OSDLR_EL1},
- {"dbgprcr_el1", DBGPRCR_EL1},
- {"dbgclaimset_el1", DBGCLAIMSET_EL1},
- {"dbgclaimclr_el1", DBGCLAIMCLR_EL1},
- {"csselr_el1", CSSELR_EL1},
- {"vpidr_el2", VPIDR_EL2},
- {"vmpidr_el2", VMPIDR_EL2},
- {"sctlr_el1", SCTLR_EL1},
- {"sctlr_el2", SCTLR_EL2},
- {"sctlr_el3", SCTLR_EL3},
- {"actlr_el1", ACTLR_EL1},
- {"actlr_el2", ACTLR_EL2},
- {"actlr_el3", ACTLR_EL3},
- {"cpacr_el1", CPACR_EL1},
- {"hcr_el2", HCR_EL2},
- {"scr_el3", SCR_EL3},
- {"mdcr_el2", MDCR_EL2},
- {"sder32_el3", SDER32_EL3},
- {"cptr_el2", CPTR_EL2},
- {"cptr_el3", CPTR_EL3},
- {"hstr_el2", HSTR_EL2},
- {"hacr_el2", HACR_EL2},
- {"mdcr_el3", MDCR_EL3},
- {"ttbr0_el1", TTBR0_EL1},
- {"ttbr0_el2", TTBR0_EL2},
- {"ttbr0_el3", TTBR0_EL3},
- {"ttbr1_el1", TTBR1_EL1},
- {"tcr_el1", TCR_EL1},
- {"tcr_el2", TCR_EL2},
- {"tcr_el3", TCR_EL3},
- {"vttbr_el2", VTTBR_EL2},
- {"vtcr_el2", VTCR_EL2},
- {"dacr32_el2", DACR32_EL2},
- {"spsr_el1", SPSR_EL1},
- {"spsr_el2", SPSR_EL2},
- {"spsr_el3", SPSR_EL3},
- {"elr_el1", ELR_EL1},
- {"elr_el2", ELR_EL2},
- {"elr_el3", ELR_EL3},
- {"sp_el0", SP_EL0},
- {"sp_el1", SP_EL1},
- {"sp_el2", SP_EL2},
- {"spsel", SPSel},
- {"nzcv", NZCV},
- {"daif", DAIF},
- {"currentel", CurrentEL},
- {"spsr_irq", SPSR_irq},
- {"spsr_abt", SPSR_abt},
- {"spsr_und", SPSR_und},
- {"spsr_fiq", SPSR_fiq},
- {"fpcr", FPCR},
- {"fpsr", FPSR},
- {"dspsr_el0", DSPSR_EL0},
- {"dlr_el0", DLR_EL0},
- {"ifsr32_el2", IFSR32_EL2},
- {"afsr0_el1", AFSR0_EL1},
- {"afsr0_el2", AFSR0_EL2},
- {"afsr0_el3", AFSR0_EL3},
- {"afsr1_el1", AFSR1_EL1},
- {"afsr1_el2", AFSR1_EL2},
- {"afsr1_el3", AFSR1_EL3},
- {"esr_el1", ESR_EL1},
- {"esr_el2", ESR_EL2},
- {"esr_el3", ESR_EL3},
- {"fpexc32_el2", FPEXC32_EL2},
- {"far_el1", FAR_EL1},
- {"far_el2", FAR_EL2},
- {"far_el3", FAR_EL3},
- {"hpfar_el2", HPFAR_EL2},
- {"par_el1", PAR_EL1},
- {"pmcr_el0", PMCR_EL0},
- {"pmcntenset_el0", PMCNTENSET_EL0},
- {"pmcntenclr_el0", PMCNTENCLR_EL0},
- {"pmovsclr_el0", PMOVSCLR_EL0},
- {"pmselr_el0", PMSELR_EL0},
- {"pmccntr_el0", PMCCNTR_EL0},
- {"pmxevtyper_el0", PMXEVTYPER_EL0},
- {"pmxevcntr_el0", PMXEVCNTR_EL0},
- {"pmuserenr_el0", PMUSERENR_EL0},
- {"pmintenset_el1", PMINTENSET_EL1},
- {"pmintenclr_el1", PMINTENCLR_EL1},
- {"pmovsset_el0", PMOVSSET_EL0},
- {"mair_el1", MAIR_EL1},
- {"mair_el2", MAIR_EL2},
- {"mair_el3", MAIR_EL3},
- {"amair_el1", AMAIR_EL1},
- {"amair_el2", AMAIR_EL2},
- {"amair_el3", AMAIR_EL3},
- {"vbar_el1", VBAR_EL1},
- {"vbar_el2", VBAR_EL2},
- {"vbar_el3", VBAR_EL3},
- {"rmr_el1", RMR_EL1},
- {"rmr_el2", RMR_EL2},
- {"rmr_el3", RMR_EL3},
- {"contextidr_el1", CONTEXTIDR_EL1},
- {"tpidr_el0", TPIDR_EL0},
- {"tpidr_el2", TPIDR_EL2},
- {"tpidr_el3", TPIDR_EL3},
- {"tpidrro_el0", TPIDRRO_EL0},
- {"tpidr_el1", TPIDR_EL1},
- {"cntfrq_el0", CNTFRQ_EL0},
- {"cntvoff_el2", CNTVOFF_EL2},
- {"cntkctl_el1", CNTKCTL_EL1},
- {"cnthctl_el2", CNTHCTL_EL2},
- {"cntp_tval_el0", CNTP_TVAL_EL0},
- {"cnthp_tval_el2", CNTHP_TVAL_EL2},
- {"cntps_tval_el1", CNTPS_TVAL_EL1},
- {"cntp_ctl_el0", CNTP_CTL_EL0},
- {"cnthp_ctl_el2", CNTHP_CTL_EL2},
- {"cntps_ctl_el1", CNTPS_CTL_EL1},
- {"cntp_cval_el0", CNTP_CVAL_EL0},
- {"cnthp_cval_el2", CNTHP_CVAL_EL2},
- {"cntps_cval_el1", CNTPS_CVAL_EL1},
- {"cntv_tval_el0", CNTV_TVAL_EL0},
- {"cntv_ctl_el0", CNTV_CTL_EL0},
- {"cntv_cval_el0", CNTV_CVAL_EL0},
- {"pmevcntr0_el0", PMEVCNTR0_EL0},
- {"pmevcntr1_el0", PMEVCNTR1_EL0},
- {"pmevcntr2_el0", PMEVCNTR2_EL0},
- {"pmevcntr3_el0", PMEVCNTR3_EL0},
- {"pmevcntr4_el0", PMEVCNTR4_EL0},
- {"pmevcntr5_el0", PMEVCNTR5_EL0},
- {"pmevcntr6_el0", PMEVCNTR6_EL0},
- {"pmevcntr7_el0", PMEVCNTR7_EL0},
- {"pmevcntr8_el0", PMEVCNTR8_EL0},
- {"pmevcntr9_el0", PMEVCNTR9_EL0},
- {"pmevcntr10_el0", PMEVCNTR10_EL0},
- {"pmevcntr11_el0", PMEVCNTR11_EL0},
- {"pmevcntr12_el0", PMEVCNTR12_EL0},
- {"pmevcntr13_el0", PMEVCNTR13_EL0},
- {"pmevcntr14_el0", PMEVCNTR14_EL0},
- {"pmevcntr15_el0", PMEVCNTR15_EL0},
- {"pmevcntr16_el0", PMEVCNTR16_EL0},
- {"pmevcntr17_el0", PMEVCNTR17_EL0},
- {"pmevcntr18_el0", PMEVCNTR18_EL0},
- {"pmevcntr19_el0", PMEVCNTR19_EL0},
- {"pmevcntr20_el0", PMEVCNTR20_EL0},
- {"pmevcntr21_el0", PMEVCNTR21_EL0},
- {"pmevcntr22_el0", PMEVCNTR22_EL0},
- {"pmevcntr23_el0", PMEVCNTR23_EL0},
- {"pmevcntr24_el0", PMEVCNTR24_EL0},
- {"pmevcntr25_el0", PMEVCNTR25_EL0},
- {"pmevcntr26_el0", PMEVCNTR26_EL0},
- {"pmevcntr27_el0", PMEVCNTR27_EL0},
- {"pmevcntr28_el0", PMEVCNTR28_EL0},
- {"pmevcntr29_el0", PMEVCNTR29_EL0},
- {"pmevcntr30_el0", PMEVCNTR30_EL0},
- {"pmccfiltr_el0", PMCCFILTR_EL0},
- {"pmevtyper0_el0", PMEVTYPER0_EL0},
- {"pmevtyper1_el0", PMEVTYPER1_EL0},
- {"pmevtyper2_el0", PMEVTYPER2_EL0},
- {"pmevtyper3_el0", PMEVTYPER3_EL0},
- {"pmevtyper4_el0", PMEVTYPER4_EL0},
- {"pmevtyper5_el0", PMEVTYPER5_EL0},
- {"pmevtyper6_el0", PMEVTYPER6_EL0},
- {"pmevtyper7_el0", PMEVTYPER7_EL0},
- {"pmevtyper8_el0", PMEVTYPER8_EL0},
- {"pmevtyper9_el0", PMEVTYPER9_EL0},
- {"pmevtyper10_el0", PMEVTYPER10_EL0},
- {"pmevtyper11_el0", PMEVTYPER11_EL0},
- {"pmevtyper12_el0", PMEVTYPER12_EL0},
- {"pmevtyper13_el0", PMEVTYPER13_EL0},
- {"pmevtyper14_el0", PMEVTYPER14_EL0},
- {"pmevtyper15_el0", PMEVTYPER15_EL0},
- {"pmevtyper16_el0", PMEVTYPER16_EL0},
- {"pmevtyper17_el0", PMEVTYPER17_EL0},
- {"pmevtyper18_el0", PMEVTYPER18_EL0},
- {"pmevtyper19_el0", PMEVTYPER19_EL0},
- {"pmevtyper20_el0", PMEVTYPER20_EL0},
- {"pmevtyper21_el0", PMEVTYPER21_EL0},
- {"pmevtyper22_el0", PMEVTYPER22_EL0},
- {"pmevtyper23_el0", PMEVTYPER23_EL0},
- {"pmevtyper24_el0", PMEVTYPER24_EL0},
- {"pmevtyper25_el0", PMEVTYPER25_EL0},
- {"pmevtyper26_el0", PMEVTYPER26_EL0},
- {"pmevtyper27_el0", PMEVTYPER27_EL0},
- {"pmevtyper28_el0", PMEVTYPER28_EL0},
- {"pmevtyper29_el0", PMEVTYPER29_EL0},
- {"pmevtyper30_el0", PMEVTYPER30_EL0},
+ {"osdtrrx_el1", OSDTRRX_EL1, 0},
+ {"osdtrtx_el1", OSDTRTX_EL1, 0},
+ {"teecr32_el1", TEECR32_EL1, 0},
+ {"mdccint_el1", MDCCINT_EL1, 0},
+ {"mdscr_el1", MDSCR_EL1, 0},
+ {"dbgdtr_el0", DBGDTR_EL0, 0},
+ {"oseccr_el1", OSECCR_EL1, 0},
+ {"dbgvcr32_el2", DBGVCR32_EL2, 0},
+ {"dbgbvr0_el1", DBGBVR0_EL1, 0},
+ {"dbgbvr1_el1", DBGBVR1_EL1, 0},
+ {"dbgbvr2_el1", DBGBVR2_EL1, 0},
+ {"dbgbvr3_el1", DBGBVR3_EL1, 0},
+ {"dbgbvr4_el1", DBGBVR4_EL1, 0},
+ {"dbgbvr5_el1", DBGBVR5_EL1, 0},
+ {"dbgbvr6_el1", DBGBVR6_EL1, 0},
+ {"dbgbvr7_el1", DBGBVR7_EL1, 0},
+ {"dbgbvr8_el1", DBGBVR8_EL1, 0},
+ {"dbgbvr9_el1", DBGBVR9_EL1, 0},
+ {"dbgbvr10_el1", DBGBVR10_EL1, 0},
+ {"dbgbvr11_el1", DBGBVR11_EL1, 0},
+ {"dbgbvr12_el1", DBGBVR12_EL1, 0},
+ {"dbgbvr13_el1", DBGBVR13_EL1, 0},
+ {"dbgbvr14_el1", DBGBVR14_EL1, 0},
+ {"dbgbvr15_el1", DBGBVR15_EL1, 0},
+ {"dbgbcr0_el1", DBGBCR0_EL1, 0},
+ {"dbgbcr1_el1", DBGBCR1_EL1, 0},
+ {"dbgbcr2_el1", DBGBCR2_EL1, 0},
+ {"dbgbcr3_el1", DBGBCR3_EL1, 0},
+ {"dbgbcr4_el1", DBGBCR4_EL1, 0},
+ {"dbgbcr5_el1", DBGBCR5_EL1, 0},
+ {"dbgbcr6_el1", DBGBCR6_EL1, 0},
+ {"dbgbcr7_el1", DBGBCR7_EL1, 0},
+ {"dbgbcr8_el1", DBGBCR8_EL1, 0},
+ {"dbgbcr9_el1", DBGBCR9_EL1, 0},
+ {"dbgbcr10_el1", DBGBCR10_EL1, 0},
+ {"dbgbcr11_el1", DBGBCR11_EL1, 0},
+ {"dbgbcr12_el1", DBGBCR12_EL1, 0},
+ {"dbgbcr13_el1", DBGBCR13_EL1, 0},
+ {"dbgbcr14_el1", DBGBCR14_EL1, 0},
+ {"dbgbcr15_el1", DBGBCR15_EL1, 0},
+ {"dbgwvr0_el1", DBGWVR0_EL1, 0},
+ {"dbgwvr1_el1", DBGWVR1_EL1, 0},
+ {"dbgwvr2_el1", DBGWVR2_EL1, 0},
+ {"dbgwvr3_el1", DBGWVR3_EL1, 0},
+ {"dbgwvr4_el1", DBGWVR4_EL1, 0},
+ {"dbgwvr5_el1", DBGWVR5_EL1, 0},
+ {"dbgwvr6_el1", DBGWVR6_EL1, 0},
+ {"dbgwvr7_el1", DBGWVR7_EL1, 0},
+ {"dbgwvr8_el1", DBGWVR8_EL1, 0},
+ {"dbgwvr9_el1", DBGWVR9_EL1, 0},
+ {"dbgwvr10_el1", DBGWVR10_EL1, 0},
+ {"dbgwvr11_el1", DBGWVR11_EL1, 0},
+ {"dbgwvr12_el1", DBGWVR12_EL1, 0},
+ {"dbgwvr13_el1", DBGWVR13_EL1, 0},
+ {"dbgwvr14_el1", DBGWVR14_EL1, 0},
+ {"dbgwvr15_el1", DBGWVR15_EL1, 0},
+ {"dbgwcr0_el1", DBGWCR0_EL1, 0},
+ {"dbgwcr1_el1", DBGWCR1_EL1, 0},
+ {"dbgwcr2_el1", DBGWCR2_EL1, 0},
+ {"dbgwcr3_el1", DBGWCR3_EL1, 0},
+ {"dbgwcr4_el1", DBGWCR4_EL1, 0},
+ {"dbgwcr5_el1", DBGWCR5_EL1, 0},
+ {"dbgwcr6_el1", DBGWCR6_EL1, 0},
+ {"dbgwcr7_el1", DBGWCR7_EL1, 0},
+ {"dbgwcr8_el1", DBGWCR8_EL1, 0},
+ {"dbgwcr9_el1", DBGWCR9_EL1, 0},
+ {"dbgwcr10_el1", DBGWCR10_EL1, 0},
+ {"dbgwcr11_el1", DBGWCR11_EL1, 0},
+ {"dbgwcr12_el1", DBGWCR12_EL1, 0},
+ {"dbgwcr13_el1", DBGWCR13_EL1, 0},
+ {"dbgwcr14_el1", DBGWCR14_EL1, 0},
+ {"dbgwcr15_el1", DBGWCR15_EL1, 0},
+ {"teehbr32_el1", TEEHBR32_EL1, 0},
+ {"osdlr_el1", OSDLR_EL1, 0},
+ {"dbgprcr_el1", DBGPRCR_EL1, 0},
+ {"dbgclaimset_el1", DBGCLAIMSET_EL1, 0},
+ {"dbgclaimclr_el1", DBGCLAIMCLR_EL1, 0},
+ {"csselr_el1", CSSELR_EL1, 0},
+ {"vpidr_el2", VPIDR_EL2, 0},
+ {"vmpidr_el2", VMPIDR_EL2, 0},
+ {"sctlr_el1", SCTLR_EL1, 0},
+ {"sctlr_el2", SCTLR_EL2, 0},
+ {"sctlr_el3", SCTLR_EL3, 0},
+ {"actlr_el1", ACTLR_EL1, 0},
+ {"actlr_el2", ACTLR_EL2, 0},
+ {"actlr_el3", ACTLR_EL3, 0},
+ {"cpacr_el1", CPACR_EL1, 0},
+ {"hcr_el2", HCR_EL2, 0},
+ {"scr_el3", SCR_EL3, 0},
+ {"mdcr_el2", MDCR_EL2, 0},
+ {"sder32_el3", SDER32_EL3, 0},
+ {"cptr_el2", CPTR_EL2, 0},
+ {"cptr_el3", CPTR_EL3, 0},
+ {"hstr_el2", HSTR_EL2, 0},
+ {"hacr_el2", HACR_EL2, 0},
+ {"mdcr_el3", MDCR_EL3, 0},
+ {"ttbr0_el1", TTBR0_EL1, 0},
+ {"ttbr0_el2", TTBR0_EL2, 0},
+ {"ttbr0_el3", TTBR0_EL3, 0},
+ {"ttbr1_el1", TTBR1_EL1, 0},
+ {"tcr_el1", TCR_EL1, 0},
+ {"tcr_el2", TCR_EL2, 0},
+ {"tcr_el3", TCR_EL3, 0},
+ {"vttbr_el2", VTTBR_EL2, 0},
+ {"vtcr_el2", VTCR_EL2, 0},
+ {"dacr32_el2", DACR32_EL2, 0},
+ {"spsr_el1", SPSR_EL1, 0},
+ {"spsr_el2", SPSR_EL2, 0},
+ {"spsr_el3", SPSR_EL3, 0},
+ {"elr_el1", ELR_EL1, 0},
+ {"elr_el2", ELR_EL2, 0},
+ {"elr_el3", ELR_EL3, 0},
+ {"sp_el0", SP_EL0, 0},
+ {"sp_el1", SP_EL1, 0},
+ {"sp_el2", SP_EL2, 0},
+ {"spsel", SPSel, 0},
+ {"nzcv", NZCV, 0},
+ {"daif", DAIF, 0},
+ {"currentel", CurrentEL, 0},
+ {"spsr_irq", SPSR_irq, 0},
+ {"spsr_abt", SPSR_abt, 0},
+ {"spsr_und", SPSR_und, 0},
+ {"spsr_fiq", SPSR_fiq, 0},
+ {"fpcr", FPCR, 0},
+ {"fpsr", FPSR, 0},
+ {"dspsr_el0", DSPSR_EL0, 0},
+ {"dlr_el0", DLR_EL0, 0},
+ {"ifsr32_el2", IFSR32_EL2, 0},
+ {"afsr0_el1", AFSR0_EL1, 0},
+ {"afsr0_el2", AFSR0_EL2, 0},
+ {"afsr0_el3", AFSR0_EL3, 0},
+ {"afsr1_el1", AFSR1_EL1, 0},
+ {"afsr1_el2", AFSR1_EL2, 0},
+ {"afsr1_el3", AFSR1_EL3, 0},
+ {"esr_el1", ESR_EL1, 0},
+ {"esr_el2", ESR_EL2, 0},
+ {"esr_el3", ESR_EL3, 0},
+ {"fpexc32_el2", FPEXC32_EL2, 0},
+ {"far_el1", FAR_EL1, 0},
+ {"far_el2", FAR_EL2, 0},
+ {"far_el3", FAR_EL3, 0},
+ {"hpfar_el2", HPFAR_EL2, 0},
+ {"par_el1", PAR_EL1, 0},
+ {"pmcr_el0", PMCR_EL0, 0},
+ {"pmcntenset_el0", PMCNTENSET_EL0, 0},
+ {"pmcntenclr_el0", PMCNTENCLR_EL0, 0},
+ {"pmovsclr_el0", PMOVSCLR_EL0, 0},
+ {"pmselr_el0", PMSELR_EL0, 0},
+ {"pmccntr_el0", PMCCNTR_EL0, 0},
+ {"pmxevtyper_el0", PMXEVTYPER_EL0, 0},
+ {"pmxevcntr_el0", PMXEVCNTR_EL0, 0},
+ {"pmuserenr_el0", PMUSERENR_EL0, 0},
+ {"pmintenset_el1", PMINTENSET_EL1, 0},
+ {"pmintenclr_el1", PMINTENCLR_EL1, 0},
+ {"pmovsset_el0", PMOVSSET_EL0, 0},
+ {"mair_el1", MAIR_EL1, 0},
+ {"mair_el2", MAIR_EL2, 0},
+ {"mair_el3", MAIR_EL3, 0},
+ {"amair_el1", AMAIR_EL1, 0},
+ {"amair_el2", AMAIR_EL2, 0},
+ {"amair_el3", AMAIR_EL3, 0},
+ {"vbar_el1", VBAR_EL1, 0},
+ {"vbar_el2", VBAR_EL2, 0},
+ {"vbar_el3", VBAR_EL3, 0},
+ {"rmr_el1", RMR_EL1, 0},
+ {"rmr_el2", RMR_EL2, 0},
+ {"rmr_el3", RMR_EL3, 0},
+ {"contextidr_el1", CONTEXTIDR_EL1, 0},
+ {"tpidr_el0", TPIDR_EL0, 0},
+ {"tpidr_el2", TPIDR_EL2, 0},
+ {"tpidr_el3", TPIDR_EL3, 0},
+ {"tpidrro_el0", TPIDRRO_EL0, 0},
+ {"tpidr_el1", TPIDR_EL1, 0},
+ {"cntfrq_el0", CNTFRQ_EL0, 0},
+ {"cntvoff_el2", CNTVOFF_EL2, 0},
+ {"cntkctl_el1", CNTKCTL_EL1, 0},
+ {"cnthctl_el2", CNTHCTL_EL2, 0},
+ {"cntp_tval_el0", CNTP_TVAL_EL0, 0},
+ {"cnthp_tval_el2", CNTHP_TVAL_EL2, 0},
+ {"cntps_tval_el1", CNTPS_TVAL_EL1, 0},
+ {"cntp_ctl_el0", CNTP_CTL_EL0, 0},
+ {"cnthp_ctl_el2", CNTHP_CTL_EL2, 0},
+ {"cntps_ctl_el1", CNTPS_CTL_EL1, 0},
+ {"cntp_cval_el0", CNTP_CVAL_EL0, 0},
+ {"cnthp_cval_el2", CNTHP_CVAL_EL2, 0},
+ {"cntps_cval_el1", CNTPS_CVAL_EL1, 0},
+ {"cntv_tval_el0", CNTV_TVAL_EL0, 0},
+ {"cntv_ctl_el0", CNTV_CTL_EL0, 0},
+ {"cntv_cval_el0", CNTV_CVAL_EL0, 0},
+ {"pmevcntr0_el0", PMEVCNTR0_EL0, 0},
+ {"pmevcntr1_el0", PMEVCNTR1_EL0, 0},
+ {"pmevcntr2_el0", PMEVCNTR2_EL0, 0},
+ {"pmevcntr3_el0", PMEVCNTR3_EL0, 0},
+ {"pmevcntr4_el0", PMEVCNTR4_EL0, 0},
+ {"pmevcntr5_el0", PMEVCNTR5_EL0, 0},
+ {"pmevcntr6_el0", PMEVCNTR6_EL0, 0},
+ {"pmevcntr7_el0", PMEVCNTR7_EL0, 0},
+ {"pmevcntr8_el0", PMEVCNTR8_EL0, 0},
+ {"pmevcntr9_el0", PMEVCNTR9_EL0, 0},
+ {"pmevcntr10_el0", PMEVCNTR10_EL0, 0},
+ {"pmevcntr11_el0", PMEVCNTR11_EL0, 0},
+ {"pmevcntr12_el0", PMEVCNTR12_EL0, 0},
+ {"pmevcntr13_el0", PMEVCNTR13_EL0, 0},
+ {"pmevcntr14_el0", PMEVCNTR14_EL0, 0},
+ {"pmevcntr15_el0", PMEVCNTR15_EL0, 0},
+ {"pmevcntr16_el0", PMEVCNTR16_EL0, 0},
+ {"pmevcntr17_el0", PMEVCNTR17_EL0, 0},
+ {"pmevcntr18_el0", PMEVCNTR18_EL0, 0},
+ {"pmevcntr19_el0", PMEVCNTR19_EL0, 0},
+ {"pmevcntr20_el0", PMEVCNTR20_EL0, 0},
+ {"pmevcntr21_el0", PMEVCNTR21_EL0, 0},
+ {"pmevcntr22_el0", PMEVCNTR22_EL0, 0},
+ {"pmevcntr23_el0", PMEVCNTR23_EL0, 0},
+ {"pmevcntr24_el0", PMEVCNTR24_EL0, 0},
+ {"pmevcntr25_el0", PMEVCNTR25_EL0, 0},
+ {"pmevcntr26_el0", PMEVCNTR26_EL0, 0},
+ {"pmevcntr27_el0", PMEVCNTR27_EL0, 0},
+ {"pmevcntr28_el0", PMEVCNTR28_EL0, 0},
+ {"pmevcntr29_el0", PMEVCNTR29_EL0, 0},
+ {"pmevcntr30_el0", PMEVCNTR30_EL0, 0},
+ {"pmccfiltr_el0", PMCCFILTR_EL0, 0},
+ {"pmevtyper0_el0", PMEVTYPER0_EL0, 0},
+ {"pmevtyper1_el0", PMEVTYPER1_EL0, 0},
+ {"pmevtyper2_el0", PMEVTYPER2_EL0, 0},
+ {"pmevtyper3_el0", PMEVTYPER3_EL0, 0},
+ {"pmevtyper4_el0", PMEVTYPER4_EL0, 0},
+ {"pmevtyper5_el0", PMEVTYPER5_EL0, 0},
+ {"pmevtyper6_el0", PMEVTYPER6_EL0, 0},
+ {"pmevtyper7_el0", PMEVTYPER7_EL0, 0},
+ {"pmevtyper8_el0", PMEVTYPER8_EL0, 0},
+ {"pmevtyper9_el0", PMEVTYPER9_EL0, 0},
+ {"pmevtyper10_el0", PMEVTYPER10_EL0, 0},
+ {"pmevtyper11_el0", PMEVTYPER11_EL0, 0},
+ {"pmevtyper12_el0", PMEVTYPER12_EL0, 0},
+ {"pmevtyper13_el0", PMEVTYPER13_EL0, 0},
+ {"pmevtyper14_el0", PMEVTYPER14_EL0, 0},
+ {"pmevtyper15_el0", PMEVTYPER15_EL0, 0},
+ {"pmevtyper16_el0", PMEVTYPER16_EL0, 0},
+ {"pmevtyper17_el0", PMEVTYPER17_EL0, 0},
+ {"pmevtyper18_el0", PMEVTYPER18_EL0, 0},
+ {"pmevtyper19_el0", PMEVTYPER19_EL0, 0},
+ {"pmevtyper20_el0", PMEVTYPER20_EL0, 0},
+ {"pmevtyper21_el0", PMEVTYPER21_EL0, 0},
+ {"pmevtyper22_el0", PMEVTYPER22_EL0, 0},
+ {"pmevtyper23_el0", PMEVTYPER23_EL0, 0},
+ {"pmevtyper24_el0", PMEVTYPER24_EL0, 0},
+ {"pmevtyper25_el0", PMEVTYPER25_EL0, 0},
+ {"pmevtyper26_el0", PMEVTYPER26_EL0, 0},
+ {"pmevtyper27_el0", PMEVTYPER27_EL0, 0},
+ {"pmevtyper28_el0", PMEVTYPER28_EL0, 0},
+ {"pmevtyper29_el0", PMEVTYPER29_EL0, 0},
+ {"pmevtyper30_el0", PMEVTYPER30_EL0, 0},
// Trace registers
- {"trcprgctlr", TRCPRGCTLR},
- {"trcprocselr", TRCPROCSELR},
- {"trcconfigr", TRCCONFIGR},
- {"trcauxctlr", TRCAUXCTLR},
- {"trceventctl0r", TRCEVENTCTL0R},
- {"trceventctl1r", TRCEVENTCTL1R},
- {"trcstallctlr", TRCSTALLCTLR},
- {"trctsctlr", TRCTSCTLR},
- {"trcsyncpr", TRCSYNCPR},
- {"trcccctlr", TRCCCCTLR},
- {"trcbbctlr", TRCBBCTLR},
- {"trctraceidr", TRCTRACEIDR},
- {"trcqctlr", TRCQCTLR},
- {"trcvictlr", TRCVICTLR},
- {"trcviiectlr", TRCVIIECTLR},
- {"trcvissctlr", TRCVISSCTLR},
- {"trcvipcssctlr", TRCVIPCSSCTLR},
- {"trcvdctlr", TRCVDCTLR},
- {"trcvdsacctlr", TRCVDSACCTLR},
- {"trcvdarcctlr", TRCVDARCCTLR},
- {"trcseqevr0", TRCSEQEVR0},
- {"trcseqevr1", TRCSEQEVR1},
- {"trcseqevr2", TRCSEQEVR2},
- {"trcseqrstevr", TRCSEQRSTEVR},
- {"trcseqstr", TRCSEQSTR},
- {"trcextinselr", TRCEXTINSELR},
- {"trccntrldvr0", TRCCNTRLDVR0},
- {"trccntrldvr1", TRCCNTRLDVR1},
- {"trccntrldvr2", TRCCNTRLDVR2},
- {"trccntrldvr3", TRCCNTRLDVR3},
- {"trccntctlr0", TRCCNTCTLR0},
- {"trccntctlr1", TRCCNTCTLR1},
- {"trccntctlr2", TRCCNTCTLR2},
- {"trccntctlr3", TRCCNTCTLR3},
- {"trccntvr0", TRCCNTVR0},
- {"trccntvr1", TRCCNTVR1},
- {"trccntvr2", TRCCNTVR2},
- {"trccntvr3", TRCCNTVR3},
- {"trcimspec0", TRCIMSPEC0},
- {"trcimspec1", TRCIMSPEC1},
- {"trcimspec2", TRCIMSPEC2},
- {"trcimspec3", TRCIMSPEC3},
- {"trcimspec4", TRCIMSPEC4},
- {"trcimspec5", TRCIMSPEC5},
- {"trcimspec6", TRCIMSPEC6},
- {"trcimspec7", TRCIMSPEC7},
- {"trcrsctlr2", TRCRSCTLR2},
- {"trcrsctlr3", TRCRSCTLR3},
- {"trcrsctlr4", TRCRSCTLR4},
- {"trcrsctlr5", TRCRSCTLR5},
- {"trcrsctlr6", TRCRSCTLR6},
- {"trcrsctlr7", TRCRSCTLR7},
- {"trcrsctlr8", TRCRSCTLR8},
- {"trcrsctlr9", TRCRSCTLR9},
- {"trcrsctlr10", TRCRSCTLR10},
- {"trcrsctlr11", TRCRSCTLR11},
- {"trcrsctlr12", TRCRSCTLR12},
- {"trcrsctlr13", TRCRSCTLR13},
- {"trcrsctlr14", TRCRSCTLR14},
- {"trcrsctlr15", TRCRSCTLR15},
- {"trcrsctlr16", TRCRSCTLR16},
- {"trcrsctlr17", TRCRSCTLR17},
- {"trcrsctlr18", TRCRSCTLR18},
- {"trcrsctlr19", TRCRSCTLR19},
- {"trcrsctlr20", TRCRSCTLR20},
- {"trcrsctlr21", TRCRSCTLR21},
- {"trcrsctlr22", TRCRSCTLR22},
- {"trcrsctlr23", TRCRSCTLR23},
- {"trcrsctlr24", TRCRSCTLR24},
- {"trcrsctlr25", TRCRSCTLR25},
- {"trcrsctlr26", TRCRSCTLR26},
- {"trcrsctlr27", TRCRSCTLR27},
- {"trcrsctlr28", TRCRSCTLR28},
- {"trcrsctlr29", TRCRSCTLR29},
- {"trcrsctlr30", TRCRSCTLR30},
- {"trcrsctlr31", TRCRSCTLR31},
- {"trcssccr0", TRCSSCCR0},
- {"trcssccr1", TRCSSCCR1},
- {"trcssccr2", TRCSSCCR2},
- {"trcssccr3", TRCSSCCR3},
- {"trcssccr4", TRCSSCCR4},
- {"trcssccr5", TRCSSCCR5},
- {"trcssccr6", TRCSSCCR6},
- {"trcssccr7", TRCSSCCR7},
- {"trcsscsr0", TRCSSCSR0},
- {"trcsscsr1", TRCSSCSR1},
- {"trcsscsr2", TRCSSCSR2},
- {"trcsscsr3", TRCSSCSR3},
- {"trcsscsr4", TRCSSCSR4},
- {"trcsscsr5", TRCSSCSR5},
- {"trcsscsr6", TRCSSCSR6},
- {"trcsscsr7", TRCSSCSR7},
- {"trcsspcicr0", TRCSSPCICR0},
- {"trcsspcicr1", TRCSSPCICR1},
- {"trcsspcicr2", TRCSSPCICR2},
- {"trcsspcicr3", TRCSSPCICR3},
- {"trcsspcicr4", TRCSSPCICR4},
- {"trcsspcicr5", TRCSSPCICR5},
- {"trcsspcicr6", TRCSSPCICR6},
- {"trcsspcicr7", TRCSSPCICR7},
- {"trcpdcr", TRCPDCR},
- {"trcacvr0", TRCACVR0},
- {"trcacvr1", TRCACVR1},
- {"trcacvr2", TRCACVR2},
- {"trcacvr3", TRCACVR3},
- {"trcacvr4", TRCACVR4},
- {"trcacvr5", TRCACVR5},
- {"trcacvr6", TRCACVR6},
- {"trcacvr7", TRCACVR7},
- {"trcacvr8", TRCACVR8},
- {"trcacvr9", TRCACVR9},
- {"trcacvr10", TRCACVR10},
- {"trcacvr11", TRCACVR11},
- {"trcacvr12", TRCACVR12},
- {"trcacvr13", TRCACVR13},
- {"trcacvr14", TRCACVR14},
- {"trcacvr15", TRCACVR15},
- {"trcacatr0", TRCACATR0},
- {"trcacatr1", TRCACATR1},
- {"trcacatr2", TRCACATR2},
- {"trcacatr3", TRCACATR3},
- {"trcacatr4", TRCACATR4},
- {"trcacatr5", TRCACATR5},
- {"trcacatr6", TRCACATR6},
- {"trcacatr7", TRCACATR7},
- {"trcacatr8", TRCACATR8},
- {"trcacatr9", TRCACATR9},
- {"trcacatr10", TRCACATR10},
- {"trcacatr11", TRCACATR11},
- {"trcacatr12", TRCACATR12},
- {"trcacatr13", TRCACATR13},
- {"trcacatr14", TRCACATR14},
- {"trcacatr15", TRCACATR15},
- {"trcdvcvr0", TRCDVCVR0},
- {"trcdvcvr1", TRCDVCVR1},
- {"trcdvcvr2", TRCDVCVR2},
- {"trcdvcvr3", TRCDVCVR3},
- {"trcdvcvr4", TRCDVCVR4},
- {"trcdvcvr5", TRCDVCVR5},
- {"trcdvcvr6", TRCDVCVR6},
- {"trcdvcvr7", TRCDVCVR7},
- {"trcdvcmr0", TRCDVCMR0},
- {"trcdvcmr1", TRCDVCMR1},
- {"trcdvcmr2", TRCDVCMR2},
- {"trcdvcmr3", TRCDVCMR3},
- {"trcdvcmr4", TRCDVCMR4},
- {"trcdvcmr5", TRCDVCMR5},
- {"trcdvcmr6", TRCDVCMR6},
- {"trcdvcmr7", TRCDVCMR7},
- {"trccidcvr0", TRCCIDCVR0},
- {"trccidcvr1", TRCCIDCVR1},
- {"trccidcvr2", TRCCIDCVR2},
- {"trccidcvr3", TRCCIDCVR3},
- {"trccidcvr4", TRCCIDCVR4},
- {"trccidcvr5", TRCCIDCVR5},
- {"trccidcvr6", TRCCIDCVR6},
- {"trccidcvr7", TRCCIDCVR7},
- {"trcvmidcvr0", TRCVMIDCVR0},
- {"trcvmidcvr1", TRCVMIDCVR1},
- {"trcvmidcvr2", TRCVMIDCVR2},
- {"trcvmidcvr3", TRCVMIDCVR3},
- {"trcvmidcvr4", TRCVMIDCVR4},
- {"trcvmidcvr5", TRCVMIDCVR5},
- {"trcvmidcvr6", TRCVMIDCVR6},
- {"trcvmidcvr7", TRCVMIDCVR7},
- {"trccidcctlr0", TRCCIDCCTLR0},
- {"trccidcctlr1", TRCCIDCCTLR1},
- {"trcvmidcctlr0", TRCVMIDCCTLR0},
- {"trcvmidcctlr1", TRCVMIDCCTLR1},
- {"trcitctrl", TRCITCTRL},
- {"trcclaimset", TRCCLAIMSET},
- {"trcclaimclr", TRCCLAIMCLR},
+ {"trcprgctlr", TRCPRGCTLR, 0},
+ {"trcprocselr", TRCPROCSELR, 0},
+ {"trcconfigr", TRCCONFIGR, 0},
+ {"trcauxctlr", TRCAUXCTLR, 0},
+ {"trceventctl0r", TRCEVENTCTL0R, 0},
+ {"trceventctl1r", TRCEVENTCTL1R, 0},
+ {"trcstallctlr", TRCSTALLCTLR, 0},
+ {"trctsctlr", TRCTSCTLR, 0},
+ {"trcsyncpr", TRCSYNCPR, 0},
+ {"trcccctlr", TRCCCCTLR, 0},
+ {"trcbbctlr", TRCBBCTLR, 0},
+ {"trctraceidr", TRCTRACEIDR, 0},
+ {"trcqctlr", TRCQCTLR, 0},
+ {"trcvictlr", TRCVICTLR, 0},
+ {"trcviiectlr", TRCVIIECTLR, 0},
+ {"trcvissctlr", TRCVISSCTLR, 0},
+ {"trcvipcssctlr", TRCVIPCSSCTLR, 0},
+ {"trcvdctlr", TRCVDCTLR, 0},
+ {"trcvdsacctlr", TRCVDSACCTLR, 0},
+ {"trcvdarcctlr", TRCVDARCCTLR, 0},
+ {"trcseqevr0", TRCSEQEVR0, 0},
+ {"trcseqevr1", TRCSEQEVR1, 0},
+ {"trcseqevr2", TRCSEQEVR2, 0},
+ {"trcseqrstevr", TRCSEQRSTEVR, 0},
+ {"trcseqstr", TRCSEQSTR, 0},
+ {"trcextinselr", TRCEXTINSELR, 0},
+ {"trccntrldvr0", TRCCNTRLDVR0, 0},
+ {"trccntrldvr1", TRCCNTRLDVR1, 0},
+ {"trccntrldvr2", TRCCNTRLDVR2, 0},
+ {"trccntrldvr3", TRCCNTRLDVR3, 0},
+ {"trccntctlr0", TRCCNTCTLR0, 0},
+ {"trccntctlr1", TRCCNTCTLR1, 0},
+ {"trccntctlr2", TRCCNTCTLR2, 0},
+ {"trccntctlr3", TRCCNTCTLR3, 0},
+ {"trccntvr0", TRCCNTVR0, 0},
+ {"trccntvr1", TRCCNTVR1, 0},
+ {"trccntvr2", TRCCNTVR2, 0},
+ {"trccntvr3", TRCCNTVR3, 0},
+ {"trcimspec0", TRCIMSPEC0, 0},
+ {"trcimspec1", TRCIMSPEC1, 0},
+ {"trcimspec2", TRCIMSPEC2, 0},
+ {"trcimspec3", TRCIMSPEC3, 0},
+ {"trcimspec4", TRCIMSPEC4, 0},
+ {"trcimspec5", TRCIMSPEC5, 0},
+ {"trcimspec6", TRCIMSPEC6, 0},
+ {"trcimspec7", TRCIMSPEC7, 0},
+ {"trcrsctlr2", TRCRSCTLR2, 0},
+ {"trcrsctlr3", TRCRSCTLR3, 0},
+ {"trcrsctlr4", TRCRSCTLR4, 0},
+ {"trcrsctlr5", TRCRSCTLR5, 0},
+ {"trcrsctlr6", TRCRSCTLR6, 0},
+ {"trcrsctlr7", TRCRSCTLR7, 0},
+ {"trcrsctlr8", TRCRSCTLR8, 0},
+ {"trcrsctlr9", TRCRSCTLR9, 0},
+ {"trcrsctlr10", TRCRSCTLR10, 0},
+ {"trcrsctlr11", TRCRSCTLR11, 0},
+ {"trcrsctlr12", TRCRSCTLR12, 0},
+ {"trcrsctlr13", TRCRSCTLR13, 0},
+ {"trcrsctlr14", TRCRSCTLR14, 0},
+ {"trcrsctlr15", TRCRSCTLR15, 0},
+ {"trcrsctlr16", TRCRSCTLR16, 0},
+ {"trcrsctlr17", TRCRSCTLR17, 0},
+ {"trcrsctlr18", TRCRSCTLR18, 0},
+ {"trcrsctlr19", TRCRSCTLR19, 0},
+ {"trcrsctlr20", TRCRSCTLR20, 0},
+ {"trcrsctlr21", TRCRSCTLR21, 0},
+ {"trcrsctlr22", TRCRSCTLR22, 0},
+ {"trcrsctlr23", TRCRSCTLR23, 0},
+ {"trcrsctlr24", TRCRSCTLR24, 0},
+ {"trcrsctlr25", TRCRSCTLR25, 0},
+ {"trcrsctlr26", TRCRSCTLR26, 0},
+ {"trcrsctlr27", TRCRSCTLR27, 0},
+ {"trcrsctlr28", TRCRSCTLR28, 0},
+ {"trcrsctlr29", TRCRSCTLR29, 0},
+ {"trcrsctlr30", TRCRSCTLR30, 0},
+ {"trcrsctlr31", TRCRSCTLR31, 0},
+ {"trcssccr0", TRCSSCCR0, 0},
+ {"trcssccr1", TRCSSCCR1, 0},
+ {"trcssccr2", TRCSSCCR2, 0},
+ {"trcssccr3", TRCSSCCR3, 0},
+ {"trcssccr4", TRCSSCCR4, 0},
+ {"trcssccr5", TRCSSCCR5, 0},
+ {"trcssccr6", TRCSSCCR6, 0},
+ {"trcssccr7", TRCSSCCR7, 0},
+ {"trcsscsr0", TRCSSCSR0, 0},
+ {"trcsscsr1", TRCSSCSR1, 0},
+ {"trcsscsr2", TRCSSCSR2, 0},
+ {"trcsscsr3", TRCSSCSR3, 0},
+ {"trcsscsr4", TRCSSCSR4, 0},
+ {"trcsscsr5", TRCSSCSR5, 0},
+ {"trcsscsr6", TRCSSCSR6, 0},
+ {"trcsscsr7", TRCSSCSR7, 0},
+ {"trcsspcicr0", TRCSSPCICR0, 0},
+ {"trcsspcicr1", TRCSSPCICR1, 0},
+ {"trcsspcicr2", TRCSSPCICR2, 0},
+ {"trcsspcicr3", TRCSSPCICR3, 0},
+ {"trcsspcicr4", TRCSSPCICR4, 0},
+ {"trcsspcicr5", TRCSSPCICR5, 0},
+ {"trcsspcicr6", TRCSSPCICR6, 0},
+ {"trcsspcicr7", TRCSSPCICR7, 0},
+ {"trcpdcr", TRCPDCR, 0},
+ {"trcacvr0", TRCACVR0, 0},
+ {"trcacvr1", TRCACVR1, 0},
+ {"trcacvr2", TRCACVR2, 0},
+ {"trcacvr3", TRCACVR3, 0},
+ {"trcacvr4", TRCACVR4, 0},
+ {"trcacvr5", TRCACVR5, 0},
+ {"trcacvr6", TRCACVR6, 0},
+ {"trcacvr7", TRCACVR7, 0},
+ {"trcacvr8", TRCACVR8, 0},
+ {"trcacvr9", TRCACVR9, 0},
+ {"trcacvr10", TRCACVR10, 0},
+ {"trcacvr11", TRCACVR11, 0},
+ {"trcacvr12", TRCACVR12, 0},
+ {"trcacvr13", TRCACVR13, 0},
+ {"trcacvr14", TRCACVR14, 0},
+ {"trcacvr15", TRCACVR15, 0},
+ {"trcacatr0", TRCACATR0, 0},
+ {"trcacatr1", TRCACATR1, 0},
+ {"trcacatr2", TRCACATR2, 0},
+ {"trcacatr3", TRCACATR3, 0},
+ {"trcacatr4", TRCACATR4, 0},
+ {"trcacatr5", TRCACATR5, 0},
+ {"trcacatr6", TRCACATR6, 0},
+ {"trcacatr7", TRCACATR7, 0},
+ {"trcacatr8", TRCACATR8, 0},
+ {"trcacatr9", TRCACATR9, 0},
+ {"trcacatr10", TRCACATR10, 0},
+ {"trcacatr11", TRCACATR11, 0},
+ {"trcacatr12", TRCACATR12, 0},
+ {"trcacatr13", TRCACATR13, 0},
+ {"trcacatr14", TRCACATR14, 0},
+ {"trcacatr15", TRCACATR15, 0},
+ {"trcdvcvr0", TRCDVCVR0, 0},
+ {"trcdvcvr1", TRCDVCVR1, 0},
+ {"trcdvcvr2", TRCDVCVR2, 0},
+ {"trcdvcvr3", TRCDVCVR3, 0},
+ {"trcdvcvr4", TRCDVCVR4, 0},
+ {"trcdvcvr5", TRCDVCVR5, 0},
+ {"trcdvcvr6", TRCDVCVR6, 0},
+ {"trcdvcvr7", TRCDVCVR7, 0},
+ {"trcdvcmr0", TRCDVCMR0, 0},
+ {"trcdvcmr1", TRCDVCMR1, 0},
+ {"trcdvcmr2", TRCDVCMR2, 0},
+ {"trcdvcmr3", TRCDVCMR3, 0},
+ {"trcdvcmr4", TRCDVCMR4, 0},
+ {"trcdvcmr5", TRCDVCMR5, 0},
+ {"trcdvcmr6", TRCDVCMR6, 0},
+ {"trcdvcmr7", TRCDVCMR7, 0},
+ {"trccidcvr0", TRCCIDCVR0, 0},
+ {"trccidcvr1", TRCCIDCVR1, 0},
+ {"trccidcvr2", TRCCIDCVR2, 0},
+ {"trccidcvr3", TRCCIDCVR3, 0},
+ {"trccidcvr4", TRCCIDCVR4, 0},
+ {"trccidcvr5", TRCCIDCVR5, 0},
+ {"trccidcvr6", TRCCIDCVR6, 0},
+ {"trccidcvr7", TRCCIDCVR7, 0},
+ {"trcvmidcvr0", TRCVMIDCVR0, 0},
+ {"trcvmidcvr1", TRCVMIDCVR1, 0},
+ {"trcvmidcvr2", TRCVMIDCVR2, 0},
+ {"trcvmidcvr3", TRCVMIDCVR3, 0},
+ {"trcvmidcvr4", TRCVMIDCVR4, 0},
+ {"trcvmidcvr5", TRCVMIDCVR5, 0},
+ {"trcvmidcvr6", TRCVMIDCVR6, 0},
+ {"trcvmidcvr7", TRCVMIDCVR7, 0},
+ {"trccidcctlr0", TRCCIDCCTLR0, 0},
+ {"trccidcctlr1", TRCCIDCCTLR1, 0},
+ {"trcvmidcctlr0", TRCVMIDCCTLR0, 0},
+ {"trcvmidcctlr1", TRCVMIDCCTLR1, 0},
+ {"trcitctrl", TRCITCTRL, 0},
+ {"trcclaimset", TRCCLAIMSET, 0},
+ {"trcclaimclr", TRCCLAIMCLR, 0},
// GICv3 registers
- {"icc_bpr1_el1", ICC_BPR1_EL1},
- {"icc_bpr0_el1", ICC_BPR0_EL1},
- {"icc_pmr_el1", ICC_PMR_EL1},
- {"icc_ctlr_el1", ICC_CTLR_EL1},
- {"icc_ctlr_el3", ICC_CTLR_EL3},
- {"icc_sre_el1", ICC_SRE_EL1},
- {"icc_sre_el2", ICC_SRE_EL2},
- {"icc_sre_el3", ICC_SRE_EL3},
- {"icc_igrpen0_el1", ICC_IGRPEN0_EL1},
- {"icc_igrpen1_el1", ICC_IGRPEN1_EL1},
- {"icc_igrpen1_el3", ICC_IGRPEN1_EL3},
- {"icc_seien_el1", ICC_SEIEN_EL1},
- {"icc_ap0r0_el1", ICC_AP0R0_EL1},
- {"icc_ap0r1_el1", ICC_AP0R1_EL1},
- {"icc_ap0r2_el1", ICC_AP0R2_EL1},
- {"icc_ap0r3_el1", ICC_AP0R3_EL1},
- {"icc_ap1r0_el1", ICC_AP1R0_EL1},
- {"icc_ap1r1_el1", ICC_AP1R1_EL1},
- {"icc_ap1r2_el1", ICC_AP1R2_EL1},
- {"icc_ap1r3_el1", ICC_AP1R3_EL1},
- {"ich_ap0r0_el2", ICH_AP0R0_EL2},
- {"ich_ap0r1_el2", ICH_AP0R1_EL2},
- {"ich_ap0r2_el2", ICH_AP0R2_EL2},
- {"ich_ap0r3_el2", ICH_AP0R3_EL2},
- {"ich_ap1r0_el2", ICH_AP1R0_EL2},
- {"ich_ap1r1_el2", ICH_AP1R1_EL2},
- {"ich_ap1r2_el2", ICH_AP1R2_EL2},
- {"ich_ap1r3_el2", ICH_AP1R3_EL2},
- {"ich_hcr_el2", ICH_HCR_EL2},
- {"ich_misr_el2", ICH_MISR_EL2},
- {"ich_vmcr_el2", ICH_VMCR_EL2},
- {"ich_vseir_el2", ICH_VSEIR_EL2},
- {"ich_lr0_el2", ICH_LR0_EL2},
- {"ich_lr1_el2", ICH_LR1_EL2},
- {"ich_lr2_el2", ICH_LR2_EL2},
- {"ich_lr3_el2", ICH_LR3_EL2},
- {"ich_lr4_el2", ICH_LR4_EL2},
- {"ich_lr5_el2", ICH_LR5_EL2},
- {"ich_lr6_el2", ICH_LR6_EL2},
- {"ich_lr7_el2", ICH_LR7_EL2},
- {"ich_lr8_el2", ICH_LR8_EL2},
- {"ich_lr9_el2", ICH_LR9_EL2},
- {"ich_lr10_el2", ICH_LR10_EL2},
- {"ich_lr11_el2", ICH_LR11_EL2},
- {"ich_lr12_el2", ICH_LR12_EL2},
- {"ich_lr13_el2", ICH_LR13_EL2},
- {"ich_lr14_el2", ICH_LR14_EL2},
- {"ich_lr15_el2", ICH_LR15_EL2}
-};
-
-const AArch64NamedImmMapper::Mapping
-AArch64SysReg::SysRegMapper::CycloneSysRegMappings[] = {
- {"cpm_ioacc_ctl_el3", CPM_IOACC_CTL_EL3}
+ {"icc_bpr1_el1", ICC_BPR1_EL1, 0},
+ {"icc_bpr0_el1", ICC_BPR0_EL1, 0},
+ {"icc_pmr_el1", ICC_PMR_EL1, 0},
+ {"icc_ctlr_el1", ICC_CTLR_EL1, 0},
+ {"icc_ctlr_el3", ICC_CTLR_EL3, 0},
+ {"icc_sre_el1", ICC_SRE_EL1, 0},
+ {"icc_sre_el2", ICC_SRE_EL2, 0},
+ {"icc_sre_el3", ICC_SRE_EL3, 0},
+ {"icc_igrpen0_el1", ICC_IGRPEN0_EL1, 0},
+ {"icc_igrpen1_el1", ICC_IGRPEN1_EL1, 0},
+ {"icc_igrpen1_el3", ICC_IGRPEN1_EL3, 0},
+ {"icc_seien_el1", ICC_SEIEN_EL1, 0},
+ {"icc_ap0r0_el1", ICC_AP0R0_EL1, 0},
+ {"icc_ap0r1_el1", ICC_AP0R1_EL1, 0},
+ {"icc_ap0r2_el1", ICC_AP0R2_EL1, 0},
+ {"icc_ap0r3_el1", ICC_AP0R3_EL1, 0},
+ {"icc_ap1r0_el1", ICC_AP1R0_EL1, 0},
+ {"icc_ap1r1_el1", ICC_AP1R1_EL1, 0},
+ {"icc_ap1r2_el1", ICC_AP1R2_EL1, 0},
+ {"icc_ap1r3_el1", ICC_AP1R3_EL1, 0},
+ {"ich_ap0r0_el2", ICH_AP0R0_EL2, 0},
+ {"ich_ap0r1_el2", ICH_AP0R1_EL2, 0},
+ {"ich_ap0r2_el2", ICH_AP0R2_EL2, 0},
+ {"ich_ap0r3_el2", ICH_AP0R3_EL2, 0},
+ {"ich_ap1r0_el2", ICH_AP1R0_EL2, 0},
+ {"ich_ap1r1_el2", ICH_AP1R1_EL2, 0},
+ {"ich_ap1r2_el2", ICH_AP1R2_EL2, 0},
+ {"ich_ap1r3_el2", ICH_AP1R3_EL2, 0},
+ {"ich_hcr_el2", ICH_HCR_EL2, 0},
+ {"ich_misr_el2", ICH_MISR_EL2, 0},
+ {"ich_vmcr_el2", ICH_VMCR_EL2, 0},
+ {"ich_vseir_el2", ICH_VSEIR_EL2, 0},
+ {"ich_lr0_el2", ICH_LR0_EL2, 0},
+ {"ich_lr1_el2", ICH_LR1_EL2, 0},
+ {"ich_lr2_el2", ICH_LR2_EL2, 0},
+ {"ich_lr3_el2", ICH_LR3_EL2, 0},
+ {"ich_lr4_el2", ICH_LR4_EL2, 0},
+ {"ich_lr5_el2", ICH_LR5_EL2, 0},
+ {"ich_lr6_el2", ICH_LR6_EL2, 0},
+ {"ich_lr7_el2", ICH_LR7_EL2, 0},
+ {"ich_lr8_el2", ICH_LR8_EL2, 0},
+ {"ich_lr9_el2", ICH_LR9_EL2, 0},
+ {"ich_lr10_el2", ICH_LR10_EL2, 0},
+ {"ich_lr11_el2", ICH_LR11_EL2, 0},
+ {"ich_lr12_el2", ICH_LR12_EL2, 0},
+ {"ich_lr13_el2", ICH_LR13_EL2, 0},
+ {"ich_lr14_el2", ICH_LR14_EL2, 0},
+ {"ich_lr15_el2", ICH_LR15_EL2, 0},
+
+ // Cyclone registers
+ {"cpm_ioacc_ctl_el3", CPM_IOACC_CTL_EL3, AArch64::ProcCyclone},
+
+ // v8.1a "Privileged Access Never" extension-specific system registers
+ {"pan", PAN, AArch64::HasV8_1aOps},
+
+ // v8.1a "Limited Ordering Regions" extension-specific system registers
+ {"lorsa_el1", LORSA_EL1, AArch64::HasV8_1aOps},
+ {"lorea_el1", LOREA_EL1, AArch64::HasV8_1aOps},
+ {"lorn_el1", LORN_EL1, AArch64::HasV8_1aOps},
+ {"lorc_el1", LORC_EL1, AArch64::HasV8_1aOps},
+ {"lorid_el1", LORID_EL1, AArch64::HasV8_1aOps},
+
+ // v8.1a "Virtualization host extensions" system registers
+ {"ttbr1_el2", TTBR1_EL2, AArch64::HasV8_1aOps},
+ {"contextidr_el2", CONTEXTIDR_EL2, AArch64::HasV8_1aOps},
+ {"cnthv_tval_el2", CNTHV_TVAL_EL2, AArch64::HasV8_1aOps},
+ {"cnthv_cval_el2", CNTHV_CVAL_EL2, AArch64::HasV8_1aOps},
+ {"cnthv_ctl_el2", CNTHV_CTL_EL2, AArch64::HasV8_1aOps},
+ {"sctlr_el12", SCTLR_EL12, AArch64::HasV8_1aOps},
+ {"cpacr_el12", CPACR_EL12, AArch64::HasV8_1aOps},
+ {"ttbr0_el12", TTBR0_EL12, AArch64::HasV8_1aOps},
+ {"ttbr1_el12", TTBR1_EL12, AArch64::HasV8_1aOps},
+ {"tcr_el12", TCR_EL12, AArch64::HasV8_1aOps},
+ {"afsr0_el12", AFSR0_EL12, AArch64::HasV8_1aOps},
+ {"afsr1_el12", AFSR1_EL12, AArch64::HasV8_1aOps},
+ {"esr_el12", ESR_EL12, AArch64::HasV8_1aOps},
+ {"far_el12", FAR_EL12, AArch64::HasV8_1aOps},
+ {"mair_el12", MAIR_EL12, AArch64::HasV8_1aOps},
+ {"amair_el12", AMAIR_EL12, AArch64::HasV8_1aOps},
+ {"vbar_el12", VBAR_EL12, AArch64::HasV8_1aOps},
+ {"contextidr_el12", CONTEXTIDR_EL12, AArch64::HasV8_1aOps},
+ {"cntkctl_el12", CNTKCTL_EL12, AArch64::HasV8_1aOps},
+ {"cntp_tval_el02", CNTP_TVAL_EL02, AArch64::HasV8_1aOps},
+ {"cntp_ctl_el02", CNTP_CTL_EL02, AArch64::HasV8_1aOps},
+ {"cntp_cval_el02", CNTP_CVAL_EL02, AArch64::HasV8_1aOps},
+ {"cntv_tval_el02", CNTV_TVAL_EL02, AArch64::HasV8_1aOps},
+ {"cntv_ctl_el02", CNTV_CTL_EL02, AArch64::HasV8_1aOps},
+ {"cntv_cval_el02", CNTV_CVAL_EL02, AArch64::HasV8_1aOps},
+ {"spsr_el12", SPSR_EL12, AArch64::HasV8_1aOps},
+ {"elr_el12", ELR_EL12, AArch64::HasV8_1aOps},
};
uint32_t
-AArch64SysReg::SysRegMapper::fromString(StringRef Name, bool &Valid) const {
+AArch64SysReg::SysRegMapper::fromString(StringRef Name, uint64_t FeatureBits,
+ bool &Valid) const {
std::string NameLower = Name.lower();
// First search the registers shared by all
for (unsigned i = 0; i < array_lengthof(SysRegMappings); ++i) {
- if (SysRegMappings[i].Name == NameLower) {
+ if (SysRegMappings[i].isNameEqual(NameLower, FeatureBits)) {
Valid = true;
return SysRegMappings[i].Value;
}
}
- // Next search for target specific registers
- if (FeatureBits & AArch64::ProcCyclone) {
- for (unsigned i = 0; i < array_lengthof(CycloneSysRegMappings); ++i) {
- if (CycloneSysRegMappings[i].Name == NameLower) {
- Valid = true;
- return CycloneSysRegMappings[i].Value;
- }
- }
- }
-
// Now try the instruction-specific registers (either read-only or
// write-only).
for (unsigned i = 0; i < NumInstMappings; ++i) {
- if (InstMappings[i].Name == NameLower) {
+ if (InstMappings[i].isNameEqual(NameLower, FeatureBits)) {
Valid = true;
return InstMappings[i].Value;
}
@@ -814,27 +848,18 @@ AArch64SysReg::SysRegMapper::fromString(StringRef Name, bool &Valid) const {
}
std::string
-AArch64SysReg::SysRegMapper::toString(uint32_t Bits) const {
+AArch64SysReg::SysRegMapper::toString(uint32_t Bits, uint64_t FeatureBits) const {
// First search the registers shared by all
for (unsigned i = 0; i < array_lengthof(SysRegMappings); ++i) {
- if (SysRegMappings[i].Value == Bits) {
+ if (SysRegMappings[i].isValueEqual(Bits, FeatureBits)) {
return SysRegMappings[i].Name;
}
}
- // Next search for target specific registers
- if (FeatureBits & AArch64::ProcCyclone) {
- for (unsigned i = 0; i < array_lengthof(CycloneSysRegMappings); ++i) {
- if (CycloneSysRegMappings[i].Value == Bits) {
- return CycloneSysRegMappings[i].Name;
- }
- }
- }
-
// Now try the instruction-specific registers (either read-only or
// write-only).
for (unsigned i = 0; i < NumInstMappings; ++i) {
- if (InstMappings[i].Value == Bits) {
+ if (InstMappings[i].isValueEqual(Bits, FeatureBits)) {
return InstMappings[i].Name;
}
}
@@ -851,38 +876,38 @@ AArch64SysReg::SysRegMapper::toString(uint32_t Bits) const {
}
const AArch64NamedImmMapper::Mapping AArch64TLBI::TLBIMapper::TLBIMappings[] = {
- {"ipas2e1is", IPAS2E1IS},
- {"ipas2le1is", IPAS2LE1IS},
- {"vmalle1is", VMALLE1IS},
- {"alle2is", ALLE2IS},
- {"alle3is", ALLE3IS},
- {"vae1is", VAE1IS},
- {"vae2is", VAE2IS},
- {"vae3is", VAE3IS},
- {"aside1is", ASIDE1IS},
- {"vaae1is", VAAE1IS},
- {"alle1is", ALLE1IS},
- {"vale1is", VALE1IS},
- {"vale2is", VALE2IS},
- {"vale3is", VALE3IS},
- {"vmalls12e1is", VMALLS12E1IS},
- {"vaale1is", VAALE1IS},
- {"ipas2e1", IPAS2E1},
- {"ipas2le1", IPAS2LE1},
- {"vmalle1", VMALLE1},
- {"alle2", ALLE2},
- {"alle3", ALLE3},
- {"vae1", VAE1},
- {"vae2", VAE2},
- {"vae3", VAE3},
- {"aside1", ASIDE1},
- {"vaae1", VAAE1},
- {"alle1", ALLE1},
- {"vale1", VALE1},
- {"vale2", VALE2},
- {"vale3", VALE3},
- {"vmalls12e1", VMALLS12E1},
- {"vaale1", VAALE1}
+ {"ipas2e1is", IPAS2E1IS, 0},
+ {"ipas2le1is", IPAS2LE1IS, 0},
+ {"vmalle1is", VMALLE1IS, 0},
+ {"alle2is", ALLE2IS, 0},
+ {"alle3is", ALLE3IS, 0},
+ {"vae1is", VAE1IS, 0},
+ {"vae2is", VAE2IS, 0},
+ {"vae3is", VAE3IS, 0},
+ {"aside1is", ASIDE1IS, 0},
+ {"vaae1is", VAAE1IS, 0},
+ {"alle1is", ALLE1IS, 0},
+ {"vale1is", VALE1IS, 0},
+ {"vale2is", VALE2IS, 0},
+ {"vale3is", VALE3IS, 0},
+ {"vmalls12e1is", VMALLS12E1IS, 0},
+ {"vaale1is", VAALE1IS, 0},
+ {"ipas2e1", IPAS2E1, 0},
+ {"ipas2le1", IPAS2LE1, 0},
+ {"vmalle1", VMALLE1, 0},
+ {"alle2", ALLE2, 0},
+ {"alle3", ALLE3, 0},
+ {"vae1", VAE1, 0},
+ {"vae2", VAE2, 0},
+ {"vae3", VAE3, 0},
+ {"aside1", ASIDE1, 0},
+ {"vaae1", VAAE1, 0},
+ {"alle1", ALLE1, 0},
+ {"vale1", VALE1, 0},
+ {"vale2", VALE2, 0},
+ {"vale3", VALE3, 0},
+ {"vmalls12e1", VMALLS12E1, 0},
+ {"vaale1", VAALE1, 0}
};
AArch64TLBI::TLBIMapper::TLBIMapper()
diff --git a/lib/Target/AArch64/Utils/AArch64BaseInfo.h b/lib/Target/AArch64/Utils/AArch64BaseInfo.h
index 2ae6f52..659ea90 100644
--- a/lib/Target/AArch64/Utils/AArch64BaseInfo.h
+++ b/lib/Target/AArch64/Utils/AArch64BaseInfo.h
@@ -280,14 +280,26 @@ struct AArch64NamedImmMapper {
struct Mapping {
const char *Name;
uint32_t Value;
+ uint64_t AvailableForFeatures;
+ // AvailableForFeatures == 0: always-on; else any one listed bit suffices.
+ bool isNameEqual(std::string Other, uint64_t FeatureBits=~0ULL) const {
+ if (AvailableForFeatures && !(AvailableForFeatures & FeatureBits))
+ return false;
+ return Name == Other;
+ }
+ bool isValueEqual(uint32_t Other, uint64_t FeatureBits=~0ULL) const {
+ if (AvailableForFeatures && !(AvailableForFeatures & FeatureBits))
+ return false;
+ return Value == Other;
+ }
};
template<int N>
AArch64NamedImmMapper(const Mapping (&Mappings)[N], uint32_t TooBigImm)
: Mappings(&Mappings[0]), NumMappings(N), TooBigImm(TooBigImm) {}
- StringRef toString(uint32_t Value, bool &Valid) const;
- uint32_t fromString(StringRef Name, bool &Valid) const;
+ StringRef toString(uint32_t Value, uint64_t FeatureBits, bool &Valid) const;
+ uint32_t fromString(StringRef Name, uint64_t FeatureBits, bool &Valid) const;
/// Many of the instructions allow an alternative assembly form consisting of
/// a simple immediate. Currently the only valid forms are ranges [0, N) where
@@ -435,7 +447,10 @@ namespace AArch64PState {
Invalid = -1,
SPSel = 0x05,
DAIFSet = 0x1e,
- DAIFClr = 0x1f
+ DAIFClr = 0x1f,
+
+ // v8.1a "Privileged Access Never" extension-specific PStates
+ PAN = 0x04,
};
struct PStateMapper : AArch64NamedImmMapper {
@@ -1122,11 +1137,48 @@ namespace AArch64SysReg {
ICH_LR13_EL2 = 0xe66d, // 11 100 1100 1101 101
ICH_LR14_EL2 = 0xe66e, // 11 100 1100 1101 110
ICH_LR15_EL2 = 0xe66f, // 11 100 1100 1101 111
- };
- // Cyclone specific system registers
- enum CycloneSysRegValues {
- CPM_IOACC_CTL_EL3 = 0xff90
+ // v8.1a "Privileged Access Never" extension-specific system registers
+ PAN = 0xc213, // 11 000 0100 0010 011
+
+ // v8.1a "Limited Ordering Regions" extension-specific system registers
+ LORSA_EL1 = 0xc520, // 11 000 1010 0100 000
+ LOREA_EL1 = 0xc521, // 11 000 1010 0100 001
+ LORN_EL1 = 0xc522, // 11 000 1010 0100 010
+ LORC_EL1 = 0xc523, // 11 000 1010 0100 011
+ LORID_EL1 = 0xc527, // 11 000 1010 0100 111
+
+ // v8.1a "Virtualization host extensions" system registers
+ TTBR1_EL2 = 0xe101, // 11 100 0010 0000 001
+ CONTEXTIDR_EL2 = 0xe681, // 11 100 1101 0000 001
+ CNTHV_TVAL_EL2 = 0xe718, // 11 100 1110 0011 000
+ CNTHV_CVAL_EL2 = 0xe71a, // 11 100 1110 0011 010
+ CNTHV_CTL_EL2 = 0xe719, // 11 100 1110 0011 001
+ SCTLR_EL12 = 0xe880, // 11 101 0001 0000 000
+ CPACR_EL12 = 0xe882, // 11 101 0001 0000 010
+ TTBR0_EL12 = 0xe900, // 11 101 0010 0000 000
+ TTBR1_EL12 = 0xe901, // 11 101 0010 0000 001
+ TCR_EL12 = 0xe902, // 11 101 0010 0000 010
+ AFSR0_EL12 = 0xea88, // 11 101 0101 0001 000
+ AFSR1_EL12 = 0xea89, // 11 101 0101 0001 001
+ ESR_EL12 = 0xea90, // 11 101 0101 0010 000
+ FAR_EL12 = 0xeb00, // 11 101 0110 0000 000
+ MAIR_EL12 = 0xed10, // 11 101 1010 0010 000
+ AMAIR_EL12 = 0xed18, // 11 101 1010 0011 000
+ VBAR_EL12 = 0xee00, // 11 101 1100 0000 000
+ CONTEXTIDR_EL12 = 0xee81, // 11 101 1101 0000 001
+ CNTKCTL_EL12 = 0xef08, // 11 101 1110 0001 000
+ CNTP_TVAL_EL02 = 0xef10, // 11 101 1110 0010 000
+ CNTP_CTL_EL02 = 0xef11, // 11 101 1110 0010 001
+ CNTP_CVAL_EL02 = 0xef12, // 11 101 1110 0010 010
+ CNTV_TVAL_EL02 = 0xef18, // 11 101 1110 0011 000
+ CNTV_CTL_EL02 = 0xef19, // 11 101 1110 0011 001
+ CNTV_CVAL_EL02 = 0xef1a, // 11 101 1110 0011 010
+ SPSR_EL12 = 0xea00, // 11 101 0100 0000 000
+ ELR_EL12 = 0xea01, // 11 101 0100 0000 001
+
+ // Cyclone specific system registers
+ CPM_IOACC_CTL_EL3 = 0xff90,
};
// Note that these do not inherit from AArch64NamedImmMapper. This class is
@@ -1135,25 +1187,23 @@ namespace AArch64SysReg {
// this one case.
struct SysRegMapper {
static const AArch64NamedImmMapper::Mapping SysRegMappings[];
- static const AArch64NamedImmMapper::Mapping CycloneSysRegMappings[];
const AArch64NamedImmMapper::Mapping *InstMappings;
size_t NumInstMappings;
- uint64_t FeatureBits;
- SysRegMapper(uint64_t FeatureBits) : FeatureBits(FeatureBits) { }
- uint32_t fromString(StringRef Name, bool &Valid) const;
- std::string toString(uint32_t Bits) const;
+ SysRegMapper() { }
+ uint32_t fromString(StringRef Name, uint64_t FeatureBits, bool &Valid) const;
+ std::string toString(uint32_t Bits, uint64_t FeatureBits) const;
};
struct MSRMapper : SysRegMapper {
static const AArch64NamedImmMapper::Mapping MSRMappings[];
- MSRMapper(uint64_t FeatureBits);
+ MSRMapper();
};
struct MRSMapper : SysRegMapper {
static const AArch64NamedImmMapper::Mapping MRSMappings[];
- MRSMapper(uint64_t FeatureBits);
+ MRSMapper();
};
uint32_t ParseGenericRegister(StringRef Name, bool &Valid);
diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td
index ce0aed9..bd1c7af 100644
--- a/lib/Target/ARM/ARM.td
+++ b/lib/Target/ARM/ARM.td
@@ -180,7 +180,7 @@ def HasV8Ops : SubtargetFeature<"v8", "HasV8Ops", "true",
"Support ARM v8 instructions",
[HasV7Ops, FeatureVirtualization,
FeatureMP]>;
-def FeatureV8_1a : SubtargetFeature<"v8.1a", "HasV8_1a", "true",
+def HasV8_1aOps : SubtargetFeature<"v8.1a", "HasV8_1aOps", "true",
"Support ARM v8.1a instructions",
[HasV8Ops, FeatureAClass, FeatureCRC]>;
@@ -260,6 +260,14 @@ def ProcA57 : SubtargetFeature<"a57", "ARMProcFamily", "CortexA57",
FeatureTrustZone, FeatureT2XtPk,
FeatureCrypto, FeatureCRC]>;
+def ProcR4 : SubtargetFeature<"r4", "ARMProcFamily", "CortexR4",
+ "Cortex-R4 ARM processors",
+ [FeatureHWDiv,
+ FeatureAvoidPartialCPSR,
+ FeatureDSPThumb2, FeatureT2XtPk,
+ HasV7Ops, FeatureDB, FeatureHasRAS,
+ FeatureRClass]>;
+
def ProcR5 : SubtargetFeature<"r5", "ARMProcFamily", "CortexR5",
"Cortex-R5 ARM processors",
[FeatureSlowFPBrcc,
@@ -396,6 +404,16 @@ def : ProcessorModel<"krait", CortexA9Model,
FeatureDSPThumb2, FeatureHasRAS,
FeatureAClass]>;
+// FIXME: R4 has currently the same ProcessorModel as A8.
+def : ProcessorModel<"cortex-r4", CortexA8Model,
+ [ProcR4]>;
+
+// FIXME: R4F has currently the same ProcessorModel as A8.
+def : ProcessorModel<"cortex-r4f", CortexA8Model,
+ [ProcR4,
+ FeatureSlowFPBrcc, FeatureHasSlowFPVMLx,
+ FeatureVFP3, FeatureVFPOnlySP, FeatureD16]>;
+
// FIXME: R5 has currently the same ProcessorModel as A8.
def : ProcessorModel<"cortex-r5", CortexA8Model,
[ProcR5, HasV7Ops, FeatureDB,
@@ -457,14 +475,6 @@ def : ProcessorModel<"cyclone", SwiftModel,
FeatureDB,FeatureDSPThumb2,
FeatureHasRAS, FeatureZCZeroing]>;
-// V8.1 Processors
-def : ProcNoItin<"generic-armv8.1-a", [HasV8Ops, FeatureV8_1a,
- FeatureDB, FeatureFPARMv8,
- FeatureNEON, FeatureDSPThumb2,
- FeatureHWDiv, FeatureHWDivARM,
- FeatureTrustZone, FeatureT2XtPk,
- FeatureCrypto]>;
-
//===----------------------------------------------------------------------===//
// Register File Description
//===----------------------------------------------------------------------===//
@@ -485,7 +495,15 @@ def ARMInstrInfo : InstrInfo;
// Declare the target which we are implementing
//===----------------------------------------------------------------------===//
+def ARMAsmWriter : AsmWriter {
+ string AsmWriterClassName = "InstPrinter";
+ int PassSubtarget = 1;
+ int Variant = 0;
+ bit isMCAsmWriter = 1;
+}
+
def ARM : Target {
// Pull in Instruction Info:
let InstructionSet = ARMInstrInfo;
+ let AssemblyWriters = [ARMAsmWriter];
}
diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp
index 102def1..1a2acf5 100644
--- a/lib/Target/ARM/ARMAsmPrinter.cpp
+++ b/lib/Target/ARM/ARMAsmPrinter.cpp
@@ -207,7 +207,7 @@ GetARMJTIPICJumpTableLabel2(unsigned uid, unsigned uid2) const {
SmallString<60> Name;
raw_svector_ostream(Name) << DL->getPrivateGlobalPrefix() << "JTI"
<< getFunctionNumber() << '_' << uid << '_' << uid2;
- return OutContext.GetOrCreateSymbol(Name.str());
+ return OutContext.GetOrCreateSymbol(Name);
}
@@ -216,7 +216,7 @@ MCSymbol *ARMAsmPrinter::GetARMSJLJEHLabel() const {
SmallString<60> Name;
raw_svector_ostream(Name) << DL->getPrivateGlobalPrefix() << "SJLJEH"
<< getFunctionNumber();
- return OutContext.GetOrCreateSymbol(Name.str());
+ return OutContext.GetOrCreateSymbol(Name);
}
bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
@@ -520,28 +520,6 @@ void ARMAsmPrinter::EmitEndOfAsmFile(Module &M) {
// generates code that does this, it is always safe to set.
OutStreamer.EmitAssemblerFlag(MCAF_SubsectionsViaSymbols);
}
-
- // Emit a .data.rel section containing any stubs that were created.
- if (TT.isOSBinFormatELF()) {
- const TargetLoweringObjectFileELF &TLOFELF =
- static_cast<const TargetLoweringObjectFileELF &>(getObjFileLowering());
-
- MachineModuleInfoELF &MMIELF = MMI->getObjFileInfo<MachineModuleInfoELF>();
-
- // Output stubs for external and common global variables.
- MachineModuleInfoELF::SymbolListTy Stubs = MMIELF.GetGVStubList();
- if (!Stubs.empty()) {
- OutStreamer.SwitchSection(TLOFELF.getDataRelSection());
- const DataLayout *TD = TM.getDataLayout();
-
- for (auto &stub: Stubs) {
- OutStreamer.EmitLabel(stub.first);
- OutStreamer.EmitSymbolValue(stub.second.getPointer(),
- TD->getPointerSize(0));
- }
- Stubs.clear();
- }
- }
}
//===----------------------------------------------------------------------===//
@@ -597,7 +575,7 @@ void ARMAsmPrinter::emitAttributes() {
std::string ArchFS = ARM_MC::ParseARMTriple(TT, CPU);
if (!FS.empty()) {
if (!ArchFS.empty())
- ArchFS = ArchFS + "," + FS.str();
+ ArchFS = (Twine(ArchFS) + "," + FS).str();
else
ArchFS = FS;
}
@@ -661,8 +639,8 @@ void ARMAsmPrinter::emitAttributes() {
// Emit Tag_Advanced_SIMD_arch for ARMv8 architecture
if (STI.hasV8Ops())
ATS.emitAttribute(ARMBuildAttrs::Advanced_SIMD_arch,
- STI.hasV8_1a() ? ARMBuildAttrs::AllowNeonARMv8_1a:
- ARMBuildAttrs::AllowNeonARMv8);
+ STI.hasV8_1aOps() ? ARMBuildAttrs::AllowNeonARMv8_1a:
+ ARMBuildAttrs::AllowNeonARMv8);
} else {
if (STI.hasFPARMv8())
// FPv5 and FP-ARMv8 have the same instructions, so are modeled as one
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index a8c7657..3f79a9b 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -245,11 +245,15 @@ ARMBaseRegisterInfo::getRegAllocationHints(unsigned VirtReg,
// This register should preferably be even (Odd == 0) or odd (Odd == 1).
// Check if the other part of the pair has already been assigned, and provide
// the paired register as the first hint.
+ unsigned Paired = Hint.second;
+ if (Paired == 0)
+ return;
+
unsigned PairedPhys = 0;
- if (VRM && VRM->hasPhys(Hint.second)) {
- PairedPhys = getPairedGPR(VRM->getPhys(Hint.second), Odd, this);
- if (PairedPhys && MRI.isReserved(PairedPhys))
- PairedPhys = 0;
+ if (TargetRegisterInfo::isPhysicalRegister(Paired)) {
+ PairedPhys = Paired;
+ } else if (VRM && VRM->hasPhys(Paired)) {
+ PairedPhys = getPairedGPR(VRM->getPhys(Paired), Odd, this);
}
// First prefer the paired physreg.
@@ -284,9 +288,14 @@ ARMBaseRegisterInfo::updateRegAllocHint(unsigned Reg, unsigned NewReg,
// change.
unsigned OtherReg = Hint.second;
Hint = MRI->getRegAllocationHint(OtherReg);
- if (Hint.second == Reg)
- // Make sure the pair has not already divorced.
+ // Make sure the pair has not already divorced.
+ if (Hint.second == Reg) {
MRI->setRegAllocationHint(OtherReg, Hint.first, NewReg);
+ if (TargetRegisterInfo::isVirtualRegister(NewReg))
+ MRI->setRegAllocationHint(NewReg,
+ Hint.first == (unsigned)ARMRI::RegPairOdd ? ARMRI::RegPairEven
+ : ARMRI::RegPairOdd, OtherReg);
+ }
}
}
diff --git a/lib/Target/ARM/ARMConstantPoolValue.h b/lib/Target/ARM/ARMConstantPoolValue.h
index 13bef54..36f63e2 100644
--- a/lib/Target/ARM/ARMConstantPoolValue.h
+++ b/lib/Target/ARM/ARMConstantPoolValue.h
@@ -86,7 +86,7 @@ protected:
}
public:
- virtual ~ARMConstantPoolValue();
+ ~ARMConstantPoolValue() override;
ARMCP::ARMCPModifier getModifier() const { return Modifier; }
const char *getModifierText() const;
diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp
index 830953b..9d2b09b 100644
--- a/lib/Target/ARM/ARMFrameLowering.cpp
+++ b/lib/Target/ARM/ARMFrameLowering.cpp
@@ -311,6 +311,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
return;
StackAdjustingInsts DefCFAOffsetCandidates;
+ bool HasFP = hasFP(MF);
// Allocate the vararg register save area.
if (ArgRegsSaveSize) {
@@ -327,6 +328,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
DefCFAOffsetCandidates.addInst(std::prev(MBBI),
NumBytes - ArgRegsSaveSize, true);
}
+ DefCFAOffsetCandidates.emitDefCFAOffsets(MMI, MBB, dl, TII, HasFP);
return;
}
@@ -375,7 +377,6 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
}
// Determine starting offsets of spill areas.
- bool HasFP = hasFP(MF);
unsigned GPRCS1Offset = NumBytes - ArgRegsSaveSize - GPRCS1Size;
unsigned GPRCS2Offset = GPRCS1Offset - GPRCS2Size;
unsigned DPRAlign = DPRCSSize ? std::min(8U, Align) : 4U;
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 3b1b8dd..72afd2c 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -3504,25 +3504,34 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
ARMCC::CondCodes CondCode, CondCode2;
FPCCToARMCC(CC, CondCode, CondCode2);
- // Try to generate VSEL on ARMv8.
+ // Try to generate VMAXNM/VMINNM on ARMv8.
if (Subtarget->hasFPARMv8() && (TrueVal.getValueType() == MVT::f32 ||
TrueVal.getValueType() == MVT::f64)) {
- // We can select VMAXNM/VMINNM from a compare followed by a select with the
+ // We can use VMAXNM/VMINNM for a compare followed by a select with the
// same operands, as follows:
- // c = fcmp [ogt, olt, ugt, ult] a, b
+ // c = fcmp [?gt, ?ge, ?lt, ?le] a, b
// select c, a, b
- // We only do this in unsafe-fp-math, because signed zeros and NaNs are
- // handled differently than the original code sequence.
+ // In NoNaNsFPMath the CC will have been changed from, e.g., 'ogt' to 'gt'.
+ // We only do this transformation in UnsafeFPMath and for no-NaNs
+ // comparisons, because signed zeros and NaNs are handled differently than
+ // the original code sequence.
+ // FIXME: There are more cases that can be transformed even with NaNs,
+ // signed zeroes and safe math. E.g. in the following, the result will be
+ // FalseVal if a is a NaN or -0./0. and that's what vmaxnm will give, too.
+ // c = fcmp ogt, a, 0. ; select c, a, 0. => vmaxnm a, 0.
+ // FIXME: There is similar code that allows some extensions in
+ // AArch64TargetLowering::LowerSELECT_CC that should be shared with this
+ // code.
if (getTargetMachine().Options.UnsafeFPMath) {
if (LHS == TrueVal && RHS == FalseVal) {
- if (CC == ISD::SETOGT || CC == ISD::SETUGT)
+ if (CC == ISD::SETGT || CC == ISD::SETGE)
return DAG.getNode(ARMISD::VMAXNM, dl, VT, TrueVal, FalseVal);
- if (CC == ISD::SETOLT || CC == ISD::SETULT)
+ if (CC == ISD::SETLT || CC == ISD::SETLE)
return DAG.getNode(ARMISD::VMINNM, dl, VT, TrueVal, FalseVal);
} else if (LHS == FalseVal && RHS == TrueVal) {
- if (CC == ISD::SETOLT || CC == ISD::SETULT)
+ if (CC == ISD::SETLT || CC == ISD::SETLE)
return DAG.getNode(ARMISD::VMAXNM, dl, VT, TrueVal, FalseVal);
- if (CC == ISD::SETOGT || CC == ISD::SETUGT)
+ if (CC == ISD::SETGT || CC == ISD::SETGE)
return DAG.getNode(ARMISD::VMINNM, dl, VT, TrueVal, FalseVal);
}
}
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index c3984ca..52f3555 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -208,6 +208,8 @@ def HasV8 : Predicate<"Subtarget->hasV8Ops()">,
AssemblerPredicate<"HasV8Ops", "armv8">;
def PreV8 : Predicate<"!Subtarget->hasV8Ops()">,
AssemblerPredicate<"!HasV8Ops", "armv7 or earlier">;
+def HasV8_1a : Predicate<"Subtarget->hasV8_1aOps()">,
+ AssemblerPredicate<"HasV8_1aOps", "armv8.1a">;
def NoVFP : Predicate<"!Subtarget->hasVFP2()">;
def HasVFP2 : Predicate<"Subtarget->hasVFP2()">,
AssemblerPredicate<"FeatureVFP2", "VFP2">;
@@ -226,8 +228,6 @@ def HasCrypto : Predicate<"Subtarget->hasCrypto()">,
AssemblerPredicate<"FeatureCrypto", "crypto">;
def HasCRC : Predicate<"Subtarget->hasCRC()">,
AssemblerPredicate<"FeatureCRC", "crc">;
-def HasV8_1a : Predicate<"Subtarget->hasV8_1a()">,
- AssemblerPredicate<"FeatureV8_1a", "v8.1a">;
def HasFP16 : Predicate<"Subtarget->hasFP16()">,
AssemblerPredicate<"FeatureFP16","half-float">;
def HasDivide : Predicate<"Subtarget->hasDivide()">,
@@ -388,6 +388,9 @@ def fsub_mlx : PatFrag<(ops node:$lhs, node:$rhs),(fsub node:$lhs, node:$rhs),[{
// Immediate operands with a shared generic asm render method.
class ImmAsmOperand : AsmOperandClass { let RenderMethod = "addImmOperands"; }
+// Operands that are part of a memory addressing mode.
+class MemOperand : Operand<i32> { let OperandType = "OPERAND_MEMORY"; }
+
// Branch target.
// FIXME: rename brtarget to t2_brtarget
def brtarget : Operand<OtherVT> {
@@ -790,7 +793,7 @@ def imm1_16 : Operand<i32>, PatLeaf<(imm), [{ return Imm > 0 && Imm <= 16; }],
// addrmode_imm12 := reg +/- imm12
//
def MemImm12OffsetAsmOperand : AsmOperandClass { let Name = "MemImm12Offset"; }
-class AddrMode_Imm12 : Operand<i32>,
+class AddrMode_Imm12 : MemOperand,
ComplexPattern<i32, 2, "SelectAddrModeImm12", []> {
// 12-bit immediate operand. Note that instructions using this encode
// #0 and #-0 differently. We flag #-0 as the magic value INT32_MIN. All other
@@ -813,7 +816,7 @@ def addrmode_imm12_pre : AddrMode_Imm12 {
// ldst_so_reg := reg +/- reg shop imm
//
def MemRegOffsetAsmOperand : AsmOperandClass { let Name = "MemRegOffset"; }
-def ldst_so_reg : Operand<i32>,
+def ldst_so_reg : MemOperand,
ComplexPattern<i32, 3, "SelectLdStSOReg", []> {
let EncoderMethod = "getLdStSORegOpValue";
// FIXME: Simplify the printer
@@ -829,7 +832,7 @@ def ldst_so_reg : Operand<i32>,
// {8} 1 is imm8 is non-negative. 0 otherwise.
// {7-0} [0,255] imm8 value.
def PostIdxImm8AsmOperand : AsmOperandClass { let Name = "PostIdxImm8"; }
-def postidx_imm8 : Operand<i32> {
+def postidx_imm8 : MemOperand {
let PrintMethod = "printPostIdxImm8Operand";
let ParserMatchClass = PostIdxImm8AsmOperand;
let MIOperandInfo = (ops i32imm);
@@ -841,7 +844,7 @@ def postidx_imm8 : Operand<i32> {
// {8} 1 is imm8 is non-negative. 0 otherwise.
// {7-0} [0,255] imm8 value, scaled by 4.
def PostIdxImm8s4AsmOperand : AsmOperandClass { let Name = "PostIdxImm8s4"; }
-def postidx_imm8s4 : Operand<i32> {
+def postidx_imm8s4 : MemOperand {
let PrintMethod = "printPostIdxImm8s4Operand";
let ParserMatchClass = PostIdxImm8s4AsmOperand;
let MIOperandInfo = (ops i32imm);
@@ -854,7 +857,7 @@ def PostIdxRegAsmOperand : AsmOperandClass {
let Name = "PostIdxReg";
let ParserMethod = "parsePostIdxReg";
}
-def postidx_reg : Operand<i32> {
+def postidx_reg : MemOperand {
let EncoderMethod = "getPostIdxRegOpValue";
let DecoderMethod = "DecodePostIdxReg";
let PrintMethod = "printPostIdxRegOperand";
@@ -869,7 +872,7 @@ def postidx_reg : Operand<i32> {
// FIXME: addrmode2 should be refactored the rest of the way to always
// use explicit imm vs. reg versions above (addrmode_imm12 and ldst_so_reg).
def AddrMode2AsmOperand : AsmOperandClass { let Name = "AddrMode2"; }
-def addrmode2 : Operand<i32>,
+def addrmode2 : MemOperand,
ComplexPattern<i32, 3, "SelectAddrMode2", []> {
let EncoderMethod = "getAddrMode2OpValue";
let PrintMethod = "printAddrMode2Operand";
@@ -881,7 +884,7 @@ def PostIdxRegShiftedAsmOperand : AsmOperandClass {
let Name = "PostIdxRegShifted";
let ParserMethod = "parsePostIdxReg";
}
-def am2offset_reg : Operand<i32>,
+def am2offset_reg : MemOperand,
ComplexPattern<i32, 2, "SelectAddrMode2OffsetReg",
[], [SDNPWantRoot]> {
let EncoderMethod = "getAddrMode2OffsetOpValue";
@@ -894,7 +897,7 @@ def am2offset_reg : Operand<i32>,
// FIXME: am2offset_imm should only need the immediate, not the GPR. Having
// the GPR is purely vestigal at this point.
def AM2OffsetImmAsmOperand : AsmOperandClass { let Name = "AM2OffsetImm"; }
-def am2offset_imm : Operand<i32>,
+def am2offset_imm : MemOperand,
ComplexPattern<i32, 2, "SelectAddrMode2OffsetImm",
[], [SDNPWantRoot]> {
let EncoderMethod = "getAddrMode2OffsetOpValue";
@@ -909,7 +912,7 @@ def am2offset_imm : Operand<i32>,
//
// FIXME: split into imm vs. reg versions.
def AddrMode3AsmOperand : AsmOperandClass { let Name = "AddrMode3"; }
-class AddrMode3 : Operand<i32>,
+class AddrMode3 : MemOperand,
ComplexPattern<i32, 3, "SelectAddrMode3", []> {
let EncoderMethod = "getAddrMode3OpValue";
let ParserMatchClass = AddrMode3AsmOperand;
@@ -932,7 +935,7 @@ def AM3OffsetAsmOperand : AsmOperandClass {
let Name = "AM3Offset";
let ParserMethod = "parseAM3Offset";
}
-def am3offset : Operand<i32>,
+def am3offset : MemOperand,
ComplexPattern<i32, 2, "SelectAddrMode3Offset",
[], [SDNPWantRoot]> {
let EncoderMethod = "getAddrMode3OffsetOpValue";
@@ -951,7 +954,7 @@ def ldstm_mode : OptionalDefOperand<OtherVT, (ops i32), (ops (i32 1))> {
// addrmode5 := reg +/- imm8*4
//
def AddrMode5AsmOperand : AsmOperandClass { let Name = "AddrMode5"; }
-class AddrMode5 : Operand<i32>,
+class AddrMode5 : MemOperand,
ComplexPattern<i32, 2, "SelectAddrMode5", []> {
let EncoderMethod = "getAddrMode5OpValue";
let DecoderMethod = "DecodeAddrMode5Operand";
@@ -970,7 +973,7 @@ def addrmode5_pre : AddrMode5 {
// addrmode6 := reg with optional alignment
//
def AddrMode6AsmOperand : AsmOperandClass { let Name = "AlignedMemory"; }
-def addrmode6 : Operand<i32>,
+def addrmode6 : MemOperand,
ComplexPattern<i32, 2, "SelectAddrMode6", [], [SDNPWantParent]>{
let PrintMethod = "printAddrMode6Operand";
let MIOperandInfo = (ops GPR:$addr, i32imm:$align);
@@ -979,7 +982,7 @@ def addrmode6 : Operand<i32>,
let ParserMatchClass = AddrMode6AsmOperand;
}
-def am6offset : Operand<i32>,
+def am6offset : MemOperand,
ComplexPattern<i32, 1, "SelectAddrMode6Offset",
[], [SDNPWantRoot]> {
let PrintMethod = "printAddrMode6OffsetOperand";
@@ -990,7 +993,7 @@ def am6offset : Operand<i32>,
// Special version of addrmode6 to handle alignment encoding for VST1/VLD1
// (single element from one lane) for size 32.
-def addrmode6oneL32 : Operand<i32>,
+def addrmode6oneL32 : MemOperand,
ComplexPattern<i32, 2, "SelectAddrMode6", [], [SDNPWantParent]>{
let PrintMethod = "printAddrMode6Operand";
let MIOperandInfo = (ops GPR:$addr, i32imm);
@@ -998,7 +1001,7 @@ def addrmode6oneL32 : Operand<i32>,
}
// Base class for addrmode6 with specific alignment restrictions.
-class AddrMode6Align : Operand<i32>,
+class AddrMode6Align : MemOperand,
ComplexPattern<i32, 2, "SelectAddrMode6", [], [SDNPWantParent]>{
let PrintMethod = "printAddrMode6Operand";
let MIOperandInfo = (ops GPR:$addr, i32imm:$align);
@@ -1074,7 +1077,7 @@ def addrmode6align64or128or256 : AddrMode6Align {
// Special version of addrmode6 to handle alignment encoding for VLD-dup
// instructions, specifically VLD4-dup.
-def addrmode6dup : Operand<i32>,
+def addrmode6dup : MemOperand,
ComplexPattern<i32, 2, "SelectAddrMode6", [], [SDNPWantParent]>{
let PrintMethod = "printAddrMode6Operand";
let MIOperandInfo = (ops GPR:$addr, i32imm);
@@ -1085,7 +1088,7 @@ def addrmode6dup : Operand<i32>,
}
// Base class for addrmode6dup with specific alignment restrictions.
-class AddrMode6DupAlign : Operand<i32>,
+class AddrMode6DupAlign : MemOperand,
ComplexPattern<i32, 2, "SelectAddrMode6", [], [SDNPWantParent]>{
let PrintMethod = "printAddrMode6Operand";
let MIOperandInfo = (ops GPR:$addr, i32imm);
@@ -1149,7 +1152,7 @@ def addrmode6dupalign64or128 : AddrMode6DupAlign {
// addrmodepc := pc + reg
//
-def addrmodepc : Operand<i32>,
+def addrmodepc : MemOperand,
ComplexPattern<i32, 2, "SelectAddrModePC", []> {
let PrintMethod = "printAddrModePCOperand";
let MIOperandInfo = (ops GPR, i32imm);
@@ -1158,7 +1161,7 @@ def addrmodepc : Operand<i32>,
// addr_offset_none := reg
//
def MemNoOffsetAsmOperand : AsmOperandClass { let Name = "MemNoOffset"; }
-def addr_offset_none : Operand<i32>,
+def addr_offset_none : MemOperand,
ComplexPattern<i32, 1, "SelectAddrOffsetNone", []> {
let PrintMethod = "printAddrMode7Operand";
let DecoderMethod = "DecodeAddrMode7Operand";
@@ -1417,7 +1420,8 @@ multiclass AsI1_rbin_s_is<InstrItinClass iii, InstrItinClass iir,
let isCompare = 1, Defs = [CPSR] in {
multiclass AI1_cmp_irs<bits<4> opcod, string opc,
InstrItinClass iii, InstrItinClass iir, InstrItinClass iis,
- PatFrag opnode, bit Commutable = 0> {
+ PatFrag opnode, bit Commutable = 0,
+ string rrDecoderMethod = ""> {
def ri : AI1<opcod, (outs), (ins GPR:$Rn, mod_imm:$imm), DPFrm, iii,
opc, "\t$Rn, $imm",
[(opnode GPR:$Rn, mod_imm:$imm)]>,
@@ -1445,6 +1449,7 @@ multiclass AI1_cmp_irs<bits<4> opcod, string opc,
let Inst{15-12} = 0b0000;
let Inst{11-4} = 0b00000000;
let Inst{3-0} = Rm;
+ let DecoderMethod = rrDecoderMethod;
let Unpredictable{15-12} = 0b1111;
}
@@ -4263,6 +4268,30 @@ def CRC32W : AI_crc32<0, 0b10, "w", int_arm_crc32w>;
def CRC32CW : AI_crc32<1, 0b10, "cw", int_arm_crc32cw>;
//===----------------------------------------------------------------------===//
+// ARMv8.1a Privileged Access Never extension
+//
+// SETPAN #imm1
+
+def SETPAN : AInoP<(outs), (ins imm0_1:$imm), MiscFrm, NoItinerary, "setpan",
+ "\t$imm", []>, Requires<[IsARM, HasV8, HasV8_1a]> {
+ bits<1> imm;
+
+ let Inst{31-28} = 0b1111;
+ let Inst{27-20} = 0b00010001;
+ let Inst{19-16} = 0b0000;
+ let Inst{15-10} = 0b000000;
+ let Inst{9} = imm;
+ let Inst{8} = 0b0;
+ let Inst{7-4} = 0b0000;
+ let Inst{3-0} = 0b0000;
+
+ let Unpredictable{19-16} = 0b1111;
+ let Unpredictable{15-10} = 0b111111;
+ let Unpredictable{8} = 0b1;
+ let Unpredictable{3-0} = 0b1111;
+}
+
+//===----------------------------------------------------------------------===//
// Comparison Instructions...
//
@@ -4366,7 +4395,8 @@ def : ARMPat<(ARMcmpZ GPR:$src, mod_imm_neg:$imm),
// Note that TST/TEQ don't set all the same flags that CMP does!
defm TST : AI1_cmp_irs<0b1000, "tst",
IIC_iTSTi, IIC_iTSTr, IIC_iTSTsr,
- BinOpFrag<(ARMcmpZ (and_su node:$LHS, node:$RHS), 0)>, 1>;
+ BinOpFrag<(ARMcmpZ (and_su node:$LHS, node:$RHS), 0)>, 1,
+ "DecodeTSTInstruction">;
defm TEQ : AI1_cmp_irs<0b1001, "teq",
IIC_iTSTi, IIC_iTSTr, IIC_iTSTsr,
BinOpFrag<(ARMcmpZ (xor_su node:$LHS, node:$RHS), 0)>, 1>;
diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td
index 3c62e0e..d0ade77 100644
--- a/lib/Target/ARM/ARMInstrThumb.td
+++ b/lib/Target/ARM/ARMInstrThumb.td
@@ -142,7 +142,7 @@ def t_blxtarget : Operand<i32> {
// t_addrmode_pc := <label> => pc + imm8 * 4
//
-def t_addrmode_pc : Operand<i32> {
+def t_addrmode_pc : MemOperand {
let EncoderMethod = "getAddrModePCOpValue";
let DecoderMethod = "DecodeThumbAddrModePC";
let PrintMethod = "printThumbLdrLabelOperand";
@@ -153,7 +153,7 @@ def t_addrmode_pc : Operand<i32> {
// t_addrmode_rr := reg + reg
//
def t_addrmode_rr_asm_operand : AsmOperandClass { let Name = "MemThumbRR"; }
-def t_addrmode_rr : Operand<i32>,
+def t_addrmode_rr : MemOperand,
ComplexPattern<i32, 2, "SelectThumbAddrModeRR", []> {
let EncoderMethod = "getThumbAddrModeRegRegOpValue";
let PrintMethod = "printThumbAddrModeRROperand";
@@ -169,7 +169,7 @@ def t_addrmode_rr : Operand<i32>,
// the reg+imm forms will match instead. This is a horrible way to do that,
// as it forces tight coupling between the methods, but it's how selectiondag
// currently works.
-def t_addrmode_rrs1 : Operand<i32>,
+def t_addrmode_rrs1 : MemOperand,
ComplexPattern<i32, 2, "SelectThumbAddrModeRI5S1", []> {
let EncoderMethod = "getThumbAddrModeRegRegOpValue";
let PrintMethod = "printThumbAddrModeRROperand";
@@ -177,7 +177,7 @@ def t_addrmode_rrs1 : Operand<i32>,
let ParserMatchClass = t_addrmode_rr_asm_operand;
let MIOperandInfo = (ops tGPR:$base, tGPR:$offsreg);
}
-def t_addrmode_rrs2 : Operand<i32>,
+def t_addrmode_rrs2 : MemOperand,
ComplexPattern<i32, 2, "SelectThumbAddrModeRI5S2", []> {
let EncoderMethod = "getThumbAddrModeRegRegOpValue";
let DecoderMethod = "DecodeThumbAddrModeRR";
@@ -185,7 +185,7 @@ def t_addrmode_rrs2 : Operand<i32>,
let ParserMatchClass = t_addrmode_rr_asm_operand;
let MIOperandInfo = (ops tGPR:$base, tGPR:$offsreg);
}
-def t_addrmode_rrs4 : Operand<i32>,
+def t_addrmode_rrs4 : MemOperand,
ComplexPattern<i32, 2, "SelectThumbAddrModeRI5S4", []> {
let EncoderMethod = "getThumbAddrModeRegRegOpValue";
let DecoderMethod = "DecodeThumbAddrModeRR";
@@ -197,7 +197,7 @@ def t_addrmode_rrs4 : Operand<i32>,
// t_addrmode_is4 := reg + imm5 * 4
//
def t_addrmode_is4_asm_operand : AsmOperandClass { let Name = "MemThumbRIs4"; }
-def t_addrmode_is4 : Operand<i32>,
+def t_addrmode_is4 : MemOperand,
ComplexPattern<i32, 2, "SelectThumbAddrModeImm5S4", []> {
let EncoderMethod = "getAddrModeISOpValue";
let DecoderMethod = "DecodeThumbAddrModeIS";
@@ -209,7 +209,7 @@ def t_addrmode_is4 : Operand<i32>,
// t_addrmode_is2 := reg + imm5 * 2
//
def t_addrmode_is2_asm_operand : AsmOperandClass { let Name = "MemThumbRIs2"; }
-def t_addrmode_is2 : Operand<i32>,
+def t_addrmode_is2 : MemOperand,
ComplexPattern<i32, 2, "SelectThumbAddrModeImm5S2", []> {
let EncoderMethod = "getAddrModeISOpValue";
let DecoderMethod = "DecodeThumbAddrModeIS";
@@ -221,7 +221,7 @@ def t_addrmode_is2 : Operand<i32>,
// t_addrmode_is1 := reg + imm5
//
def t_addrmode_is1_asm_operand : AsmOperandClass { let Name = "MemThumbRIs1"; }
-def t_addrmode_is1 : Operand<i32>,
+def t_addrmode_is1 : MemOperand,
ComplexPattern<i32, 2, "SelectThumbAddrModeImm5S1", []> {
let EncoderMethod = "getAddrModeISOpValue";
let DecoderMethod = "DecodeThumbAddrModeIS";
@@ -235,7 +235,7 @@ def t_addrmode_is1 : Operand<i32>,
// FIXME: This really shouldn't have an explicit SP operand at all. It should
// be implicit, just like in the instruction encoding itself.
def t_addrmode_sp_asm_operand : AsmOperandClass { let Name = "MemThumbSPI"; }
-def t_addrmode_sp : Operand<i32>,
+def t_addrmode_sp : MemOperand,
ComplexPattern<i32, 2, "SelectThumbAddrModeSP", []> {
let EncoderMethod = "getAddrModeThumbSPOpValue";
let DecoderMethod = "DecodeThumbAddrModeSP";
diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td
index 10b0a0e..103ee00 100644
--- a/lib/Target/ARM/ARMInstrThumb2.td
+++ b/lib/Target/ARM/ARMInstrThumb2.td
@@ -148,7 +148,7 @@ def lo5AllOne : PatLeaf<(i32 imm), [{
// t2addrmode_imm12 := reg + imm12
def t2addrmode_imm12_asmoperand : AsmOperandClass {let Name="MemUImm12Offset";}
-def t2addrmode_imm12 : Operand<i32>,
+def t2addrmode_imm12 : MemOperand,
ComplexPattern<i32, 2, "SelectT2AddrModeImm12", []> {
let PrintMethod = "printAddrModeImm12Operand<false>";
let EncoderMethod = "getAddrModeImm12OpValue";
@@ -178,7 +178,7 @@ def t2adrlabel : Operand<i32> {
// t2addrmode_posimm8 := reg + imm8
def MemPosImm8OffsetAsmOperand : AsmOperandClass {let Name="MemPosImm8Offset";}
-def t2addrmode_posimm8 : Operand<i32> {
+def t2addrmode_posimm8 : MemOperand {
let PrintMethod = "printT2AddrModeImm8Operand<false>";
let EncoderMethod = "getT2AddrModeImm8OpValue";
let DecoderMethod = "DecodeT2AddrModeImm8";
@@ -188,7 +188,7 @@ def t2addrmode_posimm8 : Operand<i32> {
// t2addrmode_negimm8 := reg - imm8
def MemNegImm8OffsetAsmOperand : AsmOperandClass {let Name="MemNegImm8Offset";}
-def t2addrmode_negimm8 : Operand<i32>,
+def t2addrmode_negimm8 : MemOperand,
ComplexPattern<i32, 2, "SelectT2AddrModeImm8", []> {
let PrintMethod = "printT2AddrModeImm8Operand<false>";
let EncoderMethod = "getT2AddrModeImm8OpValue";
@@ -199,7 +199,7 @@ def t2addrmode_negimm8 : Operand<i32>,
// t2addrmode_imm8 := reg +/- imm8
def MemImm8OffsetAsmOperand : AsmOperandClass { let Name = "MemImm8Offset"; }
-class T2AddrMode_Imm8 : Operand<i32>,
+class T2AddrMode_Imm8 : MemOperand,
ComplexPattern<i32, 2, "SelectT2AddrModeImm8", []> {
let EncoderMethod = "getT2AddrModeImm8OpValue";
let DecoderMethod = "DecodeT2AddrModeImm8";
@@ -215,7 +215,7 @@ def t2addrmode_imm8_pre : T2AddrMode_Imm8 {
let PrintMethod = "printT2AddrModeImm8Operand<true>";
}
-def t2am_imm8_offset : Operand<i32>,
+def t2am_imm8_offset : MemOperand,
ComplexPattern<i32, 1, "SelectT2AddrModeImm8Offset",
[], [SDNPWantRoot]> {
let PrintMethod = "printT2AddrModeImm8OffsetOperand";
@@ -225,7 +225,7 @@ def t2am_imm8_offset : Operand<i32>,
// t2addrmode_imm8s4 := reg +/- (imm8 << 2)
def MemImm8s4OffsetAsmOperand : AsmOperandClass {let Name = "MemImm8s4Offset";}
-class T2AddrMode_Imm8s4 : Operand<i32> {
+class T2AddrMode_Imm8s4 : MemOperand {
let EncoderMethod = "getT2AddrModeImm8s4OpValue";
let DecoderMethod = "DecodeT2AddrModeImm8s4";
let ParserMatchClass = MemImm8s4OffsetAsmOperand;
@@ -241,7 +241,7 @@ def t2addrmode_imm8s4_pre : T2AddrMode_Imm8s4 {
}
def t2am_imm8s4_offset_asmoperand : AsmOperandClass { let Name = "Imm8s4"; }
-def t2am_imm8s4_offset : Operand<i32> {
+def t2am_imm8s4_offset : MemOperand {
let PrintMethod = "printT2AddrModeImm8s4OffsetOperand";
let EncoderMethod = "getT2Imm8s4OpValue";
let DecoderMethod = "DecodeT2Imm8S4";
@@ -251,7 +251,7 @@ def t2am_imm8s4_offset : Operand<i32> {
def MemImm0_1020s4OffsetAsmOperand : AsmOperandClass {
let Name = "MemImm0_1020s4Offset";
}
-def t2addrmode_imm0_1020s4 : Operand<i32>,
+def t2addrmode_imm0_1020s4 : MemOperand,
ComplexPattern<i32, 2, "SelectT2AddrModeExclusive"> {
let PrintMethod = "printT2AddrModeImm0_1020s4Operand";
let EncoderMethod = "getT2AddrModeImm0_1020s4OpValue";
@@ -262,7 +262,7 @@ def t2addrmode_imm0_1020s4 : Operand<i32>,
// t2addrmode_so_reg := reg + (reg << imm2)
def t2addrmode_so_reg_asmoperand : AsmOperandClass {let Name="T2MemRegOffset";}
-def t2addrmode_so_reg : Operand<i32>,
+def t2addrmode_so_reg : MemOperand,
ComplexPattern<i32, 3, "SelectT2AddrModeSoReg", []> {
let PrintMethod = "printT2AddrModeSoRegOperand";
let EncoderMethod = "getT2AddrModeSORegOpValue";
@@ -273,13 +273,13 @@ def t2addrmode_so_reg : Operand<i32>,
// Addresses for the TBB/TBH instructions.
def addrmode_tbb_asmoperand : AsmOperandClass { let Name = "MemTBB"; }
-def addrmode_tbb : Operand<i32> {
+def addrmode_tbb : MemOperand {
let PrintMethod = "printAddrModeTBB";
let ParserMatchClass = addrmode_tbb_asmoperand;
let MIOperandInfo = (ops GPR:$Rn, rGPR:$Rm);
}
def addrmode_tbh_asmoperand : AsmOperandClass { let Name = "MemTBH"; }
-def addrmode_tbh : Operand<i32> {
+def addrmode_tbh : MemOperand {
let PrintMethod = "printAddrModeTBH";
let ParserMatchClass = addrmode_tbh_asmoperand;
let MIOperandInfo = (ops GPR:$Rn, rGPR:$Rm);
@@ -3630,8 +3630,8 @@ def t2IT : Thumb2XI<(outs), (ins it_pred:$cc, it_mask:$mask),
// Branch and Exchange Jazelle -- for disassembly only
// Rm = Inst{19-16}
-def t2BXJ : T2I<(outs), (ins rGPR:$func), NoItinerary, "bxj", "\t$func", []>,
- Sched<[WriteBr]>, Requires<[IsThumb2, IsNotMClass, PreV8]> {
+def t2BXJ : T2I<(outs), (ins GPRnopc:$func), NoItinerary, "bxj", "\t$func", []>,
+ Sched<[WriteBr]>, Requires<[IsThumb2, IsNotMClass]> {
bits<4> func;
let Inst{31-27} = 0b11110;
let Inst{26} = 0;
@@ -4281,6 +4281,23 @@ def t2CDP2 : T2Cop<0b1111, (outs), (ins p_imm:$cop, imm0_15:$opc1,
//===----------------------------------------------------------------------===//
+// ARMv8.1 Privileged Access Never extension
+//
+// SETPAN #imm1
+
+def t2SETPAN : T1I<(outs), (ins imm0_1:$imm), NoItinerary, "setpan\t$imm", []>,
+ T1Misc<0b0110000>, Requires<[IsThumb2, HasV8, HasV8_1a]> {
+ bits<1> imm;
+
+ let Inst{4} = 0b1;
+ let Inst{3} = imm;
+ let Inst{2-0} = 0b000;
+
+ let Unpredictable{4} = 0b1;
+ let Unpredictable{2-0} = 0b111;
+}
+
+//===----------------------------------------------------------------------===//
// Non-Instruction Patterns
//
diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp
index fbec9e6..2a3e1da 100644
--- a/lib/Target/ARM/ARMSubtarget.cpp
+++ b/lib/Target/ARM/ARMSubtarget.cpp
@@ -133,6 +133,7 @@ void ARMSubtarget::initializeEnvironment() {
HasV6T2Ops = false;
HasV7Ops = false;
HasV8Ops = false;
+ HasV8_1aOps = false;
HasVFPv2 = false;
HasVFPv3 = false;
HasVFPv4 = false;
@@ -166,7 +167,6 @@ void ARMSubtarget::initializeEnvironment() {
HasTrustZone = false;
HasCrypto = false;
HasCRC = false;
- HasV8_1a = false;
HasZeroCycleZeroing = false;
AllowsUnalignedMem = false;
Thumb2DSP = false;
@@ -191,7 +191,7 @@ void ARMSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
ARM_MC::ParseARMTriple(TargetTriple.getTriple(), CPUString);
if (!FS.empty()) {
if (!ArchFS.empty())
- ArchFS = ArchFS + "," + FS.str();
+ ArchFS = (Twine(ArchFS) + "," + FS).str();
else
ArchFS = FS;
}
@@ -254,7 +254,7 @@ void ARMSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
switch (IT) {
case DefaultIT:
- RestrictIT = hasV8Ops() ? true : false;
+ RestrictIT = hasV8Ops();
break;
case RestrictedIT:
RestrictIT = true;
diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h
index f36cd5c..d82314d 100644
--- a/lib/Target/ARM/ARMSubtarget.h
+++ b/lib/Target/ARM/ARMSubtarget.h
@@ -43,7 +43,7 @@ class ARMSubtarget : public ARMGenSubtargetInfo {
protected:
enum ARMProcFamilyEnum {
Others, CortexA5, CortexA7, CortexA8, CortexA9, CortexA12, CortexA15,
- CortexA17, CortexR5, Swift, CortexA53, CortexA57, Krait,
+ CortexA17, CortexR4, CortexR4F, CortexR5, Swift, CortexA53, CortexA57, Krait,
};
enum ARMProcClassEnum {
None, AClass, RClass, MClass
@@ -67,6 +67,7 @@ protected:
bool HasV6T2Ops;
bool HasV7Ops;
bool HasV8Ops;
+ bool HasV8_1aOps;
/// HasVFPv2, HasVFPv3, HasVFPv4, HasFPARMv8, HasNEON - Specify what
/// floating point ISAs are supported.
@@ -182,9 +183,6 @@ protected:
/// HasCRC - if true, processor supports CRC instructions
bool HasCRC;
- /// HasV8_1a - if true, the processor has V8.1a: PAN and RDMA extensions
- bool HasV8_1a;
-
/// If true, the instructions "vmov.i32 d0, #0" and "vmov.i32 q0, #0" are
/// particularly effective at zeroing a VFP register.
bool HasZeroCycleZeroing;
@@ -295,6 +293,7 @@ public:
bool hasV6T2Ops() const { return HasV6T2Ops; }
bool hasV7Ops() const { return HasV7Ops; }
bool hasV8Ops() const { return HasV8Ops; }
+ bool hasV8_1aOps() const { return HasV8_1aOps; }
bool isCortexA5() const { return ARMProcFamily == CortexA5; }
bool isCortexA7() const { return ARMProcFamily == CortexA7; }
@@ -316,7 +315,6 @@ public:
bool hasNEON() const { return HasNEON; }
bool hasCrypto() const { return HasCrypto; }
bool hasCRC() const { return HasCRC; }
- bool hasV8_1a() const { return HasV8_1a; }
bool hasVirtualization() const { return HasVirtualization; }
bool useNEONForSinglePrecisionFP() const {
return hasNEON() && UseNEONForSinglePrecisionFP;
diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp
index 1bee1b0..ae33340 100644
--- a/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/lib/Target/ARM/ARMTargetMachine.cpp
@@ -42,6 +42,11 @@ EnableARMLoadStoreOpt("arm-load-store-opt", cl::Hidden,
cl::desc("Enable ARM load/store optimization pass"),
cl::init(true));
+// FIXME: Unify control over GlobalMerge.
+static cl::opt<cl::boolOrDefault>
+EnableGlobalMerge("arm-global-merge", cl::Hidden,
+ cl::desc("Enable the global merge pass"));
+
extern "C" void LLVMInitializeARMTarget() {
// Register the target.
RegisterTargetMachine<ARMLETargetMachine> X(TheARMLETarget);
@@ -332,7 +337,9 @@ void ARMPassConfig::addIRPasses() {
}
bool ARMPassConfig::addPreISel() {
- if (TM->getOptLevel() == CodeGenOpt::Aggressive)
+ if ((TM->getOptLevel() == CodeGenOpt::Aggressive &&
+ EnableGlobalMerge == cl::BOU_UNSET) ||
+ EnableGlobalMerge == cl::BOU_TRUE)
// FIXME: This is using the thumb1 only constant value for
// maximal global offset for merging globals. We may want
// to look into using the old value for non-thumb1 code of
diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index 2215efb..b9ad2c8 100644
--- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -276,8 +276,8 @@ class ARMAsmParser : public MCTargetAsmParser {
bool hasD16() const {
return STI.getFeatureBits() & ARM::FeatureD16;
}
- bool hasV8_1a() const {
- return STI.getFeatureBits() & ARM::FeatureV8_1a;
+ bool hasV8_1aOps() const {
+ return STI.getFeatureBits() & ARM::HasV8_1aOps;
}
void SwitchMode() {
@@ -5418,47 +5418,44 @@ StringRef ARMAsmParser::splitMnemonic(StringRef Mnemonic,
/// inclusion of carry set or predication code operands.
//
// FIXME: It would be nice to autogen this.
-void ARMAsmParser::
-getMnemonicAcceptInfo(StringRef Mnemonic, StringRef FullInst,
- bool &CanAcceptCarrySet, bool &CanAcceptPredicationCode) {
- if (Mnemonic == "and" || Mnemonic == "lsl" || Mnemonic == "lsr" ||
+void ARMAsmParser::getMnemonicAcceptInfo(StringRef Mnemonic, StringRef FullInst,
+ bool &CanAcceptCarrySet,
+ bool &CanAcceptPredicationCode) {
+ CanAcceptCarrySet =
+ Mnemonic == "and" || Mnemonic == "lsl" || Mnemonic == "lsr" ||
Mnemonic == "rrx" || Mnemonic == "ror" || Mnemonic == "sub" ||
- Mnemonic == "add" || Mnemonic == "adc" ||
- Mnemonic == "mul" || Mnemonic == "bic" || Mnemonic == "asr" ||
- Mnemonic == "orr" || Mnemonic == "mvn" ||
- Mnemonic == "rsb" || Mnemonic == "rsc" || Mnemonic == "orn" ||
- Mnemonic == "sbc" || Mnemonic == "eor" || Mnemonic == "neg" ||
- Mnemonic == "vfm" || Mnemonic == "vfnm" ||
- (!isThumb() && (Mnemonic == "smull" || Mnemonic == "mov" ||
- Mnemonic == "mla" || Mnemonic == "smlal" ||
- Mnemonic == "umlal" || Mnemonic == "umull"))) {
- CanAcceptCarrySet = true;
- } else
- CanAcceptCarrySet = false;
+ Mnemonic == "add" || Mnemonic == "adc" || Mnemonic == "mul" ||
+ Mnemonic == "bic" || Mnemonic == "asr" || Mnemonic == "orr" ||
+ Mnemonic == "mvn" || Mnemonic == "rsb" || Mnemonic == "rsc" ||
+ Mnemonic == "orn" || Mnemonic == "sbc" || Mnemonic == "eor" ||
+ Mnemonic == "neg" || Mnemonic == "vfm" || Mnemonic == "vfnm" ||
+ (!isThumb() &&
+ (Mnemonic == "smull" || Mnemonic == "mov" || Mnemonic == "mla" ||
+ Mnemonic == "smlal" || Mnemonic == "umlal" || Mnemonic == "umull"));
if (Mnemonic == "bkpt" || Mnemonic == "cbnz" || Mnemonic == "setend" ||
- Mnemonic == "cps" || Mnemonic == "it" || Mnemonic == "cbz" ||
+ Mnemonic == "cps" || Mnemonic == "it" || Mnemonic == "cbz" ||
Mnemonic == "trap" || Mnemonic == "hlt" || Mnemonic == "udf" ||
Mnemonic.startswith("crc32") || Mnemonic.startswith("cps") ||
- Mnemonic.startswith("vsel") ||
- Mnemonic == "vmaxnm" || Mnemonic == "vminnm" || Mnemonic == "vcvta" ||
- Mnemonic == "vcvtn" || Mnemonic == "vcvtp" || Mnemonic == "vcvtm" ||
- Mnemonic == "vrinta" || Mnemonic == "vrintn" || Mnemonic == "vrintp" ||
- Mnemonic == "vrintm" || Mnemonic.startswith("aes") || Mnemonic == "hvc" ||
+ Mnemonic.startswith("vsel") || Mnemonic == "vmaxnm" ||
+ Mnemonic == "vminnm" || Mnemonic == "vcvta" || Mnemonic == "vcvtn" ||
+ Mnemonic == "vcvtp" || Mnemonic == "vcvtm" || Mnemonic == "vrinta" ||
+ Mnemonic == "vrintn" || Mnemonic == "vrintp" || Mnemonic == "vrintm" ||
+ Mnemonic.startswith("aes") || Mnemonic == "hvc" || Mnemonic == "setpan" ||
Mnemonic.startswith("sha1") || Mnemonic.startswith("sha256") ||
(FullInst.startswith("vmull") && FullInst.endswith(".p64"))) {
// These mnemonics are never predicable
CanAcceptPredicationCode = false;
} else if (!isThumb()) {
// Some instructions are only predicable in Thumb mode
- CanAcceptPredicationCode
- = Mnemonic != "cdp2" && Mnemonic != "clrex" && Mnemonic != "mcr2" &&
+ CanAcceptPredicationCode =
+ Mnemonic != "cdp2" && Mnemonic != "clrex" && Mnemonic != "mcr2" &&
Mnemonic != "mcrr2" && Mnemonic != "mrc2" && Mnemonic != "mrrc2" &&
Mnemonic != "dmb" && Mnemonic != "dsb" && Mnemonic != "isb" &&
Mnemonic != "pld" && Mnemonic != "pli" && Mnemonic != "pldw" &&
- Mnemonic != "ldc2" && Mnemonic != "ldc2l" &&
- Mnemonic != "stc2" && Mnemonic != "stc2l" &&
- !Mnemonic.startswith("rfe") && !Mnemonic.startswith("srs");
+ Mnemonic != "ldc2" && Mnemonic != "ldc2l" && Mnemonic != "stc2" &&
+ Mnemonic != "stc2l" && !Mnemonic.startswith("rfe") &&
+ !Mnemonic.startswith("srs");
} else if (isThumbOne()) {
if (hasV6MOps())
CanAcceptPredicationCode = Mnemonic != "movs";
@@ -6153,6 +6150,14 @@ bool ARMAsmParser::validateInstruction(MCInst &Inst,
"destination operands can't be identical");
return false;
}
+ case ARM::t2BXJ: {
+ const unsigned RmReg = Inst.getOperand(0).getReg();
+ // Rm = SP is no longer unpredictable in v8-A
+ if (RmReg == ARM::SP && !hasV8Ops())
+ return Error(Operands[2]->getStartLoc(),
+ "r13 (SP) is an unpredictable operand to BXJ");
+ return false;
+ }
case ARM::STRD: {
// Rt2 must be Rt + 1.
unsigned Rt = MRI->getEncodingValue(Inst.getOperand(0).getReg());
diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
index 4d5122a..4c169a8 100644
--- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
+++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
@@ -91,7 +91,7 @@ public:
MCDisassembler(STI, Ctx) {
}
- ~ARMDisassembler() {}
+ ~ARMDisassembler() override {}
DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size,
ArrayRef<uint8_t> Bytes, uint64_t Address,
@@ -106,7 +106,7 @@ public:
MCDisassembler(STI, Ctx) {
}
- ~ThumbDisassembler() {}
+ ~ThumbDisassembler() override {}
DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size,
ArrayRef<uint8_t> Bytes, uint64_t Address,
@@ -212,6 +212,10 @@ static DecodeStatus DecodeSMLAInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
static DecodeStatus DecodeCPSInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeTSTInstruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeSETPANInstruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
static DecodeStatus DecodeT2CPSInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
static DecodeStatus DecodeAddrModeImm12Operand(MCInst &Inst, unsigned Val,
@@ -2119,6 +2123,54 @@ static DecodeStatus DecodeSMLAInstruction(MCInst &Inst, unsigned Insn,
return S;
}
+static DecodeStatus DecodeTSTInstruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Pred = fieldFromInstruction(Insn, 28, 4);
+ unsigned Rn = fieldFromInstruction(Insn, 16, 4);
+ unsigned Rm = fieldFromInstruction(Insn, 0, 4);
+
+ if (Pred == 0xF)
+ return DecodeSETPANInstruction(Inst, Insn, Address, Decoder);
+
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodePredicateOperand(Inst, Pred, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ return S;
+}
+
+static DecodeStatus DecodeSETPANInstruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Imm = fieldFromInstruction(Insn, 9, 1);
+
+ const MCDisassembler *Dis = static_cast<const MCDisassembler*>(Decoder);
+ uint64_t FeatureBits = Dis->getSubtargetInfo().getFeatureBits();
+ if ((FeatureBits & ARM::HasV8_1aOps) == 0 ||
+ (FeatureBits & ARM::HasV8Ops) == 0 )
+ return MCDisassembler::Fail;
+
+ // Decoder can be called from DecodeTST, which does not check the full
+ // encoding is valid.
+ if (fieldFromInstruction(Insn, 20,12) != 0xf11 ||
+ fieldFromInstruction(Insn, 4,4) != 0)
+ return MCDisassembler::Fail;
+ if (fieldFromInstruction(Insn, 10,10) != 0 ||
+ fieldFromInstruction(Insn, 0,4) != 0)
+ S = MCDisassembler::SoftFail;
+
+ Inst.setOpcode(ARM::SETPAN);
+ Inst.addOperand(MCOperand::CreateImm(Imm));
+
+ return S;
+}
+
static DecodeStatus DecodeAddrModeImm12Operand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
index e15323d..c2e1b2a 100644
--- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
+++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
@@ -40,12 +40,12 @@ static unsigned translateShiftImm(unsigned imm) {
/// Prints the shift value with an immediate value.
static void printRegImmShift(raw_ostream &O, ARM_AM::ShiftOpc ShOpc,
- unsigned ShImm, bool UseMarkup) {
+ unsigned ShImm, bool UseMarkup) {
if (ShOpc == ARM_AM::no_shift || (ShOpc == ARM_AM::lsl && !ShImm))
return;
O << ", ";
- assert (!(ShOpc == ARM_AM::ror && !ShImm) && "Cannot have ror #0");
+ assert(!(ShOpc == ARM_AM::ror && !ShImm) && "Cannot have ror #0");
O << getShiftOpcStr(ShOpc);
if (ShOpc != ARM_AM::rrx) {
@@ -58,49 +58,52 @@ static void printRegImmShift(raw_ostream &O, ARM_AM::ShiftOpc ShOpc,
}
}
-ARMInstPrinter::ARMInstPrinter(const MCAsmInfo &MAI,
- const MCInstrInfo &MII,
- const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI) :
- MCInstPrinter(MAI, MII, MRI) {
- // Initialize the set of available features.
- setAvailableFeatures(STI.getFeatureBits());
-}
+ARMInstPrinter::ARMInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
+ const MCRegisterInfo &MRI)
+ : MCInstPrinter(MAI, MII, MRI) {}
void ARMInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
- OS << markup("<reg:")
- << getRegisterName(RegNo)
- << markup(">");
+ OS << markup("<reg:") << getRegisterName(RegNo) << markup(">");
}
void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
- StringRef Annot) {
+ StringRef Annot, const MCSubtargetInfo &STI) {
unsigned Opcode = MI->getOpcode();
- switch(Opcode) {
+ switch (Opcode) {
// Check for HINT instructions w/ canonical names.
case ARM::HINT:
case ARM::tHINT:
case ARM::t2HINT:
switch (MI->getOperand(0).getImm()) {
- case 0: O << "\tnop"; break;
- case 1: O << "\tyield"; break;
- case 2: O << "\twfe"; break;
- case 3: O << "\twfi"; break;
- case 4: O << "\tsev"; break;
+ case 0:
+ O << "\tnop";
+ break;
+ case 1:
+ O << "\tyield";
+ break;
+ case 2:
+ O << "\twfe";
+ break;
+ case 3:
+ O << "\twfi";
+ break;
+ case 4:
+ O << "\tsev";
+ break;
case 5:
- if ((getAvailableFeatures() & ARM::HasV8Ops)) {
+ if ((STI.getFeatureBits() & ARM::HasV8Ops)) {
O << "\tsevl";
break;
} // Fallthrough for non-v8
default:
// Anything else should just print normally.
- printInstruction(MI, O);
+ printInstruction(MI, STI, O);
printAnnotation(O, Annot);
return;
}
- printPredicateOperand(MI, 1, O);
+ printPredicateOperand(MI, 1, STI, O);
if (Opcode == ARM::t2HINT)
O << ".w";
printAnnotation(O, Annot);
@@ -115,8 +118,8 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
const MCOperand &MO3 = MI->getOperand(3);
O << '\t' << ARM_AM::getShiftOpcStr(ARM_AM::getSORegShOp(MO3.getImm()));
- printSBitModifierOperand(MI, 6, O);
- printPredicateOperand(MI, 4, O);
+ printSBitModifierOperand(MI, 6, STI, O);
+ printPredicateOperand(MI, 4, STI, O);
O << '\t';
printRegName(O, Dst.getReg());
@@ -137,8 +140,8 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
const MCOperand &MO2 = MI->getOperand(2);
O << '\t' << ARM_AM::getShiftOpcStr(ARM_AM::getSORegShOp(MO2.getImm()));
- printSBitModifierOperand(MI, 5, O);
- printPredicateOperand(MI, 3, O);
+ printSBitModifierOperand(MI, 5, STI, O);
+ printPredicateOperand(MI, 3, STI, O);
O << '\t';
printRegName(O, Dst.getReg());
@@ -150,10 +153,8 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
return;
}
- O << ", "
- << markup("<imm:")
- << "#" << translateShiftImm(ARM_AM::getSORegOffset(MO2.getImm()))
- << markup(">");
+ O << ", " << markup("<imm:") << "#"
+ << translateShiftImm(ARM_AM::getSORegOffset(MO2.getImm())) << markup(">");
printAnnotation(O, Annot);
return;
}
@@ -164,11 +165,11 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
if (MI->getOperand(0).getReg() == ARM::SP && MI->getNumOperands() > 5) {
// Should only print PUSH if there are at least two registers in the list.
O << '\t' << "push";
- printPredicateOperand(MI, 2, O);
+ printPredicateOperand(MI, 2, STI, O);
if (Opcode == ARM::t2STMDB_UPD)
O << ".w";
O << '\t';
- printRegisterList(MI, 4, O);
+ printRegisterList(MI, 4, STI, O);
printAnnotation(O, Annot);
return;
} else
@@ -178,7 +179,7 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
if (MI->getOperand(2).getReg() == ARM::SP &&
MI->getOperand(3).getImm() == -4) {
O << '\t' << "push";
- printPredicateOperand(MI, 4, O);
+ printPredicateOperand(MI, 4, STI, O);
O << "\t{";
printRegName(O, MI->getOperand(1).getReg());
O << "}";
@@ -193,11 +194,11 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
if (MI->getOperand(0).getReg() == ARM::SP && MI->getNumOperands() > 5) {
// Should only print POP if there are at least two registers in the list.
O << '\t' << "pop";
- printPredicateOperand(MI, 2, O);
+ printPredicateOperand(MI, 2, STI, O);
if (Opcode == ARM::t2LDMIA_UPD)
O << ".w";
O << '\t';
- printRegisterList(MI, 4, O);
+ printRegisterList(MI, 4, STI, O);
printAnnotation(O, Annot);
return;
} else
@@ -207,7 +208,7 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
if (MI->getOperand(2).getReg() == ARM::SP &&
MI->getOperand(4).getImm() == 4) {
O << '\t' << "pop";
- printPredicateOperand(MI, 5, O);
+ printPredicateOperand(MI, 5, STI, O);
O << "\t{";
printRegName(O, MI->getOperand(0).getReg());
O << "}";
@@ -221,9 +222,9 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
case ARM::VSTMDDB_UPD:
if (MI->getOperand(0).getReg() == ARM::SP) {
O << '\t' << "vpush";
- printPredicateOperand(MI, 2, O);
+ printPredicateOperand(MI, 2, STI, O);
O << '\t';
- printRegisterList(MI, 4, O);
+ printRegisterList(MI, 4, STI, O);
printAnnotation(O, Annot);
return;
} else
@@ -234,9 +235,9 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
case ARM::VLDMDIA_UPD:
if (MI->getOperand(0).getReg() == ARM::SP) {
O << '\t' << "vpop";
- printPredicateOperand(MI, 2, O);
+ printPredicateOperand(MI, 2, STI, O);
O << '\t';
- printRegisterList(MI, 4, O);
+ printRegisterList(MI, 4, STI, O);
printAnnotation(O, Annot);
return;
} else
@@ -252,12 +253,13 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
O << "\tldm";
- printPredicateOperand(MI, 1, O);
+ printPredicateOperand(MI, 1, STI, O);
O << '\t';
printRegName(O, BaseReg);
- if (Writeback) O << "!";
+ if (Writeback)
+ O << "!";
O << ", ";
- printRegisterList(MI, 3, O);
+ printRegisterList(MI, 3, STI, O);
printAnnotation(O, Annot);
return;
}
@@ -268,9 +270,11 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
// GPRs. However, when decoding them, the two GRPs cannot be automatically
// expressed as a GPRPair, so we have to manually merge them.
// FIXME: We would really like to be able to tablegen'erate this.
- case ARM::LDREXD: case ARM::STREXD:
- case ARM::LDAEXD: case ARM::STLEXD: {
- const MCRegisterClass& MRC = MRI.getRegClass(ARM::GPRRegClassID);
+ case ARM::LDREXD:
+ case ARM::STREXD:
+ case ARM::LDAEXD:
+ case ARM::STLEXD: {
+ const MCRegisterClass &MRC = MRI.getRegClass(ARM::GPRRegClassID);
bool isStore = Opcode == ARM::STREXD || Opcode == ARM::STLEXD;
unsigned Reg = MI->getOperand(isStore ? 1 : 0).getReg();
if (MRC.contains(Reg)) {
@@ -280,28 +284,27 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
if (isStore)
NewMI.addOperand(MI->getOperand(0));
- NewReg = MCOperand::CreateReg(MRI.getMatchingSuperReg(Reg, ARM::gsub_0,
- &MRI.getRegClass(ARM::GPRPairRegClassID)));
+ NewReg = MCOperand::CreateReg(MRI.getMatchingSuperReg(
+ Reg, ARM::gsub_0, &MRI.getRegClass(ARM::GPRPairRegClassID)));
NewMI.addOperand(NewReg);
// Copy the rest operands into NewMI.
- for(unsigned i= isStore ? 3 : 2; i < MI->getNumOperands(); ++i)
+ for (unsigned i = isStore ? 3 : 2; i < MI->getNumOperands(); ++i)
NewMI.addOperand(MI->getOperand(i));
- printInstruction(&NewMI, O);
+ printInstruction(&NewMI, STI, O);
return;
}
break;
}
- // B9.3.3 ERET (Thumb)
- // For a target that has Virtualization Extensions, ERET is the preferred
- // disassembly of SUBS PC, LR, #0
+ // B9.3.3 ERET (Thumb)
+ // For a target that has Virtualization Extensions, ERET is the preferred
+ // disassembly of SUBS PC, LR, #0
case ARM::t2SUBS_PC_LR: {
- if (MI->getNumOperands() == 3 &&
- MI->getOperand(0).isImm() &&
+ if (MI->getNumOperands() == 3 && MI->getOperand(0).isImm() &&
MI->getOperand(0).getImm() == 0 &&
- (getAvailableFeatures() & ARM::FeatureVirtualization)) {
+ (STI.getFeatureBits() & ARM::FeatureVirtualization)) {
O << "\teret";
- printPredicateOperand(MI, 1, O);
+ printPredicateOperand(MI, 1, STI, O);
printAnnotation(O, Annot);
return;
}
@@ -309,20 +312,18 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
}
}
- printInstruction(MI, O);
+ printInstruction(MI, STI, O);
printAnnotation(O, Annot);
}
void ARMInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
+ const MCSubtargetInfo &STI, raw_ostream &O) {
const MCOperand &Op = MI->getOperand(OpNo);
if (Op.isReg()) {
unsigned Reg = Op.getReg();
printRegName(O, Reg);
} else if (Op.isImm()) {
- O << markup("<imm:")
- << '#' << formatImm(Op.getImm())
- << markup(">");
+ O << markup("<imm:") << '#' << formatImm(Op.getImm()) << markup(">");
} else {
assert(Op.isExpr() && "unknown operand kind in printOperand");
const MCExpr *Expr = Op.getExpr();
@@ -354,6 +355,7 @@ void ARMInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
}
void ARMInstPrinter::printThumbLdrLabelOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
const MCOperand &MO1 = MI->getOperand(OpNum);
if (MO1.isExpr()) {
@@ -370,13 +372,9 @@ void ARMInstPrinter::printThumbLdrLabelOperand(const MCInst *MI, unsigned OpNum,
if (OffImm == INT32_MIN)
OffImm = 0;
if (isSub) {
- O << markup("<imm:")
- << "#-" << formatImm(-OffImm)
- << markup(">");
+ O << markup("<imm:") << "#-" << formatImm(-OffImm) << markup(">");
} else {
- O << markup("<imm:")
- << "#" << formatImm(OffImm)
- << markup(">");
+ O << markup("<imm:") << "#" << formatImm(OffImm) << markup(">");
}
O << "]" << markup(">");
}
@@ -387,10 +385,11 @@ void ARMInstPrinter::printThumbLdrLabelOperand(const MCInst *MI, unsigned OpNum,
// REG REG 0,SH_OPC - e.g. R5, ROR R3
// REG 0 IMM,SH_OPC - e.g. R5, LSL #3
void ARMInstPrinter::printSORegRegOperand(const MCInst *MI, unsigned OpNum,
- raw_ostream &O) {
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
const MCOperand &MO1 = MI->getOperand(OpNum);
- const MCOperand &MO2 = MI->getOperand(OpNum+1);
- const MCOperand &MO3 = MI->getOperand(OpNum+2);
+ const MCOperand &MO2 = MI->getOperand(OpNum + 1);
+ const MCOperand &MO3 = MI->getOperand(OpNum + 2);
printRegName(O, MO1.getReg());
@@ -406,9 +405,10 @@ void ARMInstPrinter::printSORegRegOperand(const MCInst *MI, unsigned OpNum,
}
void ARMInstPrinter::printSORegImmOperand(const MCInst *MI, unsigned OpNum,
- raw_ostream &O) {
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
const MCOperand &MO1 = MI->getOperand(OpNum);
- const MCOperand &MO2 = MI->getOperand(OpNum+1);
+ const MCOperand &MO2 = MI->getOperand(OpNum + 1);
printRegName(O, MO1.getReg());
@@ -417,28 +417,25 @@ void ARMInstPrinter::printSORegImmOperand(const MCInst *MI, unsigned OpNum,
ARM_AM::getSORegOffset(MO2.getImm()), UseMarkup);
}
-
//===--------------------------------------------------------------------===//
// Addressing Mode #2
//===--------------------------------------------------------------------===//
void ARMInstPrinter::printAM2PreOrOffsetIndexOp(const MCInst *MI, unsigned Op,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
const MCOperand &MO1 = MI->getOperand(Op);
- const MCOperand &MO2 = MI->getOperand(Op+1);
- const MCOperand &MO3 = MI->getOperand(Op+2);
+ const MCOperand &MO2 = MI->getOperand(Op + 1);
+ const MCOperand &MO3 = MI->getOperand(Op + 2);
O << markup("<mem:") << "[";
printRegName(O, MO1.getReg());
if (!MO2.getReg()) {
if (ARM_AM::getAM2Offset(MO3.getImm())) { // Don't print +0.
- O << ", "
- << markup("<imm:")
- << "#"
+ O << ", " << markup("<imm:") << "#"
<< ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO3.getImm()))
- << ARM_AM::getAM2Offset(MO3.getImm())
- << markup(">");
+ << ARM_AM::getAM2Offset(MO3.getImm()) << markup(">");
}
O << "]" << markup(">");
return;
@@ -454,9 +451,10 @@ void ARMInstPrinter::printAM2PreOrOffsetIndexOp(const MCInst *MI, unsigned Op,
}
void ARMInstPrinter::printAddrModeTBB(const MCInst *MI, unsigned Op,
- raw_ostream &O) {
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
const MCOperand &MO1 = MI->getOperand(Op);
- const MCOperand &MO2 = MI->getOperand(Op+1);
+ const MCOperand &MO2 = MI->getOperand(Op + 1);
O << markup("<mem:") << "[";
printRegName(O, MO1.getReg());
O << ", ";
@@ -465,9 +463,10 @@ void ARMInstPrinter::printAddrModeTBB(const MCInst *MI, unsigned Op,
}
void ARMInstPrinter::printAddrModeTBH(const MCInst *MI, unsigned Op,
- raw_ostream &O) {
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
const MCOperand &MO1 = MI->getOperand(Op);
- const MCOperand &MO2 = MI->getOperand(Op+1);
+ const MCOperand &MO2 = MI->getOperand(Op + 1);
O << markup("<mem:") << "[";
printRegName(O, MO1.getReg());
O << ", ";
@@ -476,35 +475,35 @@ void ARMInstPrinter::printAddrModeTBH(const MCInst *MI, unsigned Op,
}
void ARMInstPrinter::printAddrMode2Operand(const MCInst *MI, unsigned Op,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
const MCOperand &MO1 = MI->getOperand(Op);
- if (!MO1.isReg()) { // FIXME: This is for CP entries, but isn't right.
- printOperand(MI, Op, O);
+ if (!MO1.isReg()) { // FIXME: This is for CP entries, but isn't right.
+ printOperand(MI, Op, STI, O);
return;
}
#ifndef NDEBUG
- const MCOperand &MO3 = MI->getOperand(Op+2);
+ const MCOperand &MO3 = MI->getOperand(Op + 2);
unsigned IdxMode = ARM_AM::getAM2IdxMode(MO3.getImm());
- assert(IdxMode != ARMII::IndexModePost &&
- "Should be pre or offset index op");
+ assert(IdxMode != ARMII::IndexModePost && "Should be pre or offset index op");
#endif
- printAM2PreOrOffsetIndexOp(MI, Op, O);
+ printAM2PreOrOffsetIndexOp(MI, Op, STI, O);
}
void ARMInstPrinter::printAddrMode2OffsetOperand(const MCInst *MI,
unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
const MCOperand &MO1 = MI->getOperand(OpNum);
- const MCOperand &MO2 = MI->getOperand(OpNum+1);
+ const MCOperand &MO2 = MI->getOperand(OpNum + 1);
if (!MO1.getReg()) {
unsigned ImmOffs = ARM_AM::getAM2Offset(MO2.getImm());
- O << markup("<imm:")
- << '#' << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO2.getImm()))
- << ImmOffs
+ O << markup("<imm:") << '#'
+ << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO2.getImm())) << ImmOffs
<< markup(">");
return;
}
@@ -524,8 +523,8 @@ void ARMInstPrinter::printAM3PreOrOffsetIndexOp(const MCInst *MI, unsigned Op,
raw_ostream &O,
bool AlwaysPrintImm0) {
const MCOperand &MO1 = MI->getOperand(Op);
- const MCOperand &MO2 = MI->getOperand(Op+1);
- const MCOperand &MO3 = MI->getOperand(Op+2);
+ const MCOperand &MO2 = MI->getOperand(Op + 1);
+ const MCOperand &MO3 = MI->getOperand(Op + 2);
O << markup("<mem:") << '[';
printRegName(O, MO1.getReg());
@@ -537,16 +536,12 @@ void ARMInstPrinter::printAM3PreOrOffsetIndexOp(const MCInst *MI, unsigned Op,
return;
}
- //If the op is sub we have to print the immediate even if it is 0
+ // If the op is sub we have to print the immediate even if it is 0
unsigned ImmOffs = ARM_AM::getAM3Offset(MO3.getImm());
ARM_AM::AddrOpc op = ARM_AM::getAM3Op(MO3.getImm());
if (AlwaysPrintImm0 || ImmOffs || (op == ARM_AM::sub)) {
- O << ", "
- << markup("<imm:")
- << "#"
- << ARM_AM::getAddrOpcStr(op)
- << ImmOffs
+ O << ", " << markup("<imm:") << "#" << ARM_AM::getAddrOpcStr(op) << ImmOffs
<< markup(">");
}
O << ']' << markup(">");
@@ -554,10 +549,11 @@ void ARMInstPrinter::printAM3PreOrOffsetIndexOp(const MCInst *MI, unsigned Op,
template <bool AlwaysPrintImm0>
void ARMInstPrinter::printAddrMode3Operand(const MCInst *MI, unsigned Op,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
const MCOperand &MO1 = MI->getOperand(Op);
- if (!MO1.isReg()) { // For label symbolic references.
- printOperand(MI, Op, O);
+ if (!MO1.isReg()) { // For label symbolic references.
+ printOperand(MI, Op, STI, O);
return;
}
@@ -569,9 +565,10 @@ void ARMInstPrinter::printAddrMode3Operand(const MCInst *MI, unsigned Op,
void ARMInstPrinter::printAddrMode3OffsetOperand(const MCInst *MI,
unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
const MCOperand &MO1 = MI->getOperand(OpNum);
- const MCOperand &MO2 = MI->getOperand(OpNum+1);
+ const MCOperand &MO2 = MI->getOperand(OpNum + 1);
if (MO1.getReg()) {
O << getAddrOpcStr(ARM_AM::getAM3Op(MO2.getImm()));
@@ -580,56 +577,56 @@ void ARMInstPrinter::printAddrMode3OffsetOperand(const MCInst *MI,
}
unsigned ImmOffs = ARM_AM::getAM3Offset(MO2.getImm());
- O << markup("<imm:")
- << '#' << ARM_AM::getAddrOpcStr(ARM_AM::getAM3Op(MO2.getImm())) << ImmOffs
+ O << markup("<imm:") << '#'
+ << ARM_AM::getAddrOpcStr(ARM_AM::getAM3Op(MO2.getImm())) << ImmOffs
<< markup(">");
}
-void ARMInstPrinter::printPostIdxImm8Operand(const MCInst *MI,
- unsigned OpNum,
+void ARMInstPrinter::printPostIdxImm8Operand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
const MCOperand &MO = MI->getOperand(OpNum);
unsigned Imm = MO.getImm();
- O << markup("<imm:")
- << '#' << ((Imm & 256) ? "" : "-") << (Imm & 0xff)
+ O << markup("<imm:") << '#' << ((Imm & 256) ? "" : "-") << (Imm & 0xff)
<< markup(">");
}
void ARMInstPrinter::printPostIdxRegOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
const MCOperand &MO1 = MI->getOperand(OpNum);
- const MCOperand &MO2 = MI->getOperand(OpNum+1);
+ const MCOperand &MO2 = MI->getOperand(OpNum + 1);
O << (MO2.getImm() ? "" : "-");
printRegName(O, MO1.getReg());
}
-void ARMInstPrinter::printPostIdxImm8s4Operand(const MCInst *MI,
- unsigned OpNum,
- raw_ostream &O) {
+void ARMInstPrinter::printPostIdxImm8s4Operand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
const MCOperand &MO = MI->getOperand(OpNum);
unsigned Imm = MO.getImm();
- O << markup("<imm:")
- << '#' << ((Imm & 256) ? "" : "-") << ((Imm & 0xff) << 2)
+ O << markup("<imm:") << '#' << ((Imm & 256) ? "" : "-") << ((Imm & 0xff) << 2)
<< markup(">");
}
-
void ARMInstPrinter::printLdStmModeOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
- ARM_AM::AMSubMode Mode = ARM_AM::getAM4SubMode(MI->getOperand(OpNum)
- .getImm());
+ ARM_AM::AMSubMode Mode =
+ ARM_AM::getAM4SubMode(MI->getOperand(OpNum).getImm());
O << ARM_AM::getAMSubModeStr(Mode);
}
template <bool AlwaysPrintImm0>
void ARMInstPrinter::printAddrMode5Operand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
const MCOperand &MO1 = MI->getOperand(OpNum);
- const MCOperand &MO2 = MI->getOperand(OpNum+1);
+ const MCOperand &MO2 = MI->getOperand(OpNum + 1);
- if (!MO1.isReg()) { // FIXME: This is for CP entries, but isn't right.
- printOperand(MI, OpNum, O);
+ if (!MO1.isReg()) { // FIXME: This is for CP entries, but isn't right.
+ printOperand(MI, OpNum, STI, O);
return;
}
@@ -639,20 +636,17 @@ void ARMInstPrinter::printAddrMode5Operand(const MCInst *MI, unsigned OpNum,
unsigned ImmOffs = ARM_AM::getAM5Offset(MO2.getImm());
ARM_AM::AddrOpc Op = ARM_AM::getAM5Op(MO2.getImm());
if (AlwaysPrintImm0 || ImmOffs || Op == ARM_AM::sub) {
- O << ", "
- << markup("<imm:")
- << "#"
- << ARM_AM::getAddrOpcStr(Op)
- << ImmOffs * 4
- << markup(">");
+ O << ", " << markup("<imm:") << "#" << ARM_AM::getAddrOpcStr(Op)
+ << ImmOffs * 4 << markup(">");
}
O << "]" << markup(">");
}
void ARMInstPrinter::printAddrMode6Operand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
const MCOperand &MO1 = MI->getOperand(OpNum);
- const MCOperand &MO2 = MI->getOperand(OpNum+1);
+ const MCOperand &MO2 = MI->getOperand(OpNum + 1);
O << markup("<mem:") << "[";
printRegName(O, MO1.getReg());
@@ -663,6 +657,7 @@ void ARMInstPrinter::printAddrMode6Operand(const MCInst *MI, unsigned OpNum,
}
void ARMInstPrinter::printAddrMode7Operand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
const MCOperand &MO1 = MI->getOperand(OpNum);
O << markup("<mem:") << "[";
@@ -672,6 +667,7 @@ void ARMInstPrinter::printAddrMode7Operand(const MCInst *MI, unsigned OpNum,
void ARMInstPrinter::printAddrMode6OffsetOperand(const MCInst *MI,
unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
const MCOperand &MO = MI->getOperand(OpNum);
if (MO.getReg() == 0)
@@ -684,49 +680,47 @@ void ARMInstPrinter::printAddrMode6OffsetOperand(const MCInst *MI,
void ARMInstPrinter::printBitfieldInvMaskImmOperand(const MCInst *MI,
unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
const MCOperand &MO = MI->getOperand(OpNum);
uint32_t v = ~MO.getImm();
int32_t lsb = countTrailingZeros(v);
- int32_t width = (32 - countLeadingZeros (v)) - lsb;
+ int32_t width = (32 - countLeadingZeros(v)) - lsb;
assert(MO.isImm() && "Not a valid bf_inv_mask_imm value!");
- O << markup("<imm:") << '#' << lsb << markup(">")
- << ", "
- << markup("<imm:") << '#' << width << markup(">");
+ O << markup("<imm:") << '#' << lsb << markup(">") << ", " << markup("<imm:")
+ << '#' << width << markup(">");
}
void ARMInstPrinter::printMemBOption(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
unsigned val = MI->getOperand(OpNum).getImm();
- O << ARM_MB::MemBOptToString(val, (getAvailableFeatures() & ARM::HasV8Ops));
+ O << ARM_MB::MemBOptToString(val, (STI.getFeatureBits() & ARM::HasV8Ops));
}
void ARMInstPrinter::printInstSyncBOption(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
unsigned val = MI->getOperand(OpNum).getImm();
O << ARM_ISB::InstSyncBOptToString(val);
}
void ARMInstPrinter::printShiftImmOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
unsigned ShiftOp = MI->getOperand(OpNum).getImm();
bool isASR = (ShiftOp & (1 << 5)) != 0;
unsigned Amt = ShiftOp & 0x1f;
if (isASR) {
- O << ", asr "
- << markup("<imm:")
- << "#" << (Amt == 0 ? 32 : Amt)
- << markup(">");
- }
- else if (Amt) {
- O << ", lsl "
- << markup("<imm:")
- << "#" << Amt
+ O << ", asr " << markup("<imm:") << "#" << (Amt == 0 ? 32 : Amt)
<< markup(">");
+ } else if (Amt) {
+ O << ", lsl " << markup("<imm:") << "#" << Amt << markup(">");
}
}
void ARMInstPrinter::printPKHLSLShiftImm(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
unsigned Imm = MI->getOperand(OpNum).getImm();
if (Imm == 0)
@@ -736,6 +730,7 @@ void ARMInstPrinter::printPKHLSLShiftImm(const MCInst *MI, unsigned OpNum,
}
void ARMInstPrinter::printPKHASRShiftImm(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
unsigned Imm = MI->getOperand(OpNum).getImm();
// A shift amount of 32 is encoded as 0.
@@ -746,16 +741,19 @@ void ARMInstPrinter::printPKHASRShiftImm(const MCInst *MI, unsigned OpNum,
}
void ARMInstPrinter::printRegisterList(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
O << "{";
for (unsigned i = OpNum, e = MI->getNumOperands(); i != e; ++i) {
- if (i != OpNum) O << ", ";
+ if (i != OpNum)
+ O << ", ";
printRegName(O, MI->getOperand(i).getReg());
}
O << "}";
}
void ARMInstPrinter::printGPRPairOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
unsigned Reg = MI->getOperand(OpNum).getReg();
printRegName(O, MRI.getSubReg(Reg, ARM::gsub_0));
@@ -763,8 +761,8 @@ void ARMInstPrinter::printGPRPairOperand(const MCInst *MI, unsigned OpNum,
printRegName(O, MRI.getSubReg(Reg, ARM::gsub_1));
}
-
void ARMInstPrinter::printSetendOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
const MCOperand &Op = MI->getOperand(OpNum);
if (Op.getImm())
@@ -774,16 +772,16 @@ void ARMInstPrinter::printSetendOperand(const MCInst *MI, unsigned OpNum,
}
void ARMInstPrinter::printCPSIMod(const MCInst *MI, unsigned OpNum,
- raw_ostream &O) {
+ const MCSubtargetInfo &STI, raw_ostream &O) {
const MCOperand &Op = MI->getOperand(OpNum);
O << ARM_PROC::IModToString(Op.getImm());
}
void ARMInstPrinter::printCPSIFlag(const MCInst *MI, unsigned OpNum,
- raw_ostream &O) {
+ const MCSubtargetInfo &STI, raw_ostream &O) {
const MCOperand &Op = MI->getOperand(OpNum);
unsigned IFlags = Op.getImm();
- for (int i=2; i >= 0; --i)
+ for (int i = 2; i >= 0; --i)
if (IFlags & (1 << i))
O << ARM_PROC::IFlagsToString(1 << i);
@@ -792,11 +790,12 @@ void ARMInstPrinter::printCPSIFlag(const MCInst *MI, unsigned OpNum,
}
void ARMInstPrinter::printMSRMaskOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
const MCOperand &Op = MI->getOperand(OpNum);
unsigned SpecRegRBit = Op.getImm() >> 4;
unsigned Mask = Op.getImm() & 0xf;
- uint64_t FeatureBits = getAvailableFeatures();
+ uint64_t FeatureBits = STI.getFeatureBits();
if (FeatureBits & ARM::FeatureMClass) {
unsigned SYSm = Op.getImm();
@@ -805,14 +804,30 @@ void ARMInstPrinter::printMSRMaskOperand(const MCInst *MI, unsigned OpNum,
// For writes, handle extended mask bits if the DSP extension is present.
if (Opcode == ARM::t2MSR_M && (FeatureBits & ARM::FeatureDSPThumb2)) {
switch (SYSm) {
- case 0x400: O << "apsr_g"; return;
- case 0xc00: O << "apsr_nzcvqg"; return;
- case 0x401: O << "iapsr_g"; return;
- case 0xc01: O << "iapsr_nzcvqg"; return;
- case 0x402: O << "eapsr_g"; return;
- case 0xc02: O << "eapsr_nzcvqg"; return;
- case 0x403: O << "xpsr_g"; return;
- case 0xc03: O << "xpsr_nzcvqg"; return;
+ case 0x400:
+ O << "apsr_g";
+ return;
+ case 0xc00:
+ O << "apsr_nzcvqg";
+ return;
+ case 0x401:
+ O << "iapsr_g";
+ return;
+ case 0xc01:
+ O << "iapsr_nzcvqg";
+ return;
+ case 0x402:
+ O << "eapsr_g";
+ return;
+ case 0xc02:
+ O << "eapsr_nzcvqg";
+ return;
+ case 0x403:
+ O << "xpsr_g";
+ return;
+ case 0xc03:
+ O << "xpsr_nzcvqg";
+ return;
}
}
@@ -823,29 +838,66 @@ void ARMInstPrinter::printMSRMaskOperand(const MCInst *MI, unsigned OpNum,
// ARMv7-M deprecates using MSR APSR without a _<bits> qualifier as an
// alias for MSR APSR_nzcvq.
switch (SYSm) {
- case 0: O << "apsr_nzcvq"; return;
- case 1: O << "iapsr_nzcvq"; return;
- case 2: O << "eapsr_nzcvq"; return;
- case 3: O << "xpsr_nzcvq"; return;
+ case 0:
+ O << "apsr_nzcvq";
+ return;
+ case 1:
+ O << "iapsr_nzcvq";
+ return;
+ case 2:
+ O << "eapsr_nzcvq";
+ return;
+ case 3:
+ O << "xpsr_nzcvq";
+ return;
}
}
switch (SYSm) {
- default: llvm_unreachable("Unexpected mask value!");
- case 0: O << "apsr"; return;
- case 1: O << "iapsr"; return;
- case 2: O << "eapsr"; return;
- case 3: O << "xpsr"; return;
- case 5: O << "ipsr"; return;
- case 6: O << "epsr"; return;
- case 7: O << "iepsr"; return;
- case 8: O << "msp"; return;
- case 9: O << "psp"; return;
- case 16: O << "primask"; return;
- case 17: O << "basepri"; return;
- case 18: O << "basepri_max"; return;
- case 19: O << "faultmask"; return;
- case 20: O << "control"; return;
+ default:
+ llvm_unreachable("Unexpected mask value!");
+ case 0:
+ O << "apsr";
+ return;
+ case 1:
+ O << "iapsr";
+ return;
+ case 2:
+ O << "eapsr";
+ return;
+ case 3:
+ O << "xpsr";
+ return;
+ case 5:
+ O << "ipsr";
+ return;
+ case 6:
+ O << "epsr";
+ return;
+ case 7:
+ O << "iepsr";
+ return;
+ case 8:
+ O << "msp";
+ return;
+ case 9:
+ O << "psp";
+ return;
+ case 16:
+ O << "primask";
+ return;
+ case 17:
+ O << "basepri";
+ return;
+ case 18:
+ O << "basepri_max";
+ return;
+ case 19:
+ O << "faultmask";
+ return;
+ case 20:
+ O << "control";
+ return;
}
}
@@ -854,10 +906,17 @@ void ARMInstPrinter::printMSRMaskOperand(const MCInst *MI, unsigned OpNum,
if (!SpecRegRBit && (Mask == 8 || Mask == 4 || Mask == 12)) {
O << "APSR_";
switch (Mask) {
- default: llvm_unreachable("Unexpected mask value!");
- case 4: O << "g"; return;
- case 8: O << "nzcvq"; return;
- case 12: O << "nzcvqg"; return;
+ default:
+ llvm_unreachable("Unexpected mask value!");
+ case 4:
+ O << "g";
+ return;
+ case 8:
+ O << "nzcvq";
+ return;
+ case 12:
+ O << "nzcvqg";
+ return;
}
}
@@ -868,14 +927,19 @@ void ARMInstPrinter::printMSRMaskOperand(const MCInst *MI, unsigned OpNum,
if (Mask) {
O << '_';
- if (Mask & 8) O << 'f';
- if (Mask & 4) O << 's';
- if (Mask & 2) O << 'x';
- if (Mask & 1) O << 'c';
+ if (Mask & 8)
+ O << 'f';
+ if (Mask & 4)
+ O << 's';
+ if (Mask & 2)
+ O << 'x';
+ if (Mask & 1)
+ O << 'c';
}
}
void ARMInstPrinter::printBankedRegOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
uint32_t Banked = MI->getOperand(OpNum).getImm();
uint32_t R = (Banked & 0x20) >> 5;
@@ -886,25 +950,40 @@ void ARMInstPrinter::printBankedRegOperand(const MCInst *MI, unsigned OpNum,
if (R) {
O << "SPSR_";
- switch(SysM) {
- case 0x0e: O << "fiq"; return;
- case 0x10: O << "irq"; return;
- case 0x12: O << "svc"; return;
- case 0x14: O << "abt"; return;
- case 0x16: O << "und"; return;
- case 0x1c: O << "mon"; return;
- case 0x1e: O << "hyp"; return;
- default: llvm_unreachable("Invalid banked SPSR register");
+ switch (SysM) {
+ case 0x0e:
+ O << "fiq";
+ return;
+ case 0x10:
+ O << "irq";
+ return;
+ case 0x12:
+ O << "svc";
+ return;
+ case 0x14:
+ O << "abt";
+ return;
+ case 0x16:
+ O << "und";
+ return;
+ case 0x1c:
+ O << "mon";
+ return;
+ case 0x1e:
+ O << "hyp";
+ return;
+ default:
+ llvm_unreachable("Invalid banked SPSR register");
}
}
assert(!R && "should have dealt with SPSR regs");
const char *RegNames[] = {
- "r8_usr", "r9_usr", "r10_usr", "r11_usr", "r12_usr", "sp_usr", "lr_usr", "",
- "r8_fiq", "r9_fiq", "r10_fiq", "r11_fiq", "r12_fiq", "sp_fiq", "lr_fiq", "",
- "lr_irq", "sp_irq", "lr_svc", "sp_svc", "lr_abt", "sp_abt", "lr_und", "sp_und",
- "", "", "", "", "lr_mon", "sp_mon", "elr_hyp", "sp_hyp"
- };
+ "r8_usr", "r9_usr", "r10_usr", "r11_usr", "r12_usr", "sp_usr", "lr_usr",
+ "", "r8_fiq", "r9_fiq", "r10_fiq", "r11_fiq", "r12_fiq", "sp_fiq",
+ "lr_fiq", "", "lr_irq", "sp_irq", "lr_svc", "sp_svc", "lr_abt",
+ "sp_abt", "lr_und", "sp_und", "", "", "", "",
+ "lr_mon", "sp_mon", "elr_hyp", "sp_hyp"};
const char *Name = RegNames[SysM];
assert(Name[0] && "invalid banked register operand");
@@ -912,6 +991,7 @@ void ARMInstPrinter::printBankedRegOperand(const MCInst *MI, unsigned OpNum,
}
void ARMInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
ARMCC::CondCodes CC = (ARMCC::CondCodes)MI->getOperand(OpNum).getImm();
// Handle the undefined 15 CC value here for printing so we don't abort().
@@ -923,12 +1003,14 @@ void ARMInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNum,
void ARMInstPrinter::printMandatoryPredicateOperand(const MCInst *MI,
unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
ARMCC::CondCodes CC = (ARMCC::CondCodes)MI->getOperand(OpNum).getImm();
O << ARMCondCodeToString(CC);
}
void ARMInstPrinter::printSBitModifierOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
if (MI->getOperand(OpNum).getReg()) {
assert(MI->getOperand(OpNum).getReg() == ARM::CPSR &&
@@ -938,33 +1020,38 @@ void ARMInstPrinter::printSBitModifierOperand(const MCInst *MI, unsigned OpNum,
}
void ARMInstPrinter::printNoHashImmediate(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
O << MI->getOperand(OpNum).getImm();
}
void ARMInstPrinter::printPImmediate(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
O << "p" << MI->getOperand(OpNum).getImm();
}
void ARMInstPrinter::printCImmediate(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
O << "c" << MI->getOperand(OpNum).getImm();
}
void ARMInstPrinter::printCoprocOptionImm(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
O << "{" << MI->getOperand(OpNum).getImm() << "}";
}
void ARMInstPrinter::printPCLabel(const MCInst *MI, unsigned OpNum,
- raw_ostream &O) {
+ const MCSubtargetInfo &STI, raw_ostream &O) {
llvm_unreachable("Unhandled PC-relative pseudo-instruction!");
}
-template<unsigned scale>
+template <unsigned scale>
void ARMInstPrinter::printAdrLabelOperand(const MCInst *MI, unsigned OpNum,
- raw_ostream &O) {
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
const MCOperand &MO = MI->getOperand(OpNum);
if (MO.isExpr()) {
@@ -985,25 +1072,26 @@ void ARMInstPrinter::printAdrLabelOperand(const MCInst *MI, unsigned OpNum,
}
void ARMInstPrinter::printThumbS4ImmOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
- O << markup("<imm:")
- << "#" << formatImm(MI->getOperand(OpNum).getImm() * 4)
+ O << markup("<imm:") << "#" << formatImm(MI->getOperand(OpNum).getImm() * 4)
<< markup(">");
}
void ARMInstPrinter::printThumbSRImm(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
unsigned Imm = MI->getOperand(OpNum).getImm();
- O << markup("<imm:")
- << "#" << formatImm((Imm == 0 ? 32 : Imm))
+ O << markup("<imm:") << "#" << formatImm((Imm == 0 ? 32 : Imm))
<< markup(">");
}
void ARMInstPrinter::printThumbITMask(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
// (3 - the number of trailing zeros) is the number of then / else.
unsigned Mask = MI->getOperand(OpNum).getImm();
- unsigned Firstcond = MI->getOperand(OpNum-1).getImm();
+ unsigned Firstcond = MI->getOperand(OpNum - 1).getImm();
unsigned CondBit0 = Firstcond & 1;
unsigned NumTZ = countTrailingZeros(Mask);
assert(NumTZ <= 3 && "Invalid IT mask!");
@@ -1017,12 +1105,13 @@ void ARMInstPrinter::printThumbITMask(const MCInst *MI, unsigned OpNum,
}
void ARMInstPrinter::printThumbAddrModeRROperand(const MCInst *MI, unsigned Op,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
const MCOperand &MO1 = MI->getOperand(Op);
const MCOperand &MO2 = MI->getOperand(Op + 1);
- if (!MO1.isReg()) { // FIXME: This is for CP entries, but isn't right.
- printOperand(MI, Op, O);
+ if (!MO1.isReg()) { // FIXME: This is for CP entries, but isn't right.
+ printOperand(MI, Op, STI, O);
return;
}
@@ -1037,22 +1126,21 @@ void ARMInstPrinter::printThumbAddrModeRROperand(const MCInst *MI, unsigned Op,
void ARMInstPrinter::printThumbAddrModeImm5SOperand(const MCInst *MI,
unsigned Op,
+ const MCSubtargetInfo &STI,
raw_ostream &O,
unsigned Scale) {
const MCOperand &MO1 = MI->getOperand(Op);
const MCOperand &MO2 = MI->getOperand(Op + 1);
- if (!MO1.isReg()) { // FIXME: This is for CP entries, but isn't right.
- printOperand(MI, Op, O);
+ if (!MO1.isReg()) { // FIXME: This is for CP entries, but isn't right.
+ printOperand(MI, Op, STI, O);
return;
}
O << markup("<mem:") << "[";
printRegName(O, MO1.getReg());
if (unsigned ImmOffs = MO2.getImm()) {
- O << ", "
- << markup("<imm:")
- << "#" << formatImm(ImmOffs * Scale)
+ O << ", " << markup("<imm:") << "#" << formatImm(ImmOffs * Scale)
<< markup(">");
}
O << "]" << markup(">");
@@ -1060,25 +1148,29 @@ void ARMInstPrinter::printThumbAddrModeImm5SOperand(const MCInst *MI,
void ARMInstPrinter::printThumbAddrModeImm5S1Operand(const MCInst *MI,
unsigned Op,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
- printThumbAddrModeImm5SOperand(MI, Op, O, 1);
+ printThumbAddrModeImm5SOperand(MI, Op, STI, O, 1);
}
void ARMInstPrinter::printThumbAddrModeImm5S2Operand(const MCInst *MI,
unsigned Op,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
- printThumbAddrModeImm5SOperand(MI, Op, O, 2);
+ printThumbAddrModeImm5SOperand(MI, Op, STI, O, 2);
}
void ARMInstPrinter::printThumbAddrModeImm5S4Operand(const MCInst *MI,
unsigned Op,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
- printThumbAddrModeImm5SOperand(MI, Op, O, 4);
+ printThumbAddrModeImm5SOperand(MI, Op, STI, O, 4);
}
void ARMInstPrinter::printThumbAddrModeSPOperand(const MCInst *MI, unsigned Op,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
- printThumbAddrModeImm5SOperand(MI, Op, O, 4);
+ printThumbAddrModeImm5SOperand(MI, Op, STI, O, 4);
}
// Constant shifts t2_so_reg is a 2-operand unit corresponding to the Thumb2
@@ -1086,9 +1178,10 @@ void ARMInstPrinter::printThumbAddrModeSPOperand(const MCInst *MI, unsigned Op,
// REG 0 0 - e.g. R5
// REG IMM, SH_OPC - e.g. R5, LSL #3
void ARMInstPrinter::printT2SOOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
const MCOperand &MO1 = MI->getOperand(OpNum);
- const MCOperand &MO2 = MI->getOperand(OpNum+1);
+ const MCOperand &MO2 = MI->getOperand(OpNum + 1);
unsigned Reg = MO1.getReg();
printRegName(O, Reg);
@@ -1101,12 +1194,13 @@ void ARMInstPrinter::printT2SOOperand(const MCInst *MI, unsigned OpNum,
template <bool AlwaysPrintImm0>
void ARMInstPrinter::printAddrModeImm12Operand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
const MCOperand &MO1 = MI->getOperand(OpNum);
- const MCOperand &MO2 = MI->getOperand(OpNum+1);
+ const MCOperand &MO2 = MI->getOperand(OpNum + 1);
- if (!MO1.isReg()) { // FIXME: This is for CP entries, but isn't right.
- printOperand(MI, OpNum, O);
+ if (!MO1.isReg()) { // FIXME: This is for CP entries, but isn't right.
+ printOperand(MI, OpNum, STI, O);
return;
}
@@ -1119,26 +1213,20 @@ void ARMInstPrinter::printAddrModeImm12Operand(const MCInst *MI, unsigned OpNum,
if (OffImm == INT32_MIN)
OffImm = 0;
if (isSub) {
- O << ", "
- << markup("<imm:")
- << "#-" << formatImm(-OffImm)
- << markup(">");
- }
- else if (AlwaysPrintImm0 || OffImm > 0) {
- O << ", "
- << markup("<imm:")
- << "#" << formatImm(OffImm)
- << markup(">");
+ O << ", " << markup("<imm:") << "#-" << formatImm(-OffImm) << markup(">");
+ } else if (AlwaysPrintImm0 || OffImm > 0) {
+ O << ", " << markup("<imm:") << "#" << formatImm(OffImm) << markup(">");
}
O << "]" << markup(">");
}
-template<bool AlwaysPrintImm0>
+template <bool AlwaysPrintImm0>
void ARMInstPrinter::printT2AddrModeImm8Operand(const MCInst *MI,
unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
const MCOperand &MO1 = MI->getOperand(OpNum);
- const MCOperand &MO2 = MI->getOperand(OpNum+1);
+ const MCOperand &MO2 = MI->getOperand(OpNum + 1);
O << markup("<mem:") << "[";
printRegName(O, MO1.getReg());
@@ -1149,28 +1237,23 @@ void ARMInstPrinter::printT2AddrModeImm8Operand(const MCInst *MI,
if (OffImm == INT32_MIN)
OffImm = 0;
if (isSub) {
- O << ", "
- << markup("<imm:")
- << "#-" << -OffImm
- << markup(">");
+ O << ", " << markup("<imm:") << "#-" << -OffImm << markup(">");
} else if (AlwaysPrintImm0 || OffImm > 0) {
- O << ", "
- << markup("<imm:")
- << "#" << OffImm
- << markup(">");
+ O << ", " << markup("<imm:") << "#" << OffImm << markup(">");
}
O << "]" << markup(">");
}
-template<bool AlwaysPrintImm0>
+template <bool AlwaysPrintImm0>
void ARMInstPrinter::printT2AddrModeImm8s4Operand(const MCInst *MI,
unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
const MCOperand &MO1 = MI->getOperand(OpNum);
- const MCOperand &MO2 = MI->getOperand(OpNum+1);
+ const MCOperand &MO2 = MI->getOperand(OpNum + 1);
- if (!MO1.isReg()) { // For label symbolic references.
- printOperand(MI, OpNum, O);
+ if (!MO1.isReg()) { // For label symbolic references.
+ printOperand(MI, OpNum, STI, O);
return;
}
@@ -1186,39 +1269,31 @@ void ARMInstPrinter::printT2AddrModeImm8s4Operand(const MCInst *MI,
if (OffImm == INT32_MIN)
OffImm = 0;
if (isSub) {
- O << ", "
- << markup("<imm:")
- << "#-" << -OffImm
- << markup(">");
+ O << ", " << markup("<imm:") << "#-" << -OffImm << markup(">");
} else if (AlwaysPrintImm0 || OffImm > 0) {
- O << ", "
- << markup("<imm:")
- << "#" << OffImm
- << markup(">");
+ O << ", " << markup("<imm:") << "#" << OffImm << markup(">");
}
O << "]" << markup(">");
}
-void ARMInstPrinter::printT2AddrModeImm0_1020s4Operand(const MCInst *MI,
- unsigned OpNum,
- raw_ostream &O) {
+void ARMInstPrinter::printT2AddrModeImm0_1020s4Operand(
+ const MCInst *MI, unsigned OpNum, const MCSubtargetInfo &STI,
+ raw_ostream &O) {
const MCOperand &MO1 = MI->getOperand(OpNum);
- const MCOperand &MO2 = MI->getOperand(OpNum+1);
+ const MCOperand &MO2 = MI->getOperand(OpNum + 1);
O << markup("<mem:") << "[";
printRegName(O, MO1.getReg());
if (MO2.getImm()) {
- O << ", "
- << markup("<imm:")
- << "#" << formatImm(MO2.getImm() * 4)
+ O << ", " << markup("<imm:") << "#" << formatImm(MO2.getImm() * 4)
<< markup(">");
}
O << "]" << markup(">");
}
-void ARMInstPrinter::printT2AddrModeImm8OffsetOperand(const MCInst *MI,
- unsigned OpNum,
- raw_ostream &O) {
+void ARMInstPrinter::printT2AddrModeImm8OffsetOperand(
+ const MCInst *MI, unsigned OpNum, const MCSubtargetInfo &STI,
+ raw_ostream &O) {
const MCOperand &MO1 = MI->getOperand(OpNum);
int32_t OffImm = (int32_t)MO1.getImm();
O << ", " << markup("<imm:");
@@ -1231,9 +1306,9 @@ void ARMInstPrinter::printT2AddrModeImm8OffsetOperand(const MCInst *MI,
O << markup(">");
}
-void ARMInstPrinter::printT2AddrModeImm8s4OffsetOperand(const MCInst *MI,
- unsigned OpNum,
- raw_ostream &O) {
+void ARMInstPrinter::printT2AddrModeImm8s4OffsetOperand(
+ const MCInst *MI, unsigned OpNum, const MCSubtargetInfo &STI,
+ raw_ostream &O) {
const MCOperand &MO1 = MI->getOperand(OpNum);
int32_t OffImm = (int32_t)MO1.getImm();
@@ -1251,10 +1326,11 @@ void ARMInstPrinter::printT2AddrModeImm8s4OffsetOperand(const MCInst *MI,
void ARMInstPrinter::printT2AddrModeSoRegOperand(const MCInst *MI,
unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
const MCOperand &MO1 = MI->getOperand(OpNum);
- const MCOperand &MO2 = MI->getOperand(OpNum+1);
- const MCOperand &MO3 = MI->getOperand(OpNum+2);
+ const MCOperand &MO2 = MI->getOperand(OpNum + 1);
+ const MCOperand &MO3 = MI->getOperand(OpNum + 2);
O << markup("<mem:") << "[";
printRegName(O, MO1.getReg());
@@ -1266,71 +1342,74 @@ void ARMInstPrinter::printT2AddrModeSoRegOperand(const MCInst *MI,
unsigned ShAmt = MO3.getImm();
if (ShAmt) {
assert(ShAmt <= 3 && "Not a valid Thumb2 addressing mode!");
- O << ", lsl "
- << markup("<imm:")
- << "#" << ShAmt
- << markup(">");
+ O << ", lsl " << markup("<imm:") << "#" << ShAmt << markup(">");
}
O << "]" << markup(">");
}
void ARMInstPrinter::printFPImmOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
const MCOperand &MO = MI->getOperand(OpNum);
- O << markup("<imm:")
- << '#' << ARM_AM::getFPImmFloat(MO.getImm())
+ O << markup("<imm:") << '#' << ARM_AM::getFPImmFloat(MO.getImm())
<< markup(">");
}
void ARMInstPrinter::printNEONModImmOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
unsigned EncodedImm = MI->getOperand(OpNum).getImm();
unsigned EltBits;
uint64_t Val = ARM_AM::decodeNEONModImm(EncodedImm, EltBits);
- O << markup("<imm:")
- << "#0x";
+ O << markup("<imm:") << "#0x";
O.write_hex(Val);
O << markup(">");
}
void ARMInstPrinter::printImmPlusOneOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
unsigned Imm = MI->getOperand(OpNum).getImm();
- O << markup("<imm:")
- << "#" << formatImm(Imm + 1)
- << markup(">");
+ O << markup("<imm:") << "#" << formatImm(Imm + 1) << markup(">");
}
void ARMInstPrinter::printRotImmOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
unsigned Imm = MI->getOperand(OpNum).getImm();
if (Imm == 0)
return;
- O << ", ror "
- << markup("<imm:")
- << "#";
+ O << ", ror " << markup("<imm:") << "#";
switch (Imm) {
- default: assert (0 && "illegal ror immediate!");
- case 1: O << "8"; break;
- case 2: O << "16"; break;
- case 3: O << "24"; break;
+ default:
+ assert(0 && "illegal ror immediate!");
+ case 1:
+ O << "8";
+ break;
+ case 2:
+ O << "16";
+ break;
+ case 3:
+ O << "24";
+ break;
}
O << markup(">");
}
void ARMInstPrinter::printModImmOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
MCOperand Op = MI->getOperand(OpNum);
// Support for fixups (MCFixup)
if (Op.isExpr())
- return printOperand(MI, OpNum, O);
+ return printOperand(MI, OpNum, STI, O);
unsigned Bits = Op.getImm() & 0xFF;
unsigned Rot = (Op.getImm() & 0xF00) >> 7;
- bool PrintUnsigned = false;
- switch (MI->getOpcode()){
+ bool PrintUnsigned = false;
+ switch (MI->getOpcode()) {
case ARM::MOVi:
// Movs to PC should be treated unsigned
PrintUnsigned = (MI->getOperand(OpNum - 1).getReg() == ARM::PC);
@@ -1354,36 +1433,30 @@ void ARMInstPrinter::printModImmOperand(const MCInst *MI, unsigned OpNum,
}
// Explicit #bits, #rot implied
- O << "#"
- << markup("<imm:")
- << Bits
- << markup(">")
- << ", #"
- << markup("<imm:")
- << Rot
- << markup(">");
+ O << "#" << markup("<imm:") << Bits << markup(">") << ", #" << markup("<imm:")
+ << Rot << markup(">");
}
void ARMInstPrinter::printFBits16(const MCInst *MI, unsigned OpNum,
- raw_ostream &O) {
- O << markup("<imm:")
- << "#" << 16 - MI->getOperand(OpNum).getImm()
+ const MCSubtargetInfo &STI, raw_ostream &O) {
+ O << markup("<imm:") << "#" << 16 - MI->getOperand(OpNum).getImm()
<< markup(">");
}
void ARMInstPrinter::printFBits32(const MCInst *MI, unsigned OpNum,
- raw_ostream &O) {
- O << markup("<imm:")
- << "#" << 32 - MI->getOperand(OpNum).getImm()
+ const MCSubtargetInfo &STI, raw_ostream &O) {
+ O << markup("<imm:") << "#" << 32 - MI->getOperand(OpNum).getImm()
<< markup(">");
}
void ARMInstPrinter::printVectorIndex(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
O << "[" << MI->getOperand(OpNum).getImm() << "]";
}
void ARMInstPrinter::printVectorListOne(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
O << "{";
printRegName(O, MI->getOperand(OpNum).getReg());
@@ -1391,7 +1464,8 @@ void ARMInstPrinter::printVectorListOne(const MCInst *MI, unsigned OpNum,
}
void ARMInstPrinter::printVectorListTwo(const MCInst *MI, unsigned OpNum,
- raw_ostream &O) {
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
unsigned Reg = MI->getOperand(OpNum).getReg();
unsigned Reg0 = MRI.getSubReg(Reg, ARM::dsub_0);
unsigned Reg1 = MRI.getSubReg(Reg, ARM::dsub_1);
@@ -1402,8 +1476,8 @@ void ARMInstPrinter::printVectorListTwo(const MCInst *MI, unsigned OpNum,
O << "}";
}
-void ARMInstPrinter::printVectorListTwoSpaced(const MCInst *MI,
- unsigned OpNum,
+void ARMInstPrinter::printVectorListTwoSpaced(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
unsigned Reg = MI->getOperand(OpNum).getReg();
unsigned Reg0 = MRI.getSubReg(Reg, ARM::dsub_0);
@@ -1416,6 +1490,7 @@ void ARMInstPrinter::printVectorListTwoSpaced(const MCInst *MI,
}
void ARMInstPrinter::printVectorListThree(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
// Normally, it's not safe to use register enum values directly with
// addition to get the next register, but for VFP registers, the
@@ -1430,6 +1505,7 @@ void ARMInstPrinter::printVectorListThree(const MCInst *MI, unsigned OpNum,
}
void ARMInstPrinter::printVectorListFour(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
// Normally, it's not safe to use register enum values directly with
// addition to get the next register, but for VFP registers, the
@@ -1447,6 +1523,7 @@ void ARMInstPrinter::printVectorListFour(const MCInst *MI, unsigned OpNum,
void ARMInstPrinter::printVectorListOneAllLanes(const MCInst *MI,
unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
O << "{";
printRegName(O, MI->getOperand(OpNum).getReg());
@@ -1455,6 +1532,7 @@ void ARMInstPrinter::printVectorListOneAllLanes(const MCInst *MI,
void ARMInstPrinter::printVectorListTwoAllLanes(const MCInst *MI,
unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
unsigned Reg = MI->getOperand(OpNum).getReg();
unsigned Reg0 = MRI.getSubReg(Reg, ARM::dsub_0);
@@ -1468,6 +1546,7 @@ void ARMInstPrinter::printVectorListTwoAllLanes(const MCInst *MI,
void ARMInstPrinter::printVectorListThreeAllLanes(const MCInst *MI,
unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
// Normally, it's not safe to use register enum values directly with
// addition to get the next register, but for VFP registers, the
@@ -1482,8 +1561,9 @@ void ARMInstPrinter::printVectorListThreeAllLanes(const MCInst *MI,
}
void ARMInstPrinter::printVectorListFourAllLanes(const MCInst *MI,
- unsigned OpNum,
- raw_ostream &O) {
+ unsigned OpNum,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
// Normally, it's not safe to use register enum values directly with
// addition to get the next register, but for VFP registers, the
// sort order is guaranteed because they're all of the form D<n>.
@@ -1498,9 +1578,9 @@ void ARMInstPrinter::printVectorListFourAllLanes(const MCInst *MI,
O << "[]}";
}
-void ARMInstPrinter::printVectorListTwoSpacedAllLanes(const MCInst *MI,
- unsigned OpNum,
- raw_ostream &O) {
+void ARMInstPrinter::printVectorListTwoSpacedAllLanes(
+ const MCInst *MI, unsigned OpNum, const MCSubtargetInfo &STI,
+ raw_ostream &O) {
unsigned Reg = MI->getOperand(OpNum).getReg();
unsigned Reg0 = MRI.getSubReg(Reg, ARM::dsub_0);
unsigned Reg1 = MRI.getSubReg(Reg, ARM::dsub_2);
@@ -1511,24 +1591,24 @@ void ARMInstPrinter::printVectorListTwoSpacedAllLanes(const MCInst *MI,
O << "[]}";
}
-void ARMInstPrinter::printVectorListThreeSpacedAllLanes(const MCInst *MI,
- unsigned OpNum,
- raw_ostream &O) {
+void ARMInstPrinter::printVectorListThreeSpacedAllLanes(
+ const MCInst *MI, unsigned OpNum, const MCSubtargetInfo &STI,
+ raw_ostream &O) {
// Normally, it's not safe to use register enum values directly with
// addition to get the next register, but for VFP registers, the
// sort order is guaranteed because they're all of the form D<n>.
O << "{";
printRegName(O, MI->getOperand(OpNum).getReg());
- O << "[], ";
+ O << "[], ";
printRegName(O, MI->getOperand(OpNum).getReg() + 2);
O << "[], ";
printRegName(O, MI->getOperand(OpNum).getReg() + 4);
O << "[]}";
}
-void ARMInstPrinter::printVectorListFourSpacedAllLanes(const MCInst *MI,
- unsigned OpNum,
- raw_ostream &O) {
+void ARMInstPrinter::printVectorListFourSpacedAllLanes(
+ const MCInst *MI, unsigned OpNum, const MCSubtargetInfo &STI,
+ raw_ostream &O) {
// Normally, it's not safe to use register enum values directly with
// addition to get the next register, but for VFP registers, the
// sort order is guaranteed because they're all of the form D<n>.
@@ -1545,6 +1625,7 @@ void ARMInstPrinter::printVectorListFourSpacedAllLanes(const MCInst *MI,
void ARMInstPrinter::printVectorListThreeSpaced(const MCInst *MI,
unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
// Normally, it's not safe to use register enum values directly with
// addition to get the next register, but for VFP registers, the
@@ -1558,9 +1639,9 @@ void ARMInstPrinter::printVectorListThreeSpaced(const MCInst *MI,
O << "}";
}
-void ARMInstPrinter::printVectorListFourSpaced(const MCInst *MI,
- unsigned OpNum,
- raw_ostream &O) {
+void ARMInstPrinter::printVectorListFourSpaced(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
// Normally, it's not safe to use register enum values directly with
// addition to get the next register, but for VFP registers, the
// sort order is guaranteed because they're all of the form D<n>.
diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
index f179e01..3927c9f 100644
--- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
+++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
@@ -24,146 +24,207 @@ class MCOperand;
class ARMInstPrinter : public MCInstPrinter {
public:
ARMInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
- const MCRegisterInfo &MRI, const MCSubtargetInfo &STI);
+ const MCRegisterInfo &MRI);
- void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot) override;
+ void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot,
+ const MCSubtargetInfo &STI) override;
void printRegName(raw_ostream &OS, unsigned RegNo) const override;
// Autogenerated by tblgen.
- void printInstruction(const MCInst *MI, raw_ostream &O);
+ void printInstruction(const MCInst *MI, const MCSubtargetInfo &STI,
+ raw_ostream &O);
static const char *getRegisterName(unsigned RegNo);
-
- void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
-
- void printSORegRegOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printSORegImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
-
- void printAddrModeTBB(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printAddrModeTBH(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printAddrMode2Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printAM2PostIndexOp(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printOperand(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
+ raw_ostream &O);
+
+ void printSORegRegOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printSORegImmOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+
+ void printAddrModeTBB(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printAddrModeTBH(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printAddrMode2Operand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printAM2PostIndexOp(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
void printAM2PreOrOffsetIndexOp(const MCInst *MI, unsigned OpNum,
- raw_ostream &O);
+ const MCSubtargetInfo &STI, raw_ostream &O);
void printAddrMode2OffsetOperand(const MCInst *MI, unsigned OpNum,
- raw_ostream &O);
+ const MCSubtargetInfo &STI, raw_ostream &O);
template <bool AlwaysPrintImm0>
- void printAddrMode3Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printAddrMode3Operand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
void printAddrMode3OffsetOperand(const MCInst *MI, unsigned OpNum,
- raw_ostream &O);
+ const MCSubtargetInfo &STI, raw_ostream &O);
void printAM3PreOrOffsetIndexOp(const MCInst *MI, unsigned Op, raw_ostream &O,
bool AlwaysPrintImm0);
void printPostIdxImm8Operand(const MCInst *MI, unsigned OpNum,
- raw_ostream &O);
- void printPostIdxRegOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printPostIdxRegOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
void printPostIdxImm8s4Operand(const MCInst *MI, unsigned OpNum,
- raw_ostream &O);
+ const MCSubtargetInfo &STI, raw_ostream &O);
- void printLdStmModeOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printLdStmModeOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
template <bool AlwaysPrintImm0>
- void printAddrMode5Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printAddrMode6Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printAddrMode7Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printAddrMode5Operand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printAddrMode6Operand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printAddrMode7Operand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
void printAddrMode6OffsetOperand(const MCInst *MI, unsigned OpNum,
- raw_ostream &O);
+ const MCSubtargetInfo &STI, raw_ostream &O);
void printBitfieldInvMaskImmOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O);
- void printMemBOption(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printInstSyncBOption(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printShiftImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printPKHLSLShiftImm(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printPKHASRShiftImm(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printMemBOption(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printInstSyncBOption(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printShiftImmOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printPKHLSLShiftImm(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printPKHASRShiftImm(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
template <unsigned scale>
- void printAdrLabelOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printThumbS4ImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printThumbSRImm(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printThumbITMask(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printAdrLabelOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printThumbS4ImmOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printThumbSRImm(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printThumbITMask(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
void printThumbAddrModeRROperand(const MCInst *MI, unsigned OpNum,
- raw_ostream &O);
+ const MCSubtargetInfo &STI, raw_ostream &O);
void printThumbAddrModeImm5SOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O, unsigned Scale);
void printThumbAddrModeImm5S1Operand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O);
void printThumbAddrModeImm5S2Operand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O);
void printThumbAddrModeImm5S4Operand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O);
void printThumbAddrModeSPOperand(const MCInst *MI, unsigned OpNum,
- raw_ostream &O);
+ const MCSubtargetInfo &STI, raw_ostream &O);
- void printT2SOOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- template<bool AlwaysPrintImm0>
+ void printT2SOOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ template <bool AlwaysPrintImm0>
void printAddrModeImm12Operand(const MCInst *MI, unsigned OpNum,
- raw_ostream &O);
- template<bool AlwaysPrintImm0>
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ template <bool AlwaysPrintImm0>
void printT2AddrModeImm8Operand(const MCInst *MI, unsigned OpNum,
- raw_ostream &O);
- template<bool AlwaysPrintImm0>
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ template <bool AlwaysPrintImm0>
void printT2AddrModeImm8s4Operand(const MCInst *MI, unsigned OpNum,
- raw_ostream &O);
+ const MCSubtargetInfo &STI, raw_ostream &O);
void printT2AddrModeImm0_1020s4Operand(const MCInst *MI, unsigned OpNum,
- raw_ostream &O);
+ const MCSubtargetInfo &STI,
+ raw_ostream &O);
void printT2AddrModeImm8OffsetOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O);
void printT2AddrModeImm8s4OffsetOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O);
void printT2AddrModeSoRegOperand(const MCInst *MI, unsigned OpNum,
- raw_ostream &O);
-
- void printSetendOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printCPSIMod(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printCPSIFlag(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printMSRMaskOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printBankedRegOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printPredicateOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ const MCSubtargetInfo &STI, raw_ostream &O);
+
+ void printSetendOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printCPSIMod(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printCPSIFlag(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printMSRMaskOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printBankedRegOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printPredicateOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
void printMandatoryPredicateOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O);
void printSBitModifierOperand(const MCInst *MI, unsigned OpNum,
- raw_ostream &O);
- void printRegisterList(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printNoHashImmediate(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printPImmediate(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printCImmediate(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printCoprocOptionImm(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printFPImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printNEONModImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printImmPlusOneOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printRotImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printModImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printGPRPairOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
-
- void printPCLabel(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printRegisterList(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printNoHashImmediate(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printPImmediate(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printCImmediate(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printCoprocOptionImm(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printFPImmOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printNEONModImmOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printImmPlusOneOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printRotImmOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printModImmOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printGPRPairOperand(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+
+ void printPCLabel(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
void printThumbLdrLabelOperand(const MCInst *MI, unsigned OpNum,
- raw_ostream &O);
- void printFBits16(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printFBits32(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printVectorIndex(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printVectorListOne(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printVectorListTwo(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printFBits16(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printFBits32(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printVectorIndex(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printVectorListOne(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printVectorListTwo(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
void printVectorListTwoSpaced(const MCInst *MI, unsigned OpNum,
- raw_ostream &O);
- void printVectorListThree(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printVectorListFour(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printVectorListThree(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printVectorListFour(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
void printVectorListOneAllLanes(const MCInst *MI, unsigned OpNum,
- raw_ostream &O);
+ const MCSubtargetInfo &STI, raw_ostream &O);
void printVectorListTwoAllLanes(const MCInst *MI, unsigned OpNum,
- raw_ostream &O);
+ const MCSubtargetInfo &STI, raw_ostream &O);
void printVectorListThreeAllLanes(const MCInst *MI, unsigned OpNum,
- raw_ostream &O);
+ const MCSubtargetInfo &STI, raw_ostream &O);
void printVectorListFourAllLanes(const MCInst *MI, unsigned OpNum,
- raw_ostream &O);
+ const MCSubtargetInfo &STI, raw_ostream &O);
void printVectorListTwoSpacedAllLanes(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O);
void printVectorListThreeSpacedAllLanes(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O);
void printVectorListFourSpacedAllLanes(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O);
void printVectorListThreeSpaced(const MCInst *MI, unsigned OpNum,
- raw_ostream &O);
+ const MCSubtargetInfo &STI, raw_ostream &O);
void printVectorListFourSpaced(const MCInst *MI, unsigned OpNum,
- raw_ostream &O);
+ const MCSubtargetInfo &STI, raw_ostream &O);
};
} // end namespace llvm
diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
index 0b2e3b0..590d72f 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
@@ -153,18 +153,20 @@ void ARMAsmBackend::handleAssemblerFlag(MCAssemblerFlag Flag) {
}
} // end anonymous namespace
-static unsigned getRelaxedOpcode(unsigned Op) {
+unsigned ARMAsmBackend::getRelaxedOpcode(unsigned Op) const {
+ bool HasThumb2 = STI->getFeatureBits() & ARM::FeatureThumb2;
+
switch (Op) {
default:
return Op;
case ARM::tBcc:
- return ARM::t2Bcc;
+ return HasThumb2 ? (unsigned)ARM::t2Bcc : Op;
case ARM::tLDRpci:
- return ARM::t2LDRpci;
+ return HasThumb2 ? (unsigned)ARM::t2LDRpci : Op;
case ARM::tADR:
- return ARM::t2ADR;
+ return HasThumb2 ? (unsigned)ARM::t2ADR : Op;
case ARM::tB:
- return ARM::t2B;
+ return HasThumb2 ? (unsigned)ARM::t2B : Op;
case ARM::tCBZ:
return ARM::tHINT;
case ARM::tCBNZ:
@@ -589,7 +591,7 @@ void ARMAsmBackend::processFixupValue(const MCAssembler &Asm,
(unsigned)Fixup.getKind() != ARM::fixup_t2_adr_pcrel_12 &&
(unsigned)Fixup.getKind() != ARM::fixup_arm_thumb_cp) {
if (A) {
- const MCSymbol &Sym = A->getSymbol().AliasedSymbol();
+ const MCSymbol &Sym = A->getSymbol();
if (Asm.isThumbFunc(&Sym))
Value |= 1;
}
@@ -598,7 +600,7 @@ void ARMAsmBackend::processFixupValue(const MCAssembler &Asm,
// the basic blocks of the same function. Thus, we would like to resolve
// the offset when the destination has the same MCFragment.
if (A && (unsigned)Fixup.getKind() == ARM::fixup_arm_thumb_bl) {
- const MCSymbol &Sym = A->getSymbol().AliasedSymbol();
+ const MCSymbol &Sym = A->getSymbol();
const MCSymbolData &SymData = Asm.getSymbolData(Sym);
IsResolved = (SymData.getFragment() == DF);
}
diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h
index f4f1082..4fa8c79 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h
@@ -47,6 +47,8 @@ public:
void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
uint64_t Value, bool IsPCRel) const override;
+ unsigned getRelaxedOpcode(unsigned Op) const;
+
bool mayNeedRelaxation(const MCInst &Inst) const override;
bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value,
diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h b/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h
index 3bd7ab7..ebef789 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h
@@ -23,7 +23,7 @@ public:
HasDataInCodeSupport = true;
}
- MCObjectWriter *createObjectWriter(raw_ostream &OS) const override {
+ MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override {
return createARMMachObjectWriter(OS, /*Is64Bit=*/false, MachO::CPU_TYPE_ARM,
Subtype);
}
diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackendELF.h b/lib/Target/ARM/MCTargetDesc/ARMAsmBackendELF.h
index 4efd325..263c4c4 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackendELF.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackendELF.h
@@ -18,7 +18,7 @@ public:
ARMAsmBackendELF(const Target &T, StringRef TT, uint8_t OSABI, bool IsLittle)
: ARMAsmBackend(T, TT, IsLittle), OSABI(OSABI) {}
- MCObjectWriter *createObjectWriter(raw_ostream &OS) const override {
+ MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override {
return createARMELFObjectWriter(OS, OSABI, isLittle());
}
};
diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackendWinCOFF.h b/lib/Target/ARM/MCTargetDesc/ARMAsmBackendWinCOFF.h
index 33be347..f2c4358 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackendWinCOFF.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackendWinCOFF.h
@@ -17,7 +17,7 @@ class ARMAsmBackendWinCOFF : public ARMAsmBackend {
public:
ARMAsmBackendWinCOFF(const Target &T, StringRef Triple)
: ARMAsmBackend(T, Triple, true) {}
- MCObjectWriter *createObjectWriter(raw_ostream &OS) const override {
+ MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override {
return createARMWinCOFFObjectWriter(OS, /*Is64Bit=*/false);
}
};
diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
index a821a6b..f4fedee 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
@@ -32,7 +32,7 @@ namespace {
public:
ARMELFObjectWriter(uint8_t OSABI);
- virtual ~ARMELFObjectWriter();
+ ~ARMELFObjectWriter() override;
unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup,
bool IsPCRel) const override;
@@ -81,7 +81,9 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target,
unsigned Type = 0;
if (IsPCRel) {
switch ((unsigned)Fixup.getKind()) {
- default: llvm_unreachable("Unimplemented");
+ default:
+ report_fatal_error("unsupported relocation on symbol");
+ return ELF::R_ARM_NONE;
case FK_Data_4:
switch (Modifier) {
default: llvm_unreachable("Unsupported Modifier");
@@ -147,7 +149,9 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target,
}
} else {
switch ((unsigned)Fixup.getKind()) {
- default: llvm_unreachable("invalid fixup kind!");
+ default:
+ report_fatal_error("unsupported relocation on symbol");
+ return ELF::R_ARM_NONE;
case FK_Data_1:
switch (Modifier) {
default: llvm_unreachable("unsupported Modifier");
@@ -247,7 +251,7 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target,
return Type;
}
-MCObjectWriter *llvm::createARMELFObjectWriter(raw_ostream &OS,
+MCObjectWriter *llvm::createARMELFObjectWriter(raw_pwrite_stream &OS,
uint8_t OSABI,
bool IsLittleEndian) {
MCELFObjectTargetWriter *MOTW = new ARMELFObjectWriter(OSABI);
diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
index 9648ffa..e7c777e 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
@@ -472,7 +472,7 @@ class ARMELFStreamer : public MCELFStreamer {
public:
friend class ARMTargetELFStreamer;
- ARMELFStreamer(MCContext &Context, MCAsmBackend &TAB, raw_ostream &OS,
+ ARMELFStreamer(MCContext &Context, MCAsmBackend &TAB, raw_pwrite_stream &OS,
MCCodeEmitter *Emitter, bool IsThumb)
: MCELFStreamer(Context, TAB, OS, Emitter), IsThumb(IsThumb),
MappingSymbolCounter(0), LastEMS(EMS_None) {
@@ -1083,14 +1083,13 @@ inline void ARMELFStreamer::SwitchToEHSection(const char *Prefix,
}
// Get .ARM.extab or .ARM.exidx section
- const MCSectionELF *EHSection = nullptr;
- if (const MCSymbol *Group = FnSection.getGroup()) {
- EHSection =
- getContext().getELFSection(EHSecName, Type, Flags | ELF::SHF_GROUP,
- FnSection.getEntrySize(), Group->getName());
- } else {
- EHSection = getContext().getELFSection(EHSecName, Type, Flags);
- }
+ const MCSymbol *Group = FnSection.getGroup();
+ if (Group)
+ Flags |= ELF::SHF_GROUP;
+ const MCSectionELF *EHSection =
+ getContext().getELFSection(EHSecName, Type, Flags, 0, Group,
+ FnSection.getUniqueID(), nullptr, &FnSection);
+
assert(EHSection && "Failed to get the required EH section");
// Switch to .ARM.extab or .ARM.exidx section
@@ -1383,8 +1382,9 @@ MCTargetStreamer *createARMObjectTargetStreamer(MCStreamer &S,
}
MCELFStreamer *createARMELFStreamer(MCContext &Context, MCAsmBackend &TAB,
- raw_ostream &OS, MCCodeEmitter *Emitter,
- bool RelaxAll, bool IsThumb) {
+ raw_pwrite_stream &OS,
+ MCCodeEmitter *Emitter, bool RelaxAll,
+ bool IsThumb) {
ARMELFStreamer *S = new ARMELFStreamer(Context, TAB, OS, Emitter, IsThumb);
// FIXME: This should eventually end up somewhere else where more
// intelligent flag decisions can be made. For now we are just maintaining
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
index e48cabb..6b650f0 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
@@ -48,7 +48,7 @@ public:
: MCII(mcii), CTX(ctx), IsLittleEndian(IsLittle) {
}
- ~ARMMCCodeEmitter() {}
+ ~ARMMCCodeEmitter() override {}
bool isThumb(const MCSubtargetInfo &STI) const {
return (STI.getFeatureBits() & ARM::ModeThumb) != 0;
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
index 7ff7f9a..daa8af2 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
@@ -255,7 +255,7 @@ MCSubtargetInfo *ARM_MC::createARMMCSubtargetInfo(StringRef TT, StringRef CPU,
std::string ArchFS = ARM_MC::ParseARMTriple(TT, CPU);
if (!FS.empty()) {
if (!ArchFS.empty())
- ArchFS = ArchFS + "," + FS.str();
+ ArchFS = (Twine(ArchFS) + "," + FS).str();
else
ArchFS = FS;
}
@@ -310,27 +310,26 @@ static MCCodeGenInfo *createARMMCCodeGenInfo(StringRef TT, Reloc::Model RM,
}
static MCStreamer *createELFStreamer(const Triple &T, MCContext &Ctx,
- MCAsmBackend &MAB, raw_ostream &OS,
+ MCAsmBackend &MAB, raw_pwrite_stream &OS,
MCCodeEmitter *Emitter, bool RelaxAll) {
return createARMELFStreamer(Ctx, MAB, OS, Emitter, false,
T.getArch() == Triple::thumb);
}
static MCStreamer *createARMMachOStreamer(MCContext &Ctx, MCAsmBackend &MAB,
- raw_ostream &OS,
+ raw_pwrite_stream &OS,
MCCodeEmitter *Emitter, bool RelaxAll,
bool DWARFMustBeAtTheEnd) {
return createMachOStreamer(Ctx, MAB, OS, Emitter, false, DWARFMustBeAtTheEnd);
}
-static MCInstPrinter *createARMMCInstPrinter(const Target &T,
+static MCInstPrinter *createARMMCInstPrinter(const Triple &T,
unsigned SyntaxVariant,
const MCAsmInfo &MAI,
const MCInstrInfo &MII,
- const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI) {
+ const MCRegisterInfo &MRI) {
if (SyntaxVariant == 0)
- return new ARMInstPrinter(MAI, MII, MRI, STI);
+ return new ARMInstPrinter(MAI, MII, MRI);
return nullptr;
}
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h
index 7e9ba66..24ca567 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h
@@ -34,6 +34,7 @@ class StringRef;
class Target;
class Triple;
class raw_ostream;
+class raw_pwrite_stream;
extern Target TheARMLETarget, TheThumbLETarget;
extern Target TheARMBETarget, TheThumbBETarget;
@@ -41,9 +42,8 @@ extern Target TheARMBETarget, TheThumbBETarget;
namespace ARM_MC {
std::string ParseARMTriple(StringRef TT, StringRef CPU);
- /// createARMMCSubtargetInfo - Create a ARM MCSubtargetInfo instance.
- /// This is exposed so Asm parser, etc. do not need to go through
- /// TargetRegistry.
+ /// Create an ARM MCSubtargetInfo instance. This is exposed so Asm parser,
+ /// etc. do not need to go through TargetRegistry.
MCSubtargetInfo *createARMMCSubtargetInfo(StringRef TT, StringRef CPU,
StringRef FS);
}
@@ -83,24 +83,23 @@ MCAsmBackend *createThumbBEAsmBackend(const Target &T, const MCRegisterInfo &MRI
// Construct a PE/COFF machine code streamer which will generate a PE/COFF
// object file.
MCStreamer *createARMWinCOFFStreamer(MCContext &Context, MCAsmBackend &MAB,
- raw_ostream &OS, MCCodeEmitter *Emitter,
- bool RelaxAll);
+ raw_pwrite_stream &OS,
+ MCCodeEmitter *Emitter, bool RelaxAll);
-/// createARMELFObjectWriter - Construct an ELF Mach-O object writer.
-MCObjectWriter *createARMELFObjectWriter(raw_ostream &OS,
- uint8_t OSABI,
+/// Construct an ARM ELF object writer.
+MCObjectWriter *createARMELFObjectWriter(raw_pwrite_stream &OS, uint8_t OSABI,
bool IsLittleEndian);
-/// createARMMachObjectWriter - Construct an ARM Mach-O object writer.
-MCObjectWriter *createARMMachObjectWriter(raw_ostream &OS,
- bool Is64Bit,
+/// Construct an ARM Mach-O object writer.
+MCObjectWriter *createARMMachObjectWriter(raw_pwrite_stream &OS, bool Is64Bit,
uint32_t CPUType,
uint32_t CPUSubtype);
-/// createARMWinCOFFObjectWriter - Construct an ARM PE/COFF object writer.
-MCObjectWriter *createARMWinCOFFObjectWriter(raw_ostream &OS, bool Is64Bit);
+/// Construct an ARM PE/COFF object writer.
+MCObjectWriter *createARMWinCOFFObjectWriter(raw_pwrite_stream &OS,
+ bool Is64Bit);
-/// createARMMachORelocationInfo - Construct ARM Mach-O relocation info.
+/// Construct ARM Mach-O relocation info.
MCRelocationInfo *createARMMachORelocationInfo(MCContext &Ctx);
} // End llvm namespace
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
index 3187d36..b1f9b58 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
@@ -88,6 +88,7 @@ static bool getARMFixupKindMachOInfo(unsigned Kind, unsigned &RelocType,
case ARM::fixup_arm_ldst_pcrel_12:
case ARM::fixup_arm_pcrel_10:
case ARM::fixup_arm_adr_pcrel_12:
+ case ARM::fixup_arm_thumb_br:
return false;
// Handle 24-bit branch kinds.
@@ -101,12 +102,6 @@ static bool getARMFixupKindMachOInfo(unsigned Kind, unsigned &RelocType,
Log2Size = llvm::Log2_32(4);
return true;
- // Handle Thumb branches.
- case ARM::fixup_arm_thumb_br:
- RelocType = unsigned(MachO::ARM_THUMB_RELOC_BR22);
- Log2Size = llvm::Log2_32(2);
- return true;
-
case ARM::fixup_t2_uncondbranch:
case ARM::fixup_arm_thumb_bl:
case ARM::fixup_arm_thumb_blx:
@@ -477,9 +472,8 @@ void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer,
Writer->addRelocation(RelSymbol, Fragment->getParent(), MRE);
}
-MCObjectWriter *llvm::createARMMachObjectWriter(raw_ostream &OS,
- bool Is64Bit,
- uint32_t CPUType,
+MCObjectWriter *llvm::createARMMachObjectWriter(raw_pwrite_stream &OS,
+ bool Is64Bit, uint32_t CPUType,
uint32_t CPUSubtype) {
return createMachObjectWriter(new ARMMachObjectWriter(Is64Bit,
CPUType,
diff --git a/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp
index 2fd6445..166c04b 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp
@@ -26,7 +26,7 @@ public:
: MCWinCOFFObjectTargetWriter(COFF::IMAGE_FILE_MACHINE_ARMNT) {
assert(!Is64Bit && "AArch64 support not yet implemented");
}
- virtual ~ARMWinCOFFObjectWriter() { }
+ ~ARMWinCOFFObjectWriter() override {}
unsigned getRelocType(const MCValue &Target, const MCFixup &Fixup,
bool IsCrossSection,
@@ -82,7 +82,8 @@ bool ARMWinCOFFObjectWriter::recordRelocation(const MCFixup &Fixup) const {
}
namespace llvm {
-MCObjectWriter *createARMWinCOFFObjectWriter(raw_ostream &OS, bool Is64Bit) {
+MCObjectWriter *createARMWinCOFFObjectWriter(raw_pwrite_stream &OS,
+ bool Is64Bit) {
MCWinCOFFObjectTargetWriter *MOTW = new ARMWinCOFFObjectWriter(Is64Bit);
return createWinCOFFObjectWriter(MOTW, OS);
}
diff --git a/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp b/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp
index dc707dc..b993b1b 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp
@@ -16,8 +16,8 @@ namespace {
class ARMWinCOFFStreamer : public MCWinCOFFStreamer {
public:
ARMWinCOFFStreamer(MCContext &C, MCAsmBackend &AB, MCCodeEmitter &CE,
- raw_ostream &OS)
- : MCWinCOFFStreamer(C, AB, CE, OS) { }
+ raw_pwrite_stream &OS)
+ : MCWinCOFFStreamer(C, AB, CE, OS) {}
void EmitAssemblerFlag(MCAssemblerFlag Flag) override;
void EmitThumbFunc(MCSymbol *Symbol) override;
@@ -38,7 +38,8 @@ void ARMWinCOFFStreamer::EmitThumbFunc(MCSymbol *Symbol) {
}
MCStreamer *llvm::createARMWinCOFFStreamer(MCContext &Context,
- MCAsmBackend &MAB, raw_ostream &OS,
+ MCAsmBackend &MAB,
+ raw_pwrite_stream &OS,
MCCodeEmitter *Emitter,
bool RelaxAll) {
return new ARMWinCOFFStreamer(Context, MAB, *Emitter, OS);
diff --git a/lib/Target/BPF/BPFISelDAGToDAG.cpp b/lib/Target/BPF/BPFISelDAGToDAG.cpp
index b91b0e1..b2599fe 100644
--- a/lib/Target/BPF/BPFISelDAGToDAG.cpp
+++ b/lib/Target/BPF/BPFISelDAGToDAG.cpp
@@ -132,7 +132,7 @@ SDNode *BPFDAGToDAGISel::Select(SDNode *Node) {
}
case ISD::FrameIndex: {
- int FI = dyn_cast<FrameIndexSDNode>(Node)->getIndex();
+ int FI = cast<FrameIndexSDNode>(Node)->getIndex();
EVT VT = Node->getValueType(0);
SDValue TFI = CurDAG->getTargetFrameIndex(FI, VT);
unsigned Opc = BPF::MOV_rr;
diff --git a/lib/Target/BPF/BPFISelLowering.cpp b/lib/Target/BPF/BPFISelLowering.cpp
index d94416b..37f9164 100644
--- a/lib/Target/BPF/BPFISelLowering.cpp
+++ b/lib/Target/BPF/BPFISelLowering.cpp
@@ -63,11 +63,11 @@ public:
std::string Str;
raw_string_ostream OS(Str);
- if (DLoc.isUnknown() == false) {
- DILocation DIL(DLoc.getAsMDNode(Fn.getContext()));
- StringRef Filename = DIL.getFilename();
- unsigned Line = DIL.getLineNumber();
- unsigned Column = DIL.getColumnNumber();
+ if (DLoc) {
+ auto DIL = DLoc.get();
+ StringRef Filename = DIL->getFilename();
+ unsigned Line = DIL->getLine();
+ unsigned Column = DIL->getColumn();
OS << Filename << ':' << Line << ':' << Column << ' ';
}
@@ -137,7 +137,6 @@ BPFTargetLowering::BPFTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SRL_PARTS, MVT::i64, Expand);
setOperationAction(ISD::SRA_PARTS, MVT::i64, Expand);
- setOperationAction(ISD::BSWAP, MVT::i64, Expand);
setOperationAction(ISD::CTTZ, MVT::i64, Custom);
setOperationAction(ISD::CTLZ, MVT::i64, Custom);
setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Custom);
@@ -538,12 +537,10 @@ SDValue BPFTargetLowering::LowerGlobalAddress(SDValue Op,
MachineBasicBlock *
BPFTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
MachineBasicBlock *BB) const {
- unsigned Opc = MI->getOpcode();
-
const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();
DebugLoc DL = MI->getDebugLoc();
- assert(Opc == BPF::Select && "Unexpected instr type to insert");
+ assert(MI->getOpcode() == BPF::Select && "Unexpected instr type to insert");
// To "insert" a SELECT instruction, we actually have to insert the diamond
// control-flow pattern. The incoming instruction knows the destination vreg
diff --git a/lib/Target/BPF/BPFInstrInfo.td b/lib/Target/BPF/BPFInstrInfo.td
index 47001f0..26b2cfe 100644
--- a/lib/Target/BPF/BPFInstrInfo.td
+++ b/lib/Target/BPF/BPFInstrInfo.td
@@ -231,8 +231,6 @@ class MOV_RI<string OpcodeStr>
let BPFSrc = 0; // BPF_K
let BPFClass = 7; // BPF_ALU64
}
-def MOV_rr : MOV_RR<"mov">;
-def MOV_ri : MOV_RI<"mov">;
class LD_IMM64<bits<4> Pseudo, string OpcodeStr>
: InstBPF<(outs GPR:$dst), (ins u64imm:$imm),
@@ -255,7 +253,35 @@ class LD_IMM64<bits<4> Pseudo, string OpcodeStr>
let size = 3; // BPF_DW
let BPFClass = 0; // BPF_LD
}
+
+let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
def LD_imm64 : LD_IMM64<0, "ld_64">;
+def MOV_rr : MOV_RR<"mov">;
+def MOV_ri : MOV_RI<"mov">;
+}
+// LD_pseudo: selected for the int_bpf_pseudo intrinsic; encoded as BPF_LD | BPF_IMM | BPF_DW (see the lets below).
+def LD_pseudo
+ : InstBPF<(outs GPR:$dst), (ins i64imm:$pseudo, u64imm:$imm),
+ "ld_pseudo\t$dst, $pseudo, $imm",
+ [(set GPR:$dst, (int_bpf_pseudo imm:$pseudo, imm:$imm))]> {
+
+ bits<3> mode;
+ bits<2> size;
+ bits<4> dst;
+ bits<64> imm;
+ bits<4> pseudo;
+
+ let Inst{63-61} = mode;
+ let Inst{60-59} = size;
+ let Inst{51-48} = dst;
+ let Inst{55-52} = pseudo; // the $pseudo operand occupies bits 55-52
+ let Inst{47-32} = 0; // bits 47-32 are always zero
+ let Inst{31-0} = imm{31-0}; // only the low 32 bits of imm are placed in Inst here
+
+ let mode = 0; // BPF_IMM
+ let size = 3; // BPF_DW
+ let BPFClass = 0; // BPF_LD
+}
// STORE instructions
class STORE<bits<2> SizeOp, string OpcodeStr, list<dag> Pattern>
@@ -461,6 +487,33 @@ def XADD64 : XADD<3, "xadd64", atomic_load_add_64>;
// undefined def XADD8 : XADD<2, "xadd8", atomic_load_add_8>;
}
+// Byte-swap instruction class used by bswap16, bswap32, bswap64; SizeOp is emitted as the 32-bit imm field.
+class BSWAP<bits<32> SizeOp, string OpcodeStr, list<dag> Pattern>
+ : InstBPF<(outs GPR:$dst), (ins GPR:$src),
+ !strconcat(OpcodeStr, "\t$dst"), // the assembly string prints only $dst
+ Pattern> {
+ bits<4> op;
+ bits<1> BPFSrc;
+ bits<4> dst;
+ bits<32> imm;
+
+ let Inst{63-60} = op;
+ let Inst{59} = BPFSrc;
+ let Inst{51-48} = dst;
+ let Inst{31-0} = imm;
+
+ let op = 0xd; // BPF_END
+ let BPFSrc = 1; // BPF_TO_BE (TODO: use BPF_TO_LE for big-endian target)
+ let BPFClass = 4; // BPF_ALU
+ let imm = SizeOp; // 16, 32 or 64 for the BSWAP16/32/64 defs
+}
+// $dst is tied to $src. The 16/32-bit forms match a 64-bit bswap followed by a logical right shift by 48/32.
+let Constraints = "$dst = $src" in {
+def BSWAP16 : BSWAP<16, "bswap16", [(set GPR:$dst, (srl (bswap GPR:$src), (i64 48)))]>;
+def BSWAP32 : BSWAP<32, "bswap32", [(set GPR:$dst, (srl (bswap GPR:$src), (i64 32)))]>;
+def BSWAP64 : BSWAP<64, "bswap64", [(set GPR:$dst, (bswap GPR:$src))]>;
+}
+
let Defs = [R0, R1, R2, R3, R4, R5], Uses = [R6], hasSideEffects = 1,
hasExtraDefRegAllocReq = 1, hasExtraSrcRegAllocReq = 1, mayLoad = 1 in {
class LOAD_ABS<bits<2> SizeOp, string OpcodeStr, Intrinsic OpNode>
diff --git a/lib/Target/BPF/InstPrinter/BPFInstPrinter.cpp b/lib/Target/BPF/InstPrinter/BPFInstPrinter.cpp
index 3f09379..05f6d82 100644
--- a/lib/Target/BPF/InstPrinter/BPFInstPrinter.cpp
+++ b/lib/Target/BPF/InstPrinter/BPFInstPrinter.cpp
@@ -27,7 +27,7 @@ using namespace llvm;
#include "BPFGenAsmWriter.inc"
void BPFInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
- StringRef Annot) {
+ StringRef Annot, const MCSubtargetInfo &STI) {
printInstruction(MI, O);
printAnnotation(O, Annot);
}
diff --git a/lib/Target/BPF/InstPrinter/BPFInstPrinter.h b/lib/Target/BPF/InstPrinter/BPFInstPrinter.h
index d7c2899..adcaff6 100644
--- a/lib/Target/BPF/InstPrinter/BPFInstPrinter.h
+++ b/lib/Target/BPF/InstPrinter/BPFInstPrinter.h
@@ -25,7 +25,8 @@ public:
const MCRegisterInfo &MRI)
: MCInstPrinter(MAI, MII, MRI) {}
- void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot) override;
+ void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot,
+ const MCSubtargetInfo &STI) override;
void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O,
const char *Modifier = nullptr);
void printMemOperand(const MCInst *MI, int OpNo, raw_ostream &O,
diff --git a/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp b/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp
index 87c8077..8393135 100644
--- a/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp
+++ b/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp
@@ -31,7 +31,7 @@ public:
void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
uint64_t Value, bool IsPCRel) const override;
- MCObjectWriter *createObjectWriter(raw_ostream &OS) const override;
+ MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override;
// No instruction requires relaxation
bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value,
@@ -71,7 +71,7 @@ void BPFAsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
*(uint16_t *)&Data[Fixup.getOffset() + 2] = (uint16_t)((Value - 8) / 8);
}
-MCObjectWriter *BPFAsmBackend::createObjectWriter(raw_ostream &OS) const {
+MCObjectWriter *BPFAsmBackend::createObjectWriter(raw_pwrite_stream &OS) const {
return createBPFELFObjectWriter(OS, 0);
}
}
diff --git a/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp b/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp
index 169a8a7..a5562c1 100644
--- a/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp
+++ b/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp
@@ -47,7 +47,7 @@ unsigned BPFELFObjectWriter::GetRelocType(const MCValue &Target,
}
}
-MCObjectWriter *llvm::createBPFELFObjectWriter(raw_ostream &OS, uint8_t OSABI) {
+MCObjectWriter *llvm::createBPFELFObjectWriter(raw_pwrite_stream &OS, uint8_t OSABI) {
MCELFObjectTargetWriter *MOTW = new BPFELFObjectWriter(OSABI);
return createELFObjectWriter(MOTW, OS, /*IsLittleEndian=*/true);
}
diff --git a/lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp b/lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp
index 9c51d66..32d2ef5 100644
--- a/lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp
+++ b/lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp
@@ -76,9 +76,8 @@ unsigned BPFMCCodeEmitter::getMachineOpValue(const MCInst &MI,
assert(MO.isExpr());
const MCExpr *Expr = MO.getExpr();
- MCExpr::ExprKind Kind = Expr->getKind();
- assert(Kind == MCExpr::SymbolRef);
+ assert(Expr->getKind() == MCExpr::SymbolRef);
if (MI.getOpcode() == BPF::JAL)
// func call name
@@ -125,7 +124,7 @@ void BPFMCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS,
// Keep track of the current byte being emitted
unsigned CurByte = 0;
- if (Opcode == BPF::LD_imm64) {
+ if (Opcode == BPF::LD_imm64 || Opcode == BPF::LD_pseudo) {
uint64_t Value = getBinaryCodeForInstr(MI, Fixups, STI);
EmitByte(Value >> 56, CurByte, OS);
EmitByte(((Value >> 48) & 0xff), CurByte, OS);
diff --git a/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp b/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp
index fd04001..95f0b02 100644
--- a/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp
+++ b/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp
@@ -63,15 +63,16 @@ static MCCodeGenInfo *createBPFMCCodeGenInfo(StringRef TT, Reloc::Model RM,
static MCStreamer *createBPFMCStreamer(const Triple &T,
MCContext &Ctx, MCAsmBackend &MAB,
- raw_ostream &OS, MCCodeEmitter *Emitter,
+ raw_pwrite_stream &OS, MCCodeEmitter *Emitter,
bool RelaxAll) {
return createELFStreamer(Ctx, MAB, OS, Emitter, RelaxAll);
}
-static MCInstPrinter *
-createBPFMCInstPrinter(const Target &T, unsigned SyntaxVariant,
- const MCAsmInfo &MAI, const MCInstrInfo &MII,
- const MCRegisterInfo &MRI, const MCSubtargetInfo &STI) {
+static MCInstPrinter *createBPFMCInstPrinter(const Triple &T,
+ unsigned SyntaxVariant,
+ const MCAsmInfo &MAI,
+ const MCInstrInfo &MII,
+ const MCRegisterInfo &MRI) {
if (SyntaxVariant == 0)
return new BPFInstPrinter(MAI, MII, MRI);
return 0;
diff --git a/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.h b/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.h
index 1fd2bec..ce08b7c 100644
--- a/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.h
+++ b/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.h
@@ -28,6 +28,7 @@ class MCSubtargetInfo;
class Target;
class StringRef;
class raw_ostream;
+class raw_pwrite_stream;
extern Target TheBPFTarget;
@@ -38,7 +39,7 @@ MCCodeEmitter *createBPFMCCodeEmitter(const MCInstrInfo &MCII,
MCAsmBackend *createBPFAsmBackend(const Target &T, const MCRegisterInfo &MRI,
StringRef TT, StringRef CPU);
-MCObjectWriter *createBPFELFObjectWriter(raw_ostream &OS, uint8_t OSABI);
+MCObjectWriter *createBPFELFObjectWriter(raw_pwrite_stream &OS, uint8_t OSABI);
}
// Defines symbolic names for BPF registers. This defines a mapping from
diff --git a/lib/Target/BPF/MCTargetDesc/LLVMBuild.txt b/lib/Target/BPF/MCTargetDesc/LLVMBuild.txt
index 209d17c..8bca2e3 100644
--- a/lib/Target/BPF/MCTargetDesc/LLVMBuild.txt
+++ b/lib/Target/BPF/MCTargetDesc/LLVMBuild.txt
@@ -19,5 +19,5 @@
type = Library
name = BPFDesc
parent = BPF
-required_libraries = MC BPFAsmPrinter BPFInfo
+required_libraries = MC BPFAsmPrinter BPFInfo Support
add_to_library_groups = BPF
diff --git a/lib/Target/CppBackend/CPPBackend.cpp b/lib/Target/CppBackend/CPPBackend.cpp
index d0e2010..f1a7127 100644
--- a/lib/Target/CppBackend/CPPBackend.cpp
+++ b/lib/Target/CppBackend/CPPBackend.cpp
@@ -15,6 +15,7 @@
#include "CPPTargetMachine.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/Config/config.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constants.h"
@@ -91,6 +92,7 @@ namespace {
/// CppWriter - This class is the main chunk of code that converts an LLVM
/// module to a C++ translation unit.
class CppWriter : public ModulePass {
+ std::unique_ptr<formatted_raw_ostream> OutOwner;
formatted_raw_ostream &Out;
const Module *TheModule;
uint64_t uniqueNum;
@@ -105,8 +107,9 @@ namespace {
public:
static char ID;
- explicit CppWriter(formatted_raw_ostream &o) :
- ModulePass(ID), Out(o), uniqueNum(0), is_inline(false), indent_level(0){}
+ explicit CppWriter(std::unique_ptr<formatted_raw_ostream> o)
+ : ModulePass(ID), OutOwner(std::move(o)), Out(*OutOwner), uniqueNum(0),
+ is_inline(false), indent_level(0) {}
const char *getPassName() const override { return "C++ backend"; }
@@ -1721,7 +1724,7 @@ void CppWriter::printFunctionUses(const Function* F) {
// initializers.
if (GenerationType != GenFunction) {
nl(Out) << "// Global Variable Definitions"; nl(Out);
- for (const auto &GV : gvs) {
+ for (auto *GV : gvs) {
if (GlobalVariable *Var = dyn_cast<GlobalVariable>(GV))
printVariableBody(Var);
}
@@ -2146,13 +2149,12 @@ char CppWriter::ID = 0;
// External Interface declaration
//===----------------------------------------------------------------------===//
-bool CPPTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
- formatted_raw_ostream &o,
- CodeGenFileType FileType,
- bool DisableVerify,
- AnalysisID StartAfter,
- AnalysisID StopAfter) {
- if (FileType != TargetMachine::CGFT_AssemblyFile) return true;
- PM.add(new CppWriter(o));
+bool CPPTargetMachine::addPassesToEmitFile(
+ PassManagerBase &PM, raw_pwrite_stream &o, CodeGenFileType FileType,
+ bool DisableVerify, AnalysisID StartAfter, AnalysisID StopAfter) {
+ if (FileType != TargetMachine::CGFT_AssemblyFile)
+ return true;
+ auto FOut = llvm::make_unique<formatted_raw_ostream>(o);
+ PM.add(new CppWriter(std::move(FOut)));
return false;
}
diff --git a/lib/Target/CppBackend/CPPTargetMachine.h b/lib/Target/CppBackend/CPPTargetMachine.h
index 678a932..02d705e 100644
--- a/lib/Target/CppBackend/CPPTargetMachine.h
+++ b/lib/Target/CppBackend/CPPTargetMachine.h
@@ -29,7 +29,7 @@ struct CPPTargetMachine : public TargetMachine {
: TargetMachine(T, "", TT, CPU, FS, Options) {}
public:
- bool addPassesToEmitFile(PassManagerBase &PM, formatted_raw_ostream &Out,
+ bool addPassesToEmitFile(PassManagerBase &PM, raw_pwrite_stream &Out,
CodeGenFileType FileType, bool DisableVerify,
AnalysisID StartAfter,
AnalysisID StopAfter) override;
diff --git a/lib/Target/Hexagon/CMakeLists.txt b/lib/Target/Hexagon/CMakeLists.txt
index c6ffb96..758ccc7 100644
--- a/lib/Target/Hexagon/CMakeLists.txt
+++ b/lib/Target/Hexagon/CMakeLists.txt
@@ -15,6 +15,7 @@ add_llvm_target(HexagonCodeGen
HexagonAsmPrinter.cpp
HexagonCFGOptimizer.cpp
HexagonCopyToCombine.cpp
+ HexagonExpandCondsets.cpp
HexagonExpandPredSpillCode.cpp
HexagonFixupHwLoops.cpp
HexagonFrameLowering.cpp
diff --git a/lib/Target/Hexagon/HexagonAsmPrinter.cpp b/lib/Target/Hexagon/HexagonAsmPrinter.cpp
index 180762f..f0c81e0 100644
--- a/lib/Target/Hexagon/HexagonAsmPrinter.cpp
+++ b/lib/Target/Hexagon/HexagonAsmPrinter.cpp
@@ -222,21 +222,6 @@ void HexagonAsmPrinter::EmitInstruction(const MachineInstr *MI) {
return;
}
-static MCInstPrinter *createHexagonMCInstPrinter(const Target &T,
- unsigned SyntaxVariant,
- const MCAsmInfo &MAI,
- const MCInstrInfo &MII,
- const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI) {
- if (SyntaxVariant == 0)
- return(new HexagonInstPrinter(MAI, MII, MRI));
- else
- return nullptr;
-}
-
extern "C" void LLVMInitializeHexagonAsmPrinter() {
RegisterAsmPrinter<HexagonAsmPrinter> X(TheHexagonTarget);
-
- TargetRegistry::RegisterMCInstPrinter(TheHexagonTarget,
- createHexagonMCInstPrinter);
}
diff --git a/lib/Target/Hexagon/HexagonExpandCondsets.cpp b/lib/Target/Hexagon/HexagonExpandCondsets.cpp
new file mode 100644
index 0000000..37ed173
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonExpandCondsets.cpp
@@ -0,0 +1,1348 @@
+// Replace mux instructions with the corresponding legal instructions.
+// It is meant to work post-SSA, but still on virtual registers. It was
+// originally placed between register coalescing and machine instruction
+// scheduler.
+// In this place in the optimization sequence, live interval analysis had
+// been performed, and the live intervals should be preserved. A large part
+// of the code deals with preserving the liveness information.
+//
+// Liveness tracking aside, the main functionality of this pass is divided
+// into two steps. The first step is to replace an instruction
+// vreg0 = C2_mux vreg0, vreg1, vreg2
+// with a pair of conditional transfers
+// vreg0 = A2_tfrt vreg0, vreg1
+// vreg0 = A2_tfrf vreg0, vreg2
+// It is the intention that the execution of this pass could be terminated
+// after this step, and the code generated would be functionally correct.
+//
+// If the uses of the source values vreg1 and vreg2 are kills, and their
+// definitions are predicable, then in the second step, the conditional
+// transfers will then be rewritten as predicated instructions. E.g.
+// vreg0 = A2_or vreg1, vreg2
+// vreg3 = A2_tfrt vreg99, vreg0<kill>
+// will be rewritten as
+// vreg3 = A2_port vreg99, vreg1, vreg2
+//
+// This replacement has two variants: "up" and "down". Consider this case:
+// vreg0 = A2_or vreg1, vreg2
+// ... [intervening instructions] ...
+// vreg3 = A2_tfrt vreg99, vreg0<kill>
+// variant "up":
+// vreg3 = A2_port vreg99, vreg1, vreg2
+// ... [intervening instructions, vreg0->vreg3] ...
+// [deleted]
+// variant "down":
+// [deleted]
+// ... [intervening instructions] ...
+// vreg3 = A2_port vreg99, vreg1, vreg2
+//
+// Both, one or none of these variants may be valid, and checks are made
+// to rule out inapplicable variants.
+//
+// As an additional optimization, before either of the two steps above is
+// executed, the pass attempts to coalesce the target register with one of
+// the source registers, e.g. given an instruction
+// vreg3 = C2_mux vreg0, vreg1, vreg2
+// vreg3 will be coalesced with either vreg1 or vreg2. If this succeeds,
+// the instruction would then be (for example)
+// vreg3 = C2_mux vreg0, vreg3, vreg2
+// and, under certain circumstances, this could result in only one predicated
+// instruction:
+// vreg3 = A2_tfrf vreg0, vreg2
+//
+
+#define DEBUG_TYPE "expand-condsets"
+#include "HexagonTargetMachine.h"
+
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/LiveInterval.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+// Hidden options capping the two transformations; the constructor honors one only if getPosition() is nonzero.
+static cl::opt<unsigned> OptTfrLimit("expand-condsets-tfr-limit",
+ cl::init(~0U), cl::Hidden, cl::desc("Max number of mux expansions"));
+static cl::opt<unsigned> OptCoaLimit("expand-condsets-coa-limit",
+ cl::init(~0U), cl::Hidden, cl::desc("Max number of segment coalescings"));
+// Forward declarations: the initializer invoked by the pass constructor, and (presumably) the pass factory.
+namespace llvm {
+ void initializeHexagonExpandCondsetsPass(PassRegistry&);
+ FunctionPass *createHexagonExpandCondsets();
+}
+// Machine-function pass that replaces mux ("condset") instructions while keeping LiveIntervals up to date.
+namespace {
+ class HexagonExpandCondsets : public MachineFunctionPass {
+ public:
+ static char ID;
+ HexagonExpandCondsets() :
+ MachineFunctionPass(ID), HII(0), TRI(0), MRI(0),
+ LIS(0), CoaLimitActive(false),
+ TfrLimitActive(false), CoaCounter(0), TfrCounter(0) {
+ if (OptCoaLimit.getPosition()) // a limit is activated only when getPosition() is nonzero
+ CoaLimitActive = true, CoaLimit = OptCoaLimit;
+ if (OptTfrLimit.getPosition())
+ TfrLimitActive = true, TfrLimit = OptTfrLimit;
+ initializeHexagonExpandCondsetsPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual const char *getPassName() const {
+ return "Hexagon Expand Condsets";
+ }
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<LiveIntervals>(); // live intervals are both required ...
+ AU.addPreserved<LiveIntervals>(); // ... and declared preserved
+ AU.addPreserved<SlotIndexes>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ private:
+ const HexagonInstrInfo *HII;
+ const TargetRegisterInfo *TRI;
+ MachineRegisterInfo *MRI;
+ LiveIntervals *LIS;
+
+ bool CoaLimitActive, TfrLimitActive;
+ unsigned CoaLimit, TfrLimit, CoaCounter, TfrCounter; // the two limits are assigned only if the matching *Active flag is set
+
+ struct RegisterRef { // a (register, subregister index) pair
+ RegisterRef(const MachineOperand &Op) : Reg(Op.getReg()),
+ Sub(Op.getSubReg()) {}
+ RegisterRef(unsigned R = 0, unsigned S = 0) : Reg(R), Sub(S) {}
+ bool operator== (RegisterRef RR) const {
+ return Reg == RR.Reg && Sub == RR.Sub;
+ }
+ bool operator!= (RegisterRef RR) const { return !operator==(RR); }
+ unsigned Reg, Sub;
+ };
+
+ typedef DenseMap<unsigned,unsigned> ReferenceMap; // keyed by register number; the value is a Sub_*/Exec_* bitmask
+ enum { Sub_Low = 0x1, Sub_High = 0x2, Sub_None = (Sub_Low | Sub_High) }; // Sub_None sets both half bits
+ enum { Exec_Then = 0x10, Exec_Else = 0x20 }; // extra bits OR-ed into the same mask by addRefToMap
+ unsigned getMaskForSub(unsigned Sub);
+ bool isCondset(const MachineInstr *MI);
+
+ void addRefToMap(RegisterRef RR, ReferenceMap &Map, unsigned Exec);
+ bool isRefInMap(RegisterRef, ReferenceMap &Map, unsigned Exec);
+
+ LiveInterval::iterator nextSegment(LiveInterval &LI, SlotIndex S);
+ LiveInterval::iterator prevSegment(LiveInterval &LI, SlotIndex S);
+ void makeDefined(unsigned Reg, SlotIndex S, bool SetDef);
+ void makeUndead(unsigned Reg, SlotIndex S);
+ void shrinkToUses(unsigned Reg, LiveInterval &LI); // wraps LiveIntervals::shrinkToUses without leaving defs dead
+ void updateKillFlags(unsigned Reg, LiveInterval &LI);
+ void terminateSegment(LiveInterval::iterator LT, SlotIndex S,
+ LiveInterval &LI);
+ void addInstrToLiveness(MachineInstr *MI);
+ void removeInstrFromLiveness(MachineInstr *MI);
+
+ unsigned getCondTfrOpcode(const MachineOperand &SO, bool Cond);
+ MachineInstr *genTfrFor(MachineOperand &SrcOp, unsigned DstR,
+ unsigned DstSR, const MachineOperand &PredOp, bool Cond);
+ bool split(MachineInstr *MI);
+ bool splitInBlock(MachineBasicBlock &B);
+
+ bool isPredicable(MachineInstr *MI);
+ MachineInstr *getReachingDefForPred(RegisterRef RD,
+ MachineBasicBlock::iterator UseIt, unsigned PredR, bool Cond);
+ bool canMoveOver(MachineInstr *MI, ReferenceMap &Defs, ReferenceMap &Uses);
+ bool canMoveMemTo(MachineInstr *MI, MachineInstr *ToI, bool IsDown);
+ void predicateAt(RegisterRef RD, MachineInstr *MI,
+ MachineBasicBlock::iterator Where, unsigned PredR, bool Cond);
+ void renameInRange(RegisterRef RO, RegisterRef RN, unsigned PredR,
+ bool Cond, MachineBasicBlock::iterator First,
+ MachineBasicBlock::iterator Last);
+ bool predicate(MachineInstr *TfrI, bool Cond);
+ bool predicateInBlock(MachineBasicBlock &B);
+
+ void postprocessUndefImplicitUses(MachineBasicBlock &B);
+ void removeImplicitUses(MachineInstr *MI);
+ void removeImplicitUses(MachineBasicBlock &B);
+
+ bool isIntReg(RegisterRef RR, unsigned &BW);
+ bool isIntraBlocks(LiveInterval &LI);
+ bool coalesceRegisters(RegisterRef R1, RegisterRef R2);
+ bool coalesceSegments(MachineFunction &MF);
+ };
+}
+// Definition of the static ID member that the constructor passes to MachineFunctionPass.
+char HexagonExpandCondsets::ID = 0;
+
+/// Translate a subregister index into the Sub_Low/Sub_High/Sub_None bitmask; any other index is unreachable.
+unsigned HexagonExpandCondsets::getMaskForSub(unsigned Sub) {
+ switch (Sub) {
+ case Hexagon::subreg_loreg:
+ return Sub_Low;
+ case Hexagon::subreg_hireg:
+ return Sub_High;
+ case Hexagon::NoSubRegister:
+ return Sub_None; // no subregister: both half bits (Sub_Low | Sub_High)
+ }
+ llvm_unreachable("Invalid subregister");
+}
+
+/// Return true if MI's opcode is C2_mux, C2_muxii, C2_muxir, C2_muxri or MUX64_rr; false otherwise.
+bool HexagonExpandCondsets::isCondset(const MachineInstr *MI) {
+ unsigned Opc = MI->getOpcode();
+ switch (Opc) {
+ case Hexagon::C2_mux:
+ case Hexagon::C2_muxii:
+ case Hexagon::C2_muxir:
+ case Hexagon::C2_muxri:
+ case Hexagon::MUX64_rr:
+ return true;
+ break; // never reached: it follows the return
+ }
+ return false;
+}
+
+/// Record RR in Map: OR the subregister mask of RR.Sub and the Exec bits into the entry keyed by RR.Reg.
+void HexagonExpandCondsets::addRefToMap(RegisterRef RR, ReferenceMap &Map,
+ unsigned Exec) {
+ unsigned Mask = getMaskForSub(RR.Sub) | Exec;
+ ReferenceMap::iterator F = Map.find(RR.Reg);
+ if (F == Map.end())
+ Map.insert(std::make_pair(RR.Reg, Mask)); // first reference to this register
+ else
+ F->second |= Mask; // accumulate bits onto the existing entry
+}
+
+/// Return true if Map has an entry for RR.Reg whose mask shares at least one bit with (subregister mask | Exec).
+bool HexagonExpandCondsets::isRefInMap(RegisterRef RR, ReferenceMap &Map,
+ unsigned Exec) {
+ ReferenceMap::iterator F = Map.find(RR.Reg);
+ if (F == Map.end())
+ return false;
+ unsigned Mask = getMaskForSub(RR.Sub) | Exec;
+ if (Mask & F->second) // any common bit counts, whether a subregister bit or an Exec bit
+ return true;
+ return false;
+}
+
+/// Return the first segment of LI, in iteration order, whose start is >= S; LI.end() if there is none.
+LiveInterval::iterator HexagonExpandCondsets::nextSegment(LiveInterval &LI,
+ SlotIndex S) {
+ for (LiveInterval::iterator I = LI.begin(), E = LI.end(); I != E; ++I) {
+ if (I->start >= S)
+ return I;
+ }
+ return LI.end();
+}
+
+/// Return the segment that precedes the first segment of LI whose end is > S (see the return comments).
+LiveInterval::iterator HexagonExpandCondsets::prevSegment(LiveInterval &LI,
+ SlotIndex S) {
+ LiveInterval::iterator P = LI.end();
+ for (LiveInterval::iterator I = LI.begin(), E = LI.end(); I != E; ++I) {
+ if (I->end > S)
+ return P; // P is the segment before I, or LI.end() when I is the first segment
+ P = I;
+ }
+ return P; // no segment ends after S: the last segment, or LI.end() if LI is empty
+}
+
+
+/// Find the implicit uses of register Reg in the instruction at slot index
+/// S, and make sure their "undef" flag is the inverse of SetDef. While the
+/// mux expansion is going on, predicated instructions will have implicit
+/// uses of the registers that are being defined. This is to keep any
+/// preceding definitions live. If there is no preceding definition, the
+/// implicit use will be marked as "undef", otherwise it will be "defined".
+/// This function updates that flag; it is a no-op unless S.isRegister().
+void HexagonExpandCondsets::makeDefined(unsigned Reg, SlotIndex S,
+ bool SetDef) {
+ if (!S.isRegister())
+ return;
+ MachineInstr *MI = LIS->getInstructionFromIndex(S);
+ assert(MI && "Expecting instruction");
+ for (auto &Op : MI->operands()) {
+ if (!Op.isReg() || !Op.isUse() || Op.getReg() != Reg)
+ continue;
+ bool IsDef = !Op.isUndef(); // "defined" here simply means "not undef"
+ if (Op.isImplicit() && IsDef != SetDef) // only implicit uses are ever changed
+ Op.setIsUndef(!SetDef);
+ }
+}
+
+/// Clear the "dead" flag on the definition(s) of Reg selected by slot index S (a block or a register slot).
+void HexagonExpandCondsets::makeUndead(unsigned Reg, SlotIndex S) {
+ // If S is a block boundary, then there can still be a dead def reaching
+ // this point. Instead of traversing the CFG, queue start points of all
+ // live segments that begin with a register, and end at a block boundary.
+ // This may "resurrect" some truly dead definitions, but doing so is
+ // harmless.
+ SmallVector<MachineInstr*,8> Defs;
+ if (S.isBlock()) {
+ LiveInterval &LI = LIS->getInterval(Reg);
+ for (LiveInterval::iterator I = LI.begin(), E = LI.end(); I != E; ++I) {
+ if (!I->start.isRegister() || !I->end.isBlock())
+ continue;
+ MachineInstr *MI = LIS->getInstructionFromIndex(I->start);
+ Defs.push_back(MI);
+ }
+ } else if (S.isRegister()) { // S names a single instruction: only that one is queued
+ MachineInstr *MI = LIS->getInstructionFromIndex(S);
+ Defs.push_back(MI);
+ }
+
+ for (unsigned i = 0, n = Defs.size(); i < n; ++i) { // Defs stays empty if S is neither kind of slot
+ MachineInstr *MI = Defs[i];
+ for (auto &Op : MI->operands()) {
+ if (!Op.isReg() || !Op.isDef() || Op.getReg() != Reg)
+ continue;
+ Op.setIsDead(false);
+ }
+ }
+}
+
+
+/// Shrink the segments in the live interval for a given register to the last
+/// use before each subsequent def. Unlike LiveIntervals::shrinkToUses, this
+/// function will not mark any definitions of Reg as dead. The reason for this
+/// is that this function is used while a MUX instruction is being expanded,
+/// or while a conditional copy is undergoing predication. During these
+/// processes, there may be defs present in the instruction sequence that have
+/// not yet been removed, or there may be missing uses that have not yet been
+/// added. We want to utilize LiveIntervals::shrinkToUses as much as possible,
+/// but since it does not extend any intervals that are too short, we need to
+/// pre-emptively extend them here in anticipation of further changes.
+void HexagonExpandCondsets::shrinkToUses(unsigned Reg, LiveInterval &LI) {
+ SmallVector<MachineInstr*,4> Deads;
+ LIS->shrinkToUses(&LI, &Deads); // Deads is then used as the list of instructions to "un-dead"
+ // Need to undo the deadification made by "shrinkToUses". It's easier to
+ // do it here, since we have a list of all instructions that were just
+ // marked as dead.
+ for (unsigned i = 0, n = Deads.size(); i < n; ++i) {
+ MachineInstr *MI = Deads[i];
+ // Clear the "dead" flag.
+ for (auto &Op : MI->operands()) {
+ if (!Op.isReg() || !Op.isDef() || Op.getReg() != Reg)
+ continue;
+ Op.setIsDead(false);
+ }
+ // Extend the live segment to the beginning of the next one.
+ LiveInterval::iterator End = LI.end();
+ SlotIndex S = LIS->getInstructionIndex(MI).getRegSlot();
+ LiveInterval::iterator T = LI.FindSegmentContaining(S);
+ assert(T != End);
+ LiveInterval::iterator N = std::next(T);
+ if (N != End)
+ T->end = N->start;
+ else
+ T->end = LIS->getMBBEndIdx(MI->getParent()); // no later segment: extend to the end of MI's block
+ }
+ updateKillFlags(Reg, LI); // recompute kill flags from the adjusted segment ends
+}
+
+
+/// Given an updated live interval LI for register Reg, update the kill flags
+/// in instructions using Reg to reflect the liveness changes.
+void HexagonExpandCondsets::updateKillFlags(unsigned Reg, LiveInterval &LI) {
+ MRI->clearKillFlags(Reg); // drop all existing kill flags for Reg, then re-derive them below
+ for (LiveInterval::iterator I = LI.begin(), E = LI.end(); I != E; ++I) {
+ SlotIndex EX = I->end;
+ if (!EX.isRegister()) // only segments that end at a register slot get a kill flag
+ continue;
+ MachineInstr *MI = LIS->getInstructionFromIndex(EX);
+ for (auto &Op : MI->operands()) {
+ if (!Op.isReg() || !Op.isUse() || Op.getReg() != Reg)
+ continue;
+ // Only set the kill flag on the first encountered use of Reg in this
+ // instruction.
+ Op.setIsKill(true);
+ break;
+ }
+ }
+}
+
+
+/// When adding a new instruction to liveness, the newly added definition
+/// will start a new live segment. This may happen at a position that falls
+/// within an existing live segment. In such a case that live segment needs
+/// to be truncated to make room for the new segment. Ultimately, the truncation
+/// will occur at the last use, but for now the segment can be terminated
+/// right at the place where the new segment will start. The segments will be
+/// shrunk-to-uses later.
+void HexagonExpandCondsets::terminateSegment(LiveInterval::iterator LT,
+ SlotIndex S, LiveInterval &LI) {
+ // Terminate the live segment pointed to by LT within a live interval LI.
+ if (LT == LI.end())
+ return;
+
+ VNInfo *OldVN = LT->valno;
+ SlotIndex EX = LT->end; // remember the old end before it is overwritten
+ LT->end = S;
+ // If LT does not end at a block boundary, the termination is done.
+ if (!EX.isBlock())
+ return;
+
+ // If LT ended at a block boundary, it's possible that its value number
+ // is picked up at the beginning of other blocks. Create a new value number
+ // and change such blocks to use it instead.
+ VNInfo *NewVN = 0;
+ for (LiveInterval::iterator I = LI.begin(), E = LI.end(); I != E; ++I) {
+ if (!I->start.isBlock() || I->valno != OldVN)
+ continue;
+ // Generate on-demand a new value number that is defined by the
+ // block beginning (i.e. -phi).
+ if (!NewVN)
+ NewVN = LI.getNextValue(I->start, LIS->getVNInfoAllocator());
+ I->valno = NewVN; // every matching segment shares the one new value number
+ }
+}
+
+
+/// Add the specified instruction to live intervals. This function is used
+/// to update the live intervals while the program code is being changed.
+/// Neither the expansion of a MUX, nor the predication are atomic, and this
+/// function is used to update the live intervals while these transformations
+/// are being done.
+void HexagonExpandCondsets::addInstrToLiveness(MachineInstr *MI) {
+ SlotIndex MX = LIS->isNotInMIMap(MI) ? LIS->InsertMachineInstrInMaps(MI)
+ : LIS->getInstructionIndex(MI);
+ DEBUG(dbgs() << "adding liveness info for instr\n " << MX << " " << *MI);
+
+ MX = MX.getRegSlot(); // from here on MX is the register slot of MI
+ bool Predicated = HII->isPredicated(MI);
+ MachineBasicBlock *MB = MI->getParent();
+
+ // Strip all implicit uses from predicated instructions. They will be
+ // added again, according to the updated information.
+ if (Predicated)
+ removeImplicitUses(MI);
+
+ // For each def in MI we need to insert a new live segment starting at MX
+ // into the interval. If there already exists a live segment in the interval
+ // that contains MX, we need to terminate it at MX.
+ SmallVector<RegisterRef,2> Defs; // collected up front: MI's operand list may grow in the loop below
+ for (auto &Op : MI->operands())
+ if (Op.isReg() && Op.isDef())
+ Defs.push_back(RegisterRef(Op));
+
+ for (unsigned i = 0, n = Defs.size(); i < n; ++i) {
+ unsigned DefR = Defs[i].Reg;
+ LiveInterval &LID = LIS->getInterval(DefR);
+ DEBUG(dbgs() << "adding def " << PrintReg(DefR, TRI)
+ << " with interval\n " << LID << "\n");
+ // If MX falls inside of an existing live segment, terminate it.
+ LiveInterval::iterator LT = LID.FindSegmentContaining(MX);
+ if (LT != LID.end())
+ terminateSegment(LT, MX, LID);
+ DEBUG(dbgs() << "after terminating segment\n " << LID << "\n");
+
+ // Create a new segment starting from MX.
+ LiveInterval::iterator P = prevSegment(LID, MX), N = nextSegment(LID, MX);
+ SlotIndex EX;
+ VNInfo *VN = LID.getNextValue(MX, LIS->getVNInfoAllocator());
+ if (N == LID.end()) {
+ // There is no live segment after MX. End this segment at the end of
+ // the block.
+ EX = LIS->getMBBEndIdx(MB);
+ } else {
+ // If the next segment starts at the block boundary, end the new segment
+ // at the boundary of the preceding block (i.e. the previous index).
+ // Otherwise, end the segment at the beginning of the next segment. In
+ // either case it will be "shrunk-to-uses" later.
+ EX = N->start.isBlock() ? N->start.getPrevIndex() : N->start;
+ }
+ if (Predicated) {
+ // Predicated instruction will have an implicit use of the defined
+ // register. This is necessary so that this definition will not make
+ // any previous definitions dead. If there are no previous live
+ // segments, still add the implicit use, but make it "undef".
+ // Because of the implicit use, the preceding definition is not
+ // dead. Mark it as such (if necessary).
+ MachineOperand ImpUse = MachineOperand::CreateReg(DefR, false, true);
+ ImpUse.setSubReg(Defs[i].Sub);
+ bool Undef = false;
+ if (P == LID.end())
+ Undef = true;
+ else {
+ // If the previous segment extends to the end of the previous block,
+ // the end index may actually be the beginning of this block. If
+ // the previous segment ends at a block boundary, move it back by one,
+ // to get the proper block for it.
+ SlotIndex PE = P->end.isBlock() ? P->end.getPrevIndex() : P->end;
+ MachineBasicBlock *PB = LIS->getMBBFromIndex(PE);
+ if (PB != MB && !LIS->isLiveInToMBB(LID, MB))
+ Undef = true;
+ }
+ if (!Undef) {
+ makeUndead(DefR, P->valno->def);
+ // We are adding a live use, so extend the previous segment to
+ // include it.
+ P->end = MX;
+ } else {
+ ImpUse.setIsUndef(true);
+ }
+
+ if (!MI->readsRegister(DefR)) // add the implicit use only if MI does not already read DefR
+ MI->addOperand(ImpUse);
+ if (N != LID.end())
+ makeDefined(DefR, N->start, true);
+ }
+ LiveRange::Segment NR = LiveRange::Segment(MX, EX, VN);
+ LID.addSegment(NR);
+ DEBUG(dbgs() << "added a new segment " << NR << "\n " << LID << "\n");
+ shrinkToUses(DefR, LID);
+ DEBUG(dbgs() << "updated imp-uses: " << *MI);
+ LID.verify();
+ }
+
+ // For each use in MI:
+ // - If there is no live segment that contains MX for the used register,
+ // extend the previous one. Ignore implicit uses.
+ for (auto &Op : MI->operands()) {
+ if (!Op.isReg() || !Op.isUse() || Op.isImplicit() || Op.isUndef())
+ continue;
+ unsigned UseR = Op.getReg();
+ LiveInterval &LIU = LIS->getInterval(UseR);
+ // Find the last segment P that starts before MX.
+ LiveInterval::iterator P = LIU.FindSegmentContaining(MX);
+ if (P == LIU.end())
+ P = prevSegment(LIU, MX);
+
+ assert(P != LIU.end() && "MI uses undefined register?");
+ SlotIndex EX = P->end;
+ // If P contains MX, there is not much to do.
+ if (EX > MX) {
+ Op.setIsKill(false);
+ continue;
+ }
+ // Otherwise, extend P to "next(MX)".
+ P->end = MX.getNextIndex();
+ Op.setIsKill(true);
+ // Get the old "kill" instruction, and remove the kill flag.
+ if (MachineInstr *KI = LIS->getInstructionFromIndex(MX)) // NOTE(review): MX is MI's own slot,
+ KI->clearRegisterKills(UseR, nullptr); // not the old segment end EX - confirm intent
+ shrinkToUses(UseR, LIU);
+ LIU.verify();
+ }
+}
+
+
+/// Update the live interval information to reflect the removal of the given
+/// instruction from the program. As with "addInstrToLiveness", this function
+/// is called while the program code is being changed.
+///
+/// For every register defined by MI whose live segment starts at MI, the
+/// segment is either merged into the preceding segment or removed. The value
+/// number of that segment is marked unused if no remaining segment refers to
+/// it, and an interval left without segments is removed. Afterwards MI is
+/// removed from the LiveIntervals maps and erased from its block, and
+/// shrinkToUses is called for each virtual register that MI used.
+void HexagonExpandCondsets::removeInstrFromLiveness(MachineInstr *MI) {
+ // MX is the register slot of MI's slot index.
+ SlotIndex MX = LIS->getInstructionIndex(MI).getRegSlot();
+ DEBUG(dbgs() << "removing instr\n " << MX << " " << *MI);
+
+ // For each def in MI:
+ // If MI starts a live segment, merge this segment with the previous segment.
+ //
+ for (auto &Op : MI->operands()) {
+ if (!Op.isReg() || !Op.isDef())
+ continue;
+ unsigned DefR = Op.getReg();
+ LiveInterval &LID = LIS->getInterval(DefR);
+ LiveInterval::iterator LT = LID.FindSegmentContaining(MX);
+ assert(LT != LID.end() && "Expecting live segments");
+ DEBUG(dbgs() << "removing def at " << MX << " of " << PrintReg(DefR, TRI)
+ << " with interval\n " << LID << "\n");
+ // Only a segment that begins exactly at MI needs to be updated.
+ if (LT->start != MX)
+ continue;
+
+ // Remember the value number of the segment; whether it is still referenced
+ // is checked after the segments have been updated.
+ VNInfo *MVN = LT->valno;
+ if (LT != LID.begin()) {
+ // If the current live segment is not the first, the task is easy. If
+ // the previous segment continues into the current block, extend it to
+ // the end of the current one, and merge the value numbers.
+ // Otherwise, remove the current segment, and make the end of it "undef".
+ LiveInterval::iterator P = std::prev(LT);
+ // If the previous segment ends on a block boundary, step back one index
+ // so that the lookup below yields the block the segment actually ends in.
+ SlotIndex PE = P->end.isBlock() ? P->end.getPrevIndex() : P->end;
+ MachineBasicBlock *MB = MI->getParent();
+ MachineBasicBlock *PB = LIS->getMBBFromIndex(PE);
+ if (PB != MB && !LIS->isLiveInToMBB(LID, MB)) {
+ makeDefined(DefR, LT->end, false);
+ LID.removeSegment(*LT);
+ } else {
+ // Make the segments adjacent, so that merge-vn can also merge the
+ // segments.
+ P->end = LT->start;
+ makeUndead(DefR, P->valno->def);
+ LID.MergeValueNumberInto(MVN, P->valno);
+ }
+ } else {
+ // LT is the first segment of the interval, so there is no previous
+ // segment to merge with. Determine the range [RmB, RmE) of segments to
+ // erase, starting with LT itself.
+ LiveInterval::iterator N = std::next(LT);
+ LiveInterval::iterator RmB = LT, RmE = N;
+ while (N != LID.end()) {
+ // Iterate until the first register-based definition is found
+ // (i.e. skip all block-boundary entries).
+ LiveInterval::iterator Next = std::next(N);
+ if (N->start.isRegister()) {
+ // N starts at a register slot: it is kept (RmE is not advanced).
+ makeDefined(DefR, N->start, false);
+ break;
+ }
+ if (N->end.isRegister()) {
+ // N ends at a register slot: N itself is still erased.
+ makeDefined(DefR, N->end, false);
+ RmE = Next;
+ break;
+ }
+ RmE = Next;
+ N = Next;
+ }
+ // Erase the segments in one shot to avoid invalidating iterators.
+ LID.segments.erase(RmB, RmE);
+ }
+
+ // Check whether any remaining segment still refers to the value number of
+ // the removed definition; if none does, mark it as unused.
+ bool VNUsed = false;
+ for (LiveInterval::iterator I = LID.begin(), E = LID.end(); I != E; ++I) {
+ if (I->valno != MVN)
+ continue;
+ VNUsed = true;
+ break;
+ }
+ if (!VNUsed)
+ MVN->markUnused();
+
+ DEBUG(dbgs() << "new interval: ");
+ if (!LID.empty()) {
+ DEBUG(dbgs() << LID << "\n");
+ LID.verify();
+ } else {
+ // No segments are left: drop the interval for DefR altogether.
+ DEBUG(dbgs() << "<empty>\n");
+ LIS->removeInterval(DefR);
+ }
+ }
+
+ // For uses there is nothing to do. The intervals will be updated via
+ // shrinkToUses.
+ // Collect the used virtual registers first, since MI is erased before
+ // shrinkToUses is called.
+ SmallVector<unsigned,4> Uses;
+ for (auto &Op : MI->operands()) {
+ if (!Op.isReg() || !Op.isUse())
+ continue;
+ unsigned R = Op.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(R))
+ continue;
+ Uses.push_back(R);
+ }
+ LIS->RemoveMachineInstrFromMaps(MI);
+ MI->eraseFromParent();
+ for (unsigned i = 0, n = Uses.size(); i < n; ++i) {
+ LiveInterval &LI = LIS->getInterval(Uses[i]);
+ shrinkToUses(Uses[i], LI);
+ }
+}
+
+
+/// Select the opcode of the conditional transfer that copies the source
+/// operand SO. Cond chooses between the if-true (Cond == true) and the
+/// if-false form. A register source yields A2_tfr[tf] when the minimal
+/// register class of the (sub)register has size 4 and A2_tfrp[tf] when it has
+/// size 8; an immediate or FP-immediate source yields C2_cmovei[tf]. Any
+/// other kind of operand, or any other register size, is unreachable.
+unsigned HexagonExpandCondsets::getCondTfrOpcode(const MachineOperand &SO,
+      bool Cond) {
+  using namespace Hexagon;
+  if (!SO.isReg()) {
+    // The only non-register sources expected here are (FP) immediates.
+    if (SO.isImm() || SO.isFPImm())
+      return Cond ? C2_cmoveit : C2_cmoveif;
+    llvm_unreachable("Unexpected source operand");
+  }
+
+  RegisterRef RS = SO;
+  // Find a physical register to stand in for the source. A virtual register
+  // is represented by the first register of its register class.
+  unsigned PhysR = RS.Reg;
+  if (TargetRegisterInfo::isVirtualRegister(RS.Reg)) {
+    const TargetRegisterClass *VC = MRI->getRegClass(RS.Reg);
+    assert(VC->begin() != VC->end() && "Empty register class");
+    PhysR = *VC->begin();
+  } else {
+    assert(TargetRegisterInfo::isPhysicalRegister(RS.Reg));
+  }
+
+  // Apply the subregister index, if there is one, and let the size of the
+  // minimal register class containing the result pick the transfer opcode.
+  unsigned PhysS = RS.Sub ? TRI->getSubReg(PhysR, RS.Sub) : PhysR;
+  unsigned Size = TRI->getMinimalPhysRegClass(PhysS)->getSize();
+  if (Size == 4)
+    return Cond ? A2_tfrt : A2_tfrf;
+  if (Size == 8)
+    return Cond ? A2_tfrpt : A2_tfrpf;
+  llvm_unreachable("Invalid register operand");
+}
+
+
+/// Build a conditional transfer that copies SrcOp into DstR:DstSR under the
+/// predicate given by PredOp. Cond selects between if(PredOp) (true) and
+/// if(!PredOp) (false). The new instruction is inserted immediately before
+/// the instruction that owns SrcOp, uses that instruction's debug location,
+/// and is returned with all of its kill flags cleared.
+MachineInstr *HexagonExpandCondsets::genTfrFor(MachineOperand &SrcOp,
+      unsigned DstR, unsigned DstSR, const MachineOperand &PredOp, bool Cond) {
+  MachineInstr *Parent = SrcOp.getParent();
+  MachineBasicBlock &Block = *Parent->getParent();
+  MachineBasicBlock::iterator InsertPt = Parent;
+  DebugLoc Loc = Parent->getDebugLoc();
+
+  // Identity copies (source and destination being the same register) are
+  // deliberately not filtered out here. Generating them allows the copy to
+  // potentially be predicated in the next step; the predication step removes
+  // such a copy if it is unable to predicate it.
+
+  unsigned Opc = getCondTfrOpcode(SrcOp, Cond);
+  // Operand order: destination, predicate, source.
+  MachineInstrBuilder Builder = BuildMI(Block, InsertPt, Loc, HII->get(Opc));
+  Builder.addReg(DstR, RegState::Define, DstSR);
+  Builder.addOperand(PredOp);
+  Builder.addOperand(SrcOp);
+
+  MachineInstr *TfrI = Builder;
+  // We don't want any kills yet.
+  TfrI->clearKillInfo();
+  DEBUG(dbgs() << "created an initial copy: " << *TfrI);
+  return TfrI;
+}
+
+
+/// Replace a MUX instruction MI with a pair of conditional transfers (one for
+/// the "true" source, one for the "false" source) and update the live
+/// intervals accordingly. Returns false without changing anything when the
+/// transfer limit is active and has been reached; returns true otherwise.
+bool HexagonExpandCondsets::split(MachineInstr *MI) {
+  // Honor the optional cap on the number of splits performed.
+  if (TfrLimitActive && TfrCounter >= TfrLimit)
+    return false;
+  if (TfrLimitActive)
+    TfrCounter++;
+
+  DEBUG(dbgs() << "\nsplitting BB#" << MI->getParent()->getNumber()
+               << ": " << *MI);
+  MachineOperand &MD = MI->getOperand(0);  // Definition
+  MachineOperand &MP = MI->getOperand(1);  // Predicate register
+  assert(MD.isDef());
+  const unsigned DR = MD.getReg();
+  const unsigned DSR = MD.getSubReg();
+
+  // Create the two individual conditional transfers and register each of
+  // them with the live intervals before the old instruction is removed from
+  // the live intervals.
+  MachineInstr *TfrT = genTfrFor(MI->getOperand(2), DR, DSR, MP, true);
+  if (TfrT)
+    addInstrToLiveness(TfrT);
+  MachineInstr *TfrF = genTfrFor(MI->getOperand(3), DR, DSR, MP, false);
+  if (TfrF)
+    addInstrToLiveness(TfrF);
+  removeInstrFromLiveness(MI);
+
+  return true;
+}
+
+
+/// Split every MUX instruction in block B into a pair of conditional
+/// transfers. Returns true if at least one instruction was split.
+bool HexagonExpandCondsets::splitInBlock(MachineBasicBlock &B) {
+  bool Changed = false;
+  MachineBasicBlock::iterator It = B.begin(), End = B.end();
+  while (It != End) {
+    // Advance before processing: split() erases the current instruction.
+    MachineBasicBlock::iterator Cur = It;
+    It = std::next(It);
+    if (!isCondset(Cur))
+      continue;
+    if (split(Cur))
+      Changed = true;
+  }
+  return Changed;
+}
+
+
+/// Check whether this pass may predicate MI. Beyond the target's own notion
+/// of predicability, MI must not already be predicated, must not have
+/// unmodeled side effects, must not store, must have at most one register
+/// def, and must not have a volatile memory operand.
+bool HexagonExpandCondsets::isPredicable(MachineInstr *MI) {
+  if (HII->isPredicated(MI))
+    return false;
+  if (!HII->isPredicable(MI))
+    return false;
+  if (MI->hasUnmodeledSideEffects())
+    return false;
+  if (MI->mayStore())
+    return false;
+
+  // Reject instructions with multiple defs (e.g. post-increment loads).
+  unsigned NumDefs = 0;
+  for (auto &Op : MI->operands()) {
+    if (Op.isReg() && Op.isDef() && ++NumDefs > 1)
+      return false;
+  }
+
+  for (auto &Mo : MI->memoperands()) {
+    if (Mo->isVolatile())
+      return false;
+  }
+  return true;
+}
+
+
+/// Find the reaching definition for a predicated use of RD. The RD is used
+/// under the conditions given by PredR and Cond, and this function will ignore
+/// definitions that set RD under the opposite conditions.
+///
+/// The search walks backwards from the instruction preceding UseIt to the
+/// beginning of UseIt's block. It returns the instruction that defines
+/// exactly RD (same register and same subregister), or nullptr when
+///  - the beginning of the block is reached without finding a definition, or
+///  - a definition of RD.Reg is found where exactly one of the two references
+///    names the whole register (no subregister), i.e. the found def and RD
+///    overlap without being identical.
+/// Once a definition of PredR has been seen, instructions predicated on PredR
+/// are no longer skipped.
+MachineInstr *HexagonExpandCondsets::getReachingDefForPred(RegisterRef RD,
+      MachineBasicBlock::iterator UseIt, unsigned PredR, bool Cond) {
+  MachineBasicBlock &B = *UseIt->getParent();
+  MachineBasicBlock::iterator I = UseIt, S = B.begin();
+  if (I == S)
+    return nullptr;
+
+  bool PredValid = true;
+  do {
+    --I;
+    MachineInstr *MI = &*I;
+    // Check if this instruction can be ignored, i.e. if it is predicated
+    // on the complementary condition.
+    if (PredValid && HII->isPredicated(MI)) {
+      if (MI->readsRegister(PredR) && (Cond != HII->isPredicatedTrue(MI)))
+        continue;
+    }
+
+    // Check the defs. If the PredR is defined, invalidate it. If RD is
+    // defined, return the instruction or nullptr, depending on the
+    // circumstances.
+    for (auto &Op : MI->operands()) {
+      if (!Op.isReg() || !Op.isDef())
+        continue;
+      RegisterRef RR = Op;
+      if (RR.Reg == PredR) {
+        PredValid = false;
+        continue;
+      }
+      if (RR.Reg != RD.Reg)
+        continue;
+      // If the "Reg" part agrees, there is still the subregister to check.
+      // If we are looking for vreg1:loreg, we can skip vreg1:hireg, but
+      // not vreg1 (w/o subregisters).
+      if (RR.Sub == RD.Sub)
+        return MI;
+      if (RR.Sub == 0 || RD.Sub == 0)
+        return nullptr;
+      // We have different subregisters, so we can continue looking.
+    }
+  } while (I != S);
+
+  return nullptr;
+}
+
+
+/// Decide whether MI can be safely moved across a group of instructions whose
+/// register effects are summarized in Defs and Uses. These maps reflect the
+/// conditional defs and uses that depend on the same predicate register, which
+/// allows moving instructions over instructions predicated on the opposite
+/// condition. Returns false as soon as any register operand of MI is not a
+/// virtual register.
+bool HexagonExpandCondsets::canMoveOver(MachineInstr *MI, ReferenceMap &Defs,
+      ReferenceMap &Uses) {
+  // Moving MI is safe only if none of its operands is redefined by the
+  // instructions being crossed, and none of its defs is used by them.
+  for (auto &Op : MI->operands()) {
+    if (!Op.isReg())
+      continue;
+    RegisterRef Ref = Op;
+    // Physical registers would require alias checks, which are not worth
+    // the trouble before the virtual-to-physical register rewriting.
+    if (!TargetRegisterInfo::isVirtualRegister(Ref.Reg))
+      return false;
+    // Any operand: no redefinition. Defs only: additionally no use.
+    const bool Blocked = isRefInMap(Ref, Defs, Exec_Then) ||
+                         (Op.isDef() && isRefInMap(Ref, Uses, Exec_Then));
+    if (Blocked)
+      return false;
+  }
+  return true;
+}
+
+
+/// Check whether the (possibly memory-accessing) instruction TheI can be moved
+/// to the location of ToI. IsDown states that ToI follows TheI in the block;
+/// otherwise ToI precedes it. An instruction that neither loads nor stores can
+/// always be moved as far as this check is concerned.
+bool HexagonExpandCondsets::canMoveMemTo(MachineInstr *TheI, MachineInstr *ToI,
+      bool IsDown) {
+  const bool IsLoad = TheI->mayLoad();
+  const bool IsStore = TheI->mayStore();
+  if (!(IsLoad || IsStore))
+    return true;
+  if (HII->areMemAccessesTriviallyDisjoint(TheI, ToI))
+    return true;
+  if (TheI->hasUnmodeledSideEffects())
+    return false;
+
+  // Orient the range so that it is always traversed in program order.
+  MachineBasicBlock::iterator StartI = IsDown ? TheI : ToI;
+  MachineBasicBlock::iterator EndI = IsDown ? ToI : TheI;
+  const bool Ordered = TheI->hasOrderedMemoryRef();
+
+  // Look for a conflicting memory reference strictly between StartI and EndI.
+  MachineBasicBlock::iterator I = std::next(StartI);
+  while (I != EndI) {
+    MachineInstr *Mid = &*I;
+    ++I;
+    if (Mid->hasUnmodeledSideEffects())
+      return false;
+    const bool MidLoads = Mid->mayLoad();
+    const bool MidStores = Mid->mayStore();
+    if (!MidLoads && !MidStores)
+      continue;
+    if (Ordered && Mid->hasOrderedMemoryRef())
+      return false;
+    // Any intervening store conflicts; an intervening load conflicts only
+    // when TheI may store.
+    if (MidStores || (MidLoads && IsStore))
+      return false;
+  }
+  return true;
+}
+
+
+/// Generate a predicated version of MI (where the condition is given via
+/// PredR and Cond) at the point indicated by Where.
+///
+/// The new instruction defines RD (register and subregister) instead of MI's
+/// own leading defs, takes PredR as the operand following the def, and then
+/// receives MI's remaining operands except implicit register operands. MI's
+/// memory operands are copied over. The new instruction has its kill flags
+/// cleared and is added to the live intervals. MI itself is left in place;
+/// predicate() removes it after calling this function.
+void HexagonExpandCondsets::predicateAt(RegisterRef RD, MachineInstr *MI,
+ MachineBasicBlock::iterator Where, unsigned PredR, bool Cond) {
+ // The problem with updating live intervals is that we can move one def
+ // past another def. In particular, this can happen when moving an A2_tfrt
+ // over an A2_tfrf defining the same register. From the point of view of
+ // live intervals, these two instructions are two separate definitions,
+ // and each one starts another live segment. LiveIntervals's "handleMove"
+ // does not allow such moves, so we need to handle it ourselves. To avoid
+ // invalidating liveness data while we are using it, the move will be
+ // implemented in 4 steps: (1) add a clone of the instruction MI at the
+ // target location, (2) update liveness, (3) delete the old instruction,
+ // and (4) update liveness again.
+ // Only steps (1) and (2) happen in this function.
+
+ MachineBasicBlock &B = *MI->getParent();
+ DebugLoc DL = Where->getDebugLoc(); // "Where" points to an instruction.
+ unsigned Opc = MI->getOpcode();
+ // The second argument of getCondOpcode is "invert predicate", hence !Cond.
+ unsigned PredOpc = HII->getCondOpcode(Opc, !Cond);
+ MachineInstrBuilder MB = BuildMI(B, Where, DL, HII->get(PredOpc));
+ unsigned Ox = 0, NP = MI->getNumOperands();
+ // Skip all defs from MI first.
+ while (Ox < NP) {
+ MachineOperand &MO = MI->getOperand(Ox);
+ if (!MO.isReg() || !MO.isDef())
+ break;
+ Ox++;
+ }
+ // Add the new def, then the predicate register, then the rest of the
+ // operands.
+ MB.addReg(RD.Reg, RegState::Define, RD.Sub);
+ MB.addReg(PredR);
+ while (Ox < NP) {
+ MachineOperand &MO = MI->getOperand(Ox);
+ // Implicit register operands of MI are not copied.
+ if (!MO.isReg() || !MO.isImplicit())
+ MB.addOperand(MO);
+ Ox++;
+ }
+
+ // Give the new instruction its own array holding MI's memory operands.
+ MachineFunction &MF = *B.getParent();
+ MachineInstr::mmo_iterator I = MI->memoperands_begin();
+ unsigned NR = std::distance(I, MI->memoperands_end());
+ MachineInstr::mmo_iterator MemRefs = MF.allocateMemRefsArray(NR);
+ for (unsigned i = 0; i < NR; ++i)
+ MemRefs[i] = *I++;
+ MB.setMemRefs(MemRefs, MemRefs+NR);
+
+ MachineInstr *NewI = MB;
+ NewI->clearKillInfo();
+ addInstrToLiveness(NewI);
+}
+
+
+/// Rename every reference to the "old" register RO into the "new" register RN
+/// within the inclusive instruction range [First, Last]. Only instructions
+/// that are predicated, read PredR, and whose predicate sense equals Cond are
+/// modified; all other instructions are left untouched.
+void HexagonExpandCondsets::renameInRange(RegisterRef RO, RegisterRef RN,
+      unsigned PredR, bool Cond, MachineBasicBlock::iterator First,
+      MachineBasicBlock::iterator Last) {
+  // Last is part of the range, so iterate up to the position following it.
+  for (MachineBasicBlock::iterator I = First, Stop = std::next(Last);
+       I != Stop; ++I) {
+    MachineInstr *Instr = &*I;
+    // Process only instructions predicated on PredR with the given sense.
+    const bool SameCond = HII->isPredicated(Instr) &&
+                          Instr->readsRegister(PredR) &&
+                          (Cond == HII->isPredicatedTrue(Instr));
+    if (!SameCond)
+      continue;
+
+    for (auto &Op : Instr->operands()) {
+      if (Op.isReg() && RO == RegisterRef(Op)) {
+        Op.setReg(RN.Reg);
+        Op.setSubReg(RN.Sub);
+        // In practice, this isn't supposed to see any defs.
+        assert(!Op.isDef() && "Not expecting a def");
+      }
+    }
+  }
+}
+
+
+/// For a given conditional copy, predicate the definition of the source of
+/// the copy under the given condition (using the same predicate register as
+/// the copy).
+///
+/// TfrI is the conditional copy (A2_tfrt or A2_tfrf) and Cond is its predicate
+/// sense. On success, a predicated clone of the instruction defining the
+/// copy's source is created so that it defines the copy's destination
+/// directly, both the copy and the original defining instruction are removed,
+/// and true is returned. Returns false, leaving the code unchanged, if the
+/// transformation cannot be done.
+bool HexagonExpandCondsets::predicate(MachineInstr *TfrI, bool Cond) {
+ // TfrI - A2_tfr[tf] Instruction (not A2_tfrsi).
+ unsigned Opc = TfrI->getOpcode();
+ (void)Opc;
+ assert(Opc == Hexagon::A2_tfrt || Opc == Hexagon::A2_tfrf);
+ DEBUG(dbgs() << "\nattempt to predicate if-" << (Cond ? "true" : "false")
+ << ": " << *TfrI);
+
+ // Operands of the copy: destination, predicate register, source.
+ MachineOperand &MD = TfrI->getOperand(0);
+ MachineOperand &MP = TfrI->getOperand(1);
+ MachineOperand &MS = TfrI->getOperand(2);
+ // The source operand should be a <kill>. This is not strictly necessary,
+ // but it makes things a lot simpler. Otherwise, we would need to rename
+ // some registers, which would complicate the transformation considerably.
+ if (!MS.isKill())
+ return false;
+
+ RegisterRef RT(MS);
+ unsigned PredR = MP.getReg();
+ // Find the instruction that defines the source of the copy; it must exist
+ // in this block and be predicable.
+ MachineInstr *DefI = getReachingDefForPred(RT, TfrI, PredR, Cond);
+ if (!DefI || !isPredicable(DefI))
+ return false;
+
+ DEBUG(dbgs() << "Source def: " << *DefI);
+
+ // Collect the information about registers defined and used between the
+ // DefI and the TfrI.
+ // Map: reg -> bitmask of subregs
+ ReferenceMap Uses, Defs;
+ MachineBasicBlock::iterator DefIt = DefI, TfrIt = TfrI;
+
+ // Check if the predicate register is valid between DefI and TfrI.
+ // If it is, we can then ignore instructions predicated on the negated
+ // conditions when collecting def and use information.
+ bool PredValid = true;
+ for (MachineBasicBlock::iterator I = std::next(DefIt); I != TfrIt; ++I) {
+ if (!I->modifiesRegister(PredR, 0))
+ continue;
+ PredValid = false;
+ break;
+ }
+
+ for (MachineBasicBlock::iterator I = std::next(DefIt); I != TfrIt; ++I) {
+ MachineInstr *MI = &*I;
+ // If this instruction is predicated on the same register, it could
+ // potentially be ignored.
+ // By default assume that the instruction executes on the same condition
+ // as TfrI (Exec_Then), and also on the opposite one (Exec_Else).
+ unsigned Exec = Exec_Then | Exec_Else;
+ if (PredValid && HII->isPredicated(MI) && MI->readsRegister(PredR))
+ Exec = (Cond == HII->isPredicatedTrue(MI)) ? Exec_Then : Exec_Else;
+
+ for (auto &Op : MI->operands()) {
+ if (!Op.isReg())
+ continue;
+ // We don't want to deal with physical registers. The reason is that
+ // they can be aliased with other physical registers. Aliased virtual
+ // registers must share the same register number, and can only differ
+ // in the subregisters, which we are keeping track of. Physical
+ // registers no longer have subregisters---their super- and
+ // subregisters are other physical registers, and we are not checking
+ // that.
+ RegisterRef RR = Op;
+ if (!TargetRegisterInfo::isVirtualRegister(RR.Reg))
+ return false;
+
+ ReferenceMap &Map = Op.isDef() ? Defs : Uses;
+ addRefToMap(RR, Map, Exec);
+ }
+ }
+
+ // The situation:
+ // RT = DefI
+ // ...
+ // RD = TfrI ..., RT
+
+ // If the register-in-the-middle (RT) is used or redefined between
+ // DefI and TfrI, we may not be able to proceed with this transformation.
+ // We can ignore a def that will not execute together with TfrI, and a
+ // use that will. If there is such a use (that does execute together with
+ // TfrI), we will not be able to move DefI down. If there is a use that
+ // executes if TfrI's condition is false, then RT must be available
+ // unconditionally (cannot be predicated).
+ // Essentially, we need to be able to rename RT to RD in this segment.
+ if (isRefInMap(RT, Defs, Exec_Then) || isRefInMap(RT, Uses, Exec_Else))
+ return false;
+ RegisterRef RD = MD;
+ // If the predicate register is defined between DefI and TfrI, the only
+ // potential thing to do would be to move the DefI down to TfrI, and then
+ // predicate. The reaching def (DefI) must be movable down to the location
+ // of the TfrI.
+ // If the target register of the TfrI (RD) is not used or defined between
+ // DefI and TfrI, consider moving TfrI up to DefI.
+ bool CanUp = canMoveOver(TfrI, Defs, Uses);
+ bool CanDown = canMoveOver(DefI, Defs, Uses);
+ // The TfrI does not access memory, but DefI could. Check if it's safe
+ // to move DefI down to TfrI.
+ if (DefI->mayLoad() || DefI->mayStore())
+ if (!canMoveMemTo(DefI, TfrI, true))
+ CanDown = false;
+
+ DEBUG(dbgs() << "Can move up: " << (CanUp ? "yes" : "no")
+ << ", can move down: " << (CanDown ? "yes\n" : "no\n"));
+ // Moving up is preferred: the predicated clone is placed right after DefI.
+ // Otherwise it is placed right before TfrI.
+ MachineBasicBlock::iterator PastDefIt = std::next(DefIt);
+ if (CanUp)
+ predicateAt(RD, DefI, PastDefIt, PredR, Cond);
+ else if (CanDown)
+ predicateAt(RD, DefI, TfrIt, PredR, Cond);
+ else
+ return false;
+
+ // The clone now defines RD, so references to RT in instructions executing
+ // under the same condition have to refer to RD instead.
+ if (RT != RD)
+ renameInRange(RT, RD, PredR, Cond, PastDefIt, TfrIt);
+
+ // Delete the user of RT first (it should work either way, but this order
+ // of deleting is more natural).
+ removeInstrFromLiveness(TfrI);
+ removeInstrFromLiveness(DefI);
+ return true;
+}
+
+
+/// Predicate all cases of conditional copies in the specified block.
+///
+/// Every A2_tfrt/A2_tfrf in B is handed to predicate(). A copy that could not
+/// be predicated is erased if it is an "identity" copy (destination and
+/// source are the same register reference). Returns true if the block was
+/// modified in any way, i.e. if a copy was predicated or an identity copy was
+/// erased.
+bool HexagonExpandCondsets::predicateInBlock(MachineBasicBlock &B) {
+  bool Changed = false;
+  MachineBasicBlock::iterator I, E, NextI;
+  for (I = B.begin(), E = B.end(); I != E; I = NextI) {
+    // Compute the successor up front: the current instruction may be erased.
+    NextI = std::next(I);
+    unsigned Opc = I->getOpcode();
+    if (Opc != Hexagon::A2_tfrt && Opc != Hexagon::A2_tfrf)
+      continue;
+
+    if (predicate(I, (Opc == Hexagon::A2_tfrt))) {
+      Changed = true;
+      continue;
+    }
+    // If we didn't predicate I, we may need to remove it in case it is
+    // an "identity" copy, e.g. vreg1 = A2_tfrt vreg2, vreg1.
+    if (RegisterRef(I->getOperand(0)) == RegisterRef(I->getOperand(2))) {
+      removeInstrFromLiveness(I);
+      // Erasing an instruction modifies the function, so it must be
+      // reported to the caller as a change.
+      Changed = true;
+    }
+  }
+  return Changed;
+}
+
+
+/// Remove all implicit register use operands from MI.
+void HexagonExpandCondsets::removeImplicitUses(MachineInstr *MI) {
+  // Walk the operands from last to first so that removing an operand does
+  // not change the indices of the operands still to be visited.
+  unsigned Idx = MI->getNumOperands();
+  while (Idx > 0) {
+    --Idx;
+    MachineOperand &MO = MI->getOperand(Idx);
+    if (!MO.isReg() || !MO.isUse() || !MO.isImplicit())
+      continue;
+    MI->RemoveOperand(Idx);
+  }
+}
+
+
+/// Remove the implicit register uses from every predicated instruction in
+/// block B. Instructions that are not predicated are left untouched.
+void HexagonExpandCondsets::removeImplicitUses(MachineBasicBlock &B) {
+  MachineBasicBlock::iterator It = B.begin(), End = B.end();
+  while (It != End) {
+    MachineInstr *Instr = &*It;
+    ++It;
+    if (!HII->isPredicated(Instr))
+      continue;
+    removeImplicitUses(Instr);
+  }
+}
+
+
+/// Convert the pass-internal "implicit undef use" markers in block B into
+/// "undef" flags on subregister definitions.
+///
+/// For every instruction, all implicit register uses that are marked "undef"
+/// are removed. Every def operand of the same instruction that has a
+/// subregister index and whose register was named by one of the removed uses
+/// is then marked "undef".
+void HexagonExpandCondsets::postprocessUndefImplicitUses(MachineBasicBlock &B) {
+  // Implicit uses that are "undef" are only meaningful (outside of the
+  // internals of this pass) when the instruction defines a subregister,
+  // and the implicit-undef use applies to the defined register. In such
+  // cases, the proper way to record the information in the IR is to mark
+  // the definition as "undef", which will be interpreted as "read-undef".
+  typedef SmallSet<unsigned,2> RegisterSet;
+  for (MachineBasicBlock::iterator I = B.begin(), E = B.end(); I != E; ++I) {
+    MachineInstr *MI = &*I;
+    RegisterSet Undefs;
+    // Walk the operands from last to first so that removing an operand does
+    // not change the indices of the operands still to be visited.
+    for (unsigned i = MI->getNumOperands(); i > 0; --i) {
+      MachineOperand &MO = MI->getOperand(i-1);
+      if (!MO.isReg() || !MO.isUse() || !MO.isImplicit() || !MO.isUndef())
+        continue;
+      // Read the register BEFORE removing the operand. MO is a reference
+      // into MI's operand list, so it must not be read once the operand it
+      // named has been removed.
+      unsigned UndefR = MO.getReg();
+      MI->RemoveOperand(i-1);
+      Undefs.insert(UndefR);
+    }
+    for (auto &Op : MI->operands()) {
+      if (!Op.isReg() || !Op.isDef() || !Op.getSubReg())
+        continue;
+      if (Undefs.count(Op.getReg()))
+        Op.setIsUndef(true);
+    }
+  }
+}
+
+
+/// Check whether RR refers to a virtual register of class IntRegs or
+/// DoubleRegs. On success, BW is set to the bit width of the reference: 32
+/// for IntRegs and for a subregister of DoubleRegs, 64 for a whole DoubleRegs
+/// register. BW is not modified when false is returned.
+bool HexagonExpandCondsets::isIntReg(RegisterRef RR, unsigned &BW) {
+  if (!TargetRegisterInfo::isVirtualRegister(RR.Reg))
+    return false;
+
+  const TargetRegisterClass *RC = MRI->getRegClass(RR.Reg);
+  const bool IsInt = (RC == &Hexagon::IntRegsRegClass);
+  const bool IsDouble = (RC == &Hexagon::DoubleRegsRegClass);
+  if (!IsInt && !IsDouble)
+    return false;
+
+  BW = (IsInt || RR.Sub != 0) ? 32 : 64;
+  return true;
+}
+
+
+/// Return true if every segment of LI starts at a register slot and ends at
+/// either a register slot or a dead slot. An interval with no segments
+/// trivially satisfies this.
+bool HexagonExpandCondsets::isIntraBlocks(LiveInterval &LI) {
+  LiveInterval::iterator It = LI.begin(), End = LI.end();
+  while (It != End) {
+    LiveRange::Segment &Seg = *It;
+    ++It;
+    const bool StartOk = Seg.start.isRegister();
+    const bool EndOk = Seg.end.isRegister() || Seg.end.isDead();
+    if (!StartOk || !EndOk)
+      return false;
+  }
+  return true;
+}
+
+
+/// Try to coalesce register R2 into register R1. Returns true if the
+/// registers were coalesced, false otherwise.
+///
+/// Coalescing is only done when both references are integer registers of the
+/// same bit width (see isIntReg), neither register is a function live-in,
+/// neither reference has a subregister, the live intervals do not overlap,
+/// and at least one of the intervals satisfies isIntraBlocks. On success all
+/// references to R2 are replaced with R1, the live segments of R2 are moved
+/// into the interval of R1, and the kill flags for R1 are updated.
+bool HexagonExpandCondsets::coalesceRegisters(RegisterRef R1, RegisterRef R2) {
+ // Optional cap on the number of coalescing attempts. Note that the counter
+ // is incremented for every attempt, including those rejected below.
+ if (CoaLimitActive) {
+ if (CoaCounter >= CoaLimit)
+ return false;
+ CoaCounter++;
+ }
+ unsigned BW1, BW2;
+ if (!isIntReg(R1, BW1) || !isIntReg(R2, BW2) || BW1 != BW2)
+ return false;
+ if (MRI->isLiveIn(R1.Reg))
+ return false;
+ if (MRI->isLiveIn(R2.Reg))
+ return false;
+
+ LiveInterval &L1 = LIS->getInterval(R1.Reg);
+ LiveInterval &L2 = LIS->getInterval(R2.Reg);
+ bool Overlap = L1.overlaps(L2);
+
+ DEBUG(dbgs() << "compatible registers: ("
+ << (Overlap ? "overlap" : "disjoint") << ")\n "
+ << PrintReg(R1.Reg, TRI, R1.Sub) << " " << L1 << "\n "
+ << PrintReg(R2.Reg, TRI, R2.Sub) << " " << L2 << "\n");
+ // These rejections come after the debug output above, so the candidate
+ // pair is printed even when it ends up not being coalesced.
+ if (R1.Sub || R2.Sub)
+ return false;
+ if (Overlap)
+ return false;
+
+ // Coalescing could have a negative impact on scheduling, so try to limit
+ // to some reasonable extent. Only consider coalescing segments, when one
+ // of them does not cross basic block boundaries.
+ if (!isIntraBlocks(L1) && !isIntraBlocks(L2))
+ return false;
+
+ MRI->replaceRegWith(R2.Reg, R1.Reg);
+
+ // Move all live segments from L2 to L1.
+ // VM maps each value number of L2 to the value number created for it in
+ // L1, so that segments sharing a value number in L2 also share one in L1.
+ typedef DenseMap<VNInfo*,VNInfo*> ValueInfoMap;
+ ValueInfoMap VM;
+ for (LiveInterval::iterator I = L2.begin(), E = L2.end(); I != E; ++I) {
+ VNInfo *NewVN, *OldVN = I->valno;
+ ValueInfoMap::iterator F = VM.find(OldVN);
+ if (F == VM.end()) {
+ NewVN = L1.getNextValue(I->valno->def, LIS->getVNInfoAllocator());
+ VM.insert(std::make_pair(OldVN, NewVN));
+ } else {
+ NewVN = F->second;
+ }
+ L1.addSegment(LiveRange::Segment(I->start, I->end, NewVN));
+ }
+ // Empty L2 by removing its segments one at a time.
+ while (L2.begin() != L2.end())
+ L2.removeSegment(*L2.begin());
+
+ updateKillFlags(R1.Reg, L1);
+ DEBUG(dbgs() << "coalesced: " << L1 << "\n");
+ L1.verify();
+
+ return true;
+}
+
+
+/// Attempt to coalesce one of the source registers to a MUX instruction with
+/// the destination register. This could lead to having only one predicated
+/// instruction in the end instead of two.
+///
+/// All MUX instructions of MF that have at least one register source are
+/// collected first and then processed one by one. Returns true if any pair
+/// of registers was coalesced.
+bool HexagonExpandCondsets::coalesceSegments(MachineFunction &MF) {
+ // Collect the candidates: condsets with at least one register source.
+ SmallVector<MachineInstr*,16> Condsets;
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
+ MachineBasicBlock &B = *I;
+ for (MachineBasicBlock::iterator J = B.begin(), F = B.end(); J != F; ++J) {
+ MachineInstr *MI = &*J;
+ if (!isCondset(MI))
+ continue;
+ MachineOperand &S1 = MI->getOperand(2), &S2 = MI->getOperand(3);
+ if (!S1.isReg() && !S2.isReg())
+ continue;
+ Condsets.push_back(MI);
+ }
+ }
+
+ bool Changed = false;
+ for (unsigned i = 0, n = Condsets.size(); i < n; ++i) {
+ MachineInstr *CI = Condsets[i];
+ // Operands of the condset: destination, predicate, "true" source,
+ // "false" source.
+ RegisterRef RD = CI->getOperand(0);
+ RegisterRef RP = CI->getOperand(1);
+ MachineOperand &S1 = CI->getOperand(2), &S2 = CI->getOperand(3);
+ bool Done = false;
+ // Consider this case:
+ // vreg1 = instr1 ...
+ // vreg2 = instr2 ...
+ // vreg0 = C2_mux ..., vreg1, vreg2
+ // If vreg0 was coalesced with vreg1, we could end up with the following
+ // code:
+ // vreg0 = instr1 ...
+ // vreg2 = instr2 ...
+ // vreg0 = A2_tfrf ..., vreg2
+ // which will later become:
+ // vreg0 = instr1 ...
+ // vreg0 = instr2_cNotPt ...
+ // i.e. there will be an unconditional definition (instr1) of vreg0
+ // followed by a conditional one. The output dependency was there before
+ // and it is unavoidable, but if instr1 is predicable, we will no longer be
+ // able to predicate it here.
+ // To avoid this scenario, don't coalesce the destination register with
+ // a source register that is defined by a predicable instruction.
+ if (S1.isReg()) {
+ RegisterRef RS = S1;
+ MachineInstr *RDef = getReachingDefForPred(RS, CI, RP.Reg, true);
+ if (!RDef || !HII->isPredicable(RDef))
+ Done = coalesceRegisters(RD, RegisterRef(S1));
+ }
+ // The second source is only tried if the first one was not coalesced.
+ if (!Done && S2.isReg()) {
+ RegisterRef RS = S2;
+ MachineInstr *RDef = getReachingDefForPred(RS, CI, RP.Reg, false);
+ if (!RDef || !HII->isPredicable(RDef))
+ Done = coalesceRegisters(RD, RegisterRef(S2));
+ }
+ Changed |= Done;
+ }
+ return Changed;
+}
+
+
+/// Entry point of the pass. Caches the target and analysis handles used by
+/// the helper functions, coalesces MUX destinations with their sources,
+/// splits and predicates the conditional transfers block by block, and
+/// finally post-processes the implicit-undef uses in every block. Returns
+/// true if any of the coalescing, splitting or predication steps reported a
+/// change.
+bool HexagonExpandCondsets::runOnMachineFunction(MachineFunction &MF) {
+  const auto &ST = MF.getSubtarget();
+  HII = static_cast<const HexagonInstrInfo*>(ST.getInstrInfo());
+  TRI = ST.getRegisterInfo();
+  LIS = &getAnalysis<LiveIntervals>();
+  MRI = &MF.getRegInfo();
+
+  // Try to coalesce the target of a mux with one of its sources.
+  // This could eliminate a register copy in some circumstances.
+  bool Changed = coalesceSegments(MF);
+
+  for (MachineBasicBlock &B : MF) {
+    // First, split all muxes into pairs of conditional transfers and update
+    // the live intervals. Doing this as a separate step keeps the live
+    // interval update simpler than predicating at the same time.
+    if (splitInBlock(B))
+      Changed = true;
+    // Then collapse predicable instructions feeding conditional transfers
+    // into predicated instructions. This walks over all the instructions
+    // again, so pre-existing cases that were not created by the split are
+    // caught as well.
+    if (predicateInBlock(B))
+      Changed = true;
+  }
+
+  for (MachineBasicBlock &B : MF)
+    postprocessUndefImplicitUses(B);
+  return Changed;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Public Constructor Functions
+//===----------------------------------------------------------------------===//
+
+/// Create the PassInfo for the pass (name "Hexagon Expand Condsets", command
+/// line argument "expand-condsets") and register it with Registry.
+static void initializePassOnce(PassRegistry &Registry) {
+  PassInfo *Info = new PassInfo("Hexagon Expand Condsets", "expand-condsets",
+                                &HexagonExpandCondsets::ID, nullptr,
+                                false, false);
+  // NOTE(review): the "true" argument presumably hands ownership of Info to
+  // the registry -- confirm against PassRegistry::registerPass.
+  Registry.registerPass(*Info, true);
+}
+
+/// Public initialization hook for the pass: runs initializePassOnce on
+/// Registry through the CALL_ONCE_INITIALIZATION macro.
+void llvm::initializeHexagonExpandCondsetsPass(PassRegistry &Registry) {
+ CALL_ONCE_INITIALIZATION(initializePassOnce)
+}
+
+
+/// Factory function: returns a newly allocated HexagonExpandCondsets pass.
+FunctionPass *llvm::createHexagonExpandCondsets() {
+ return new HexagonExpandCondsets();
+}
diff --git a/lib/Target/Hexagon/HexagonISelLowering.cpp b/lib/Target/Hexagon/HexagonISelLowering.cpp
index a2209ab..63900e0 100644
--- a/lib/Target/Hexagon/HexagonISelLowering.cpp
+++ b/lib/Target/Hexagon/HexagonISelLowering.cpp
@@ -316,6 +316,7 @@ CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32);
return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
/*isVolatile=*/false, /*AlwaysInline=*/false,
+ /*isTailCall=*/false,
MachinePointerInfo(), MachinePointerInfo());
}
@@ -1716,6 +1717,14 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SUBC, MVT::i32, Expand);
setOperationAction(ISD::SUBC, MVT::i64, Expand);
+ // Only add and sub that detect overflow are the saturating ones.
+ for (MVT VT : MVT::integer_valuetypes()) {
+ setOperationAction(ISD::UADDO, VT, Expand);
+ setOperationAction(ISD::SADDO, VT, Expand);
+ setOperationAction(ISD::USUBO, VT, Expand);
+ setOperationAction(ISD::SSUBO, VT, Expand);
+ }
+
setOperationAction(ISD::CTPOP, MVT::i32, Expand);
setOperationAction(ISD::CTPOP, MVT::i64, Expand);
setOperationAction(ISD::CTTZ, MVT::i32, Expand);
@@ -2106,7 +2115,7 @@ HexagonTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
// is Big Endian.
unsigned OpIdx = NElts - i - 1;
SDValue Operand = BVN->getOperand(OpIdx);
- if (dyn_cast<ConstantSDNode>(Operand))
+ if (isa<ConstantSDNode>(Operand))
// This operand is already in ConstVal.
continue;
diff --git a/lib/Target/Hexagon/HexagonInstrInfo.cpp b/lib/Target/Hexagon/HexagonInstrInfo.cpp
index fbf1ca9..ff4bcad 100644
--- a/lib/Target/Hexagon/HexagonInstrInfo.cpp
+++ b/lib/Target/Hexagon/HexagonInstrInfo.cpp
@@ -845,8 +845,7 @@ bool HexagonInstrInfo::isNewValueStore(unsigned Opcode) const {
return ((F >> HexagonII::NVStorePos) & HexagonII::NVStoreMask);
}
-int HexagonInstrInfo::
-getMatchingCondBranchOpcode(int Opc, bool invertPredicate) const {
+int HexagonInstrInfo::getCondOpcode(int Opc, bool invertPredicate) const {
enum Hexagon::PredSense inPredSense;
inPredSense = invertPredicate ? Hexagon::PredSense_false :
Hexagon::PredSense_true;
@@ -884,7 +883,7 @@ PredicateInstruction(MachineInstr *MI,
// This will change MI's opcode to its predicate version.
// However, its operand list is still the old one, i.e. the
// non-predicate one.
- MI->setDesc(get(getMatchingCondBranchOpcode(Opc, invertJump)));
+ MI->setDesc(get(getCondOpcode(Opc, invertJump)));
int oper = -1;
unsigned int GAIdx = 0;
diff --git a/lib/Target/Hexagon/HexagonInstrInfo.h b/lib/Target/Hexagon/HexagonInstrInfo.h
index 2644248..284dde1 100644
--- a/lib/Target/Hexagon/HexagonInstrInfo.h
+++ b/lib/Target/Hexagon/HexagonInstrInfo.h
@@ -216,9 +216,7 @@ public:
short getNonExtOpcode(const MachineInstr *MI) const;
bool PredOpcodeHasJMP_c(Opcode_t Opcode) const;
bool PredOpcodeHasNot(Opcode_t Opcode) const;
-
-private:
- int getMatchingCondBranchOpcode(int Opc, bool sense) const;
+ int getCondOpcode(int Opc, bool sense) const;
};
diff --git a/lib/Target/Hexagon/HexagonSubtarget.cpp b/lib/Target/Hexagon/HexagonSubtarget.cpp
index 1717ae3..d61cc54 100644
--- a/lib/Target/Hexagon/HexagonSubtarget.cpp
+++ b/lib/Target/Hexagon/HexagonSubtarget.cpp
@@ -72,7 +72,7 @@ HexagonSubtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS) {
HexagonSubtarget::HexagonSubtarget(StringRef TT, StringRef CPU, StringRef FS,
const TargetMachine &TM)
- : HexagonGenSubtargetInfo(TT, CPU, FS), CPUString(CPU.str()),
+ : HexagonGenSubtargetInfo(TT, CPU, FS), CPUString(CPU),
InstrInfo(initializeSubtargetDependencies(CPU, FS)), TLInfo(TM, *this),
TSInfo(*TM.getDataLayout()), FrameLowering() {
diff --git a/lib/Target/Hexagon/HexagonTargetMachine.cpp b/lib/Target/Hexagon/HexagonTargetMachine.cpp
index 48b0bc8..0679866 100644
--- a/lib/Target/Hexagon/HexagonTargetMachine.cpp
+++ b/lib/Target/Hexagon/HexagonTargetMachine.cpp
@@ -27,11 +27,15 @@
using namespace llvm;
static cl:: opt<bool> DisableHardwareLoops("disable-hexagon-hwloops",
- cl::Hidden, cl::desc("Disable Hardware Loops for Hexagon target"));
+ cl::Hidden, cl::desc("Disable Hardware Loops for Hexagon target"));
static cl::opt<bool> DisableHexagonCFGOpt("disable-hexagon-cfgopt",
- cl::Hidden, cl::ZeroOrMore, cl::init(false),
- cl::desc("Disable Hexagon CFG Optimization"));
+ cl::Hidden, cl::ZeroOrMore, cl::init(false),
+ cl::desc("Disable Hexagon CFG Optimization"));
+
+static cl::opt<bool> EnableExpandCondsets("hexagon-expand-condsets",
+ cl::init(true), cl::Hidden, cl::ZeroOrMore,
+ cl::desc("Early expansion of MUX"));
/// HexagonTargetMachineModule - Note that this is used on hosts that
@@ -55,6 +59,10 @@ static MachineSchedRegistry
SchedCustomRegistry("hexagon", "Run Hexagon's custom scheduler",
createVLIWMachineSched);
+namespace llvm {
+ FunctionPass *createHexagonExpandCondsets();
+}
+
/// HexagonTargetMachine ctor - Create an ILP32 architecture model.
///
@@ -79,7 +87,15 @@ namespace {
class HexagonPassConfig : public TargetPassConfig {
public:
HexagonPassConfig(HexagonTargetMachine *TM, PassManagerBase &PM)
- : TargetPassConfig(TM, PM) {}
+ : TargetPassConfig(TM, PM) {
+ bool NoOpt = (TM->getOptLevel() == CodeGenOpt::None);
+ if (!NoOpt) {
+ if (EnableExpandCondsets) {
+ Pass *Exp = createHexagonExpandCondsets();
+ insertPass(&RegisterCoalescerID, IdentifyingPassPtr(Exp));
+ }
+ }
+ }
HexagonTargetMachine &getHexagonTargetMachine() const {
return getTM<HexagonTargetMachine>();
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp
index bdccf88..155aa9e 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp
@@ -57,7 +57,7 @@ public:
ELFHexagonAsmBackend(Target const &T, uint8_t OSABI)
: HexagonAsmBackend(T), OSABI(OSABI) {}
- MCObjectWriter *createObjectWriter(raw_ostream &OS) const override {
+ MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override {
StringRef CPU("HexagonV4");
return createHexagonELFObjectWriter(OS, OSABI, CPU);
}
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp
index 4a3ac8c..fde935b 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonELFObjectWriter.cpp
@@ -27,8 +27,8 @@ private:
public:
HexagonELFObjectWriter(uint8_t OSABI, StringRef C);
- virtual unsigned GetRelocType(MCValue const &Target, MCFixup const &Fixup,
- bool IsPCRel) const override;
+ unsigned GetRelocType(MCValue const &Target, MCFixup const &Fixup,
+ bool IsPCRel) const override;
};
}
@@ -55,9 +55,9 @@ unsigned HexagonELFObjectWriter::GetRelocType(MCValue const &/*Target*/,
return Type;
}
-MCObjectWriter *llvm::createHexagonELFObjectWriter(raw_ostream &OS,
+MCObjectWriter *llvm::createHexagonELFObjectWriter(raw_pwrite_stream &OS,
uint8_t OSABI,
StringRef CPU) {
MCELFObjectTargetWriter *MOTW = new HexagonELFObjectWriter(OSABI, CPU);
return createELFObjectWriter(MOTW, OS, /*IsLittleEndian*/ true);
-} \ No newline at end of file
+}
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp
index 6c87c9f..ec55234 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp
@@ -78,7 +78,8 @@ StringRef HexagonInstPrinter::getRegName(unsigned RegNo) const {
}
void HexagonInstPrinter::printInst(MCInst const *MI, raw_ostream &O,
- StringRef Annot) {
+ StringRef Annot,
+ const MCSubtargetInfo &STI) {
const char startPacket = '{',
endPacket = '}';
// TODO: add outer HW loop when it's supported too.
@@ -94,7 +95,7 @@ void HexagonInstPrinter::printInst(MCInst const *MI, raw_ostream &O,
Nop.setOpcode (Hexagon::A2_nop);
HexagonMCInstrInfo::setPacketBegin (Nop, HexagonMCInstrInfo::isPacketBegin(*MI));
- printInst (&Nop, O, NoAnnot);
+ printInst (&Nop, O, NoAnnot, STI);
}
// Close the packet.
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.h b/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.h
index d02243b..98fb99b 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.h
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.h
@@ -25,7 +25,8 @@ namespace llvm {
MCRegisterInfo const &MRI)
: MCInstPrinter(MAI, MII, MRI), MII(MII) {}
- void printInst(MCInst const *MI, raw_ostream &O, StringRef Annot) override;
+ void printInst(MCInst const *MI, raw_ostream &O, StringRef Annot,
+ const MCSubtargetInfo &STI) override;
virtual StringRef getOpcodeName(unsigned Opcode) const;
void printInstruction(const MCInst *MI, raw_ostream &O);
StringRef getRegName(unsigned RegNo) const;
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
index c63bf32..2e10d81 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
@@ -75,13 +75,16 @@ static MCCodeGenInfo *createHexagonMCCodeGenInfo(StringRef TT, Reloc::Model RM,
X->InitMCCodeGenInfo(Reloc::Static, CM, OL);
return X;
}
-static MCInstPrinter *createHexagonMCInstPrinter(const Target &T,
+
+static MCInstPrinter *createHexagonMCInstPrinter(const Triple &T,
unsigned SyntaxVariant,
const MCAsmInfo &MAI,
const MCInstrInfo &MII,
- const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI) {
- return new HexagonInstPrinter(MAI, MII, MRI);
+ const MCRegisterInfo &MRI) {
+ if (SyntaxVariant == 0)
+ return(new HexagonInstPrinter(MAI, MII, MRI));
+ else
+ return nullptr;
}
// Force static initialization.
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h
index 17072d9..de63fd2 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h
@@ -27,6 +27,7 @@ class MCSubtargetInfo;
class Target;
class StringRef;
class raw_ostream;
+class raw_pwrite_stream;
extern Target TheHexagonTarget;
@@ -40,8 +41,8 @@ MCAsmBackend *createHexagonAsmBackend(Target const &T,
MCRegisterInfo const &MRI, StringRef TT,
StringRef CPU);
-MCObjectWriter *createHexagonELFObjectWriter(raw_ostream &OS, uint8_t OSABI,
- StringRef CPU);
+MCObjectWriter *createHexagonELFObjectWriter(raw_pwrite_stream &OS,
+ uint8_t OSABI, StringRef CPU);
} // End llvm namespace
diff --git a/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.cpp b/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.cpp
index acf1214..6c43d97 100644
--- a/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.cpp
+++ b/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.cpp
@@ -27,7 +27,7 @@ using namespace llvm;
#include "MSP430GenAsmWriter.inc"
void MSP430InstPrinter::printInst(const MCInst *MI, raw_ostream &O,
- StringRef Annot) {
+ StringRef Annot, const MCSubtargetInfo &STI) {
printInstruction(MI, O);
printAnnotation(O, Annot);
}
diff --git a/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h b/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h
index 7fae505..70141a9 100644
--- a/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h
+++ b/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h
@@ -25,7 +25,8 @@ namespace llvm {
const MCRegisterInfo &MRI)
: MCInstPrinter(MAI, MII, MRI) {}
- void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot) override;
+ void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot,
+ const MCSubtargetInfo &STI) override;
// Autogenerated by tblgen.
void printInstruction(const MCInst *MI, raw_ostream &O);
diff --git a/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp b/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp
index 4c70803..775c0b2 100644
--- a/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp
+++ b/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp
@@ -58,12 +58,11 @@ static MCCodeGenInfo *createMSP430MCCodeGenInfo(StringRef TT, Reloc::Model RM,
return X;
}
-static MCInstPrinter *createMSP430MCInstPrinter(const Target &T,
+static MCInstPrinter *createMSP430MCInstPrinter(const Triple &T,
unsigned SyntaxVariant,
const MCAsmInfo &MAI,
const MCInstrInfo &MII,
- const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI) {
+ const MCRegisterInfo &MRI) {
if (SyntaxVariant == 0)
return new MSP430InstPrinter(MAI, MII, MRI);
return nullptr;
diff --git a/lib/Target/MSP430/MSP430ISelLowering.cpp b/lib/Target/MSP430/MSP430ISelLowering.cpp
index 18141a6..08f41a8 100644
--- a/lib/Target/MSP430/MSP430ISelLowering.cpp
+++ b/lib/Target/MSP430/MSP430ISelLowering.cpp
@@ -645,6 +645,7 @@ MSP430TargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee,
Flags.getByValAlign(),
/*isVolatile*/false,
/*AlwaysInline=*/true,
+ /*isTailCall=*/false,
MachinePointerInfo(),
MachinePointerInfo());
} else {
diff --git a/lib/Target/MSP430/MSP430ISelLowering.h b/lib/Target/MSP430/MSP430ISelLowering.h
index 68868b6..9266c3b 100644
--- a/lib/Target/MSP430/MSP430ISelLowering.h
+++ b/lib/Target/MSP430/MSP430ISelLowering.h
@@ -102,12 +102,6 @@ namespace llvm {
const std::string &Constraint,
MVT VT) const override;
- unsigned getInlineAsmMemConstraint(
- const std::string &ConstraintCode) const override {
- // FIXME: Map different constraints differently.
- return InlineAsm::Constraint_m;
- }
-
/// isTruncateFree - Return true if it's free to truncate a value of type
/// Ty1 to type Ty2. e.g. On msp430 it's free to truncate a i16 value in
/// register R15W to i8 by referencing its sub-register R15B.
diff --git a/lib/Target/MSP430/MSP430MCInstLower.cpp b/lib/Target/MSP430/MSP430MCInstLower.cpp
index 05352a2..c63a57c 100644
--- a/lib/Target/MSP430/MSP430MCInstLower.cpp
+++ b/lib/Target/MSP430/MSP430MCInstLower.cpp
@@ -62,7 +62,7 @@ GetJumpTableSymbol(const MachineOperand &MO) const {
}
// Create a symbol for the name.
- return Ctx.GetOrCreateSymbol(Name.str());
+ return Ctx.GetOrCreateSymbol(Name);
}
MCSymbol *MSP430MCInstLower::
@@ -79,7 +79,7 @@ GetConstantPoolIndexSymbol(const MachineOperand &MO) const {
}
// Create a symbol for the name.
- return Ctx.GetOrCreateSymbol(Name.str());
+ return Ctx.GetOrCreateSymbol(Name);
}
MCSymbol *MSP430MCInstLower::
diff --git a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
index 6401bc1..6f7e3c1 100644
--- a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
+++ b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
@@ -220,6 +220,7 @@ class MipsAsmParser : public MCTargetAsmParser {
bool parseDirectiveNaN();
bool parseDirectiveSet();
bool parseDirectiveOption();
+ bool parseInsnDirective();
bool parseSetAtDirective();
bool parseSetNoAtDirective();
@@ -272,7 +273,10 @@ class MipsAsmParser : public MCTargetAsmParser {
unsigned getGPR(int RegNo);
- int getATReg(SMLoc Loc);
+ /// Returns the internal register number for the current AT. Also checks if
+ /// the current AT is unavailable (set to $0) and gives an error if it is.
+ /// This should be used in pseudo-instruction expansions which need AT.
+ unsigned getATReg(SMLoc Loc);
bool processInstruction(MCInst &Inst, SMLoc IDLoc,
SmallVectorImpl<MCInst> &Instructions);
@@ -1713,7 +1717,7 @@ bool MipsAsmParser::expandLoadImm(MCInst &Inst, SMLoc IDLoc,
// FIXME: gas has a special case for values that are 000...1111, which
// becomes a li -1 and then a dsrl
if (0 <= ImmValue && ImmValue <= 65535) {
- // For 0 <= j <= 65535.
+ // For unsigned and positive signed 16-bit values (0 <= j <= 65535):
// li d,j => ori d,$zero,j
tmpInst.setOpcode(Mips::ORi);
tmpInst.addOperand(MCOperand::CreateReg(RegOp.getReg()));
@@ -1721,7 +1725,7 @@ bool MipsAsmParser::expandLoadImm(MCInst &Inst, SMLoc IDLoc,
tmpInst.addOperand(MCOperand::CreateImm(ImmValue));
Instructions.push_back(tmpInst);
} else if (ImmValue < 0 && ImmValue >= -32768) {
- // For -32768 <= j < 0.
+ // For negative signed 16-bit values (-32768 <= j < 0):
// li d,j => addiu d,$zero,j
tmpInst.setOpcode(Mips::ADDiu);
tmpInst.addOperand(MCOperand::CreateReg(RegOp.getReg()));
@@ -1729,8 +1733,7 @@ bool MipsAsmParser::expandLoadImm(MCInst &Inst, SMLoc IDLoc,
tmpInst.addOperand(MCOperand::CreateImm(ImmValue));
Instructions.push_back(tmpInst);
} else if ((ImmValue & 0xffffffff) == ImmValue) {
- // For any value of j that is representable as a 32-bit integer, create
- // a sequence of:
+ // For all other values which are representable as a 32-bit integer:
// li d,j => lui d,hi16(j)
// ori d,d,lo16(j)
tmpInst.setOpcode(Mips::LUi);
@@ -1752,8 +1755,7 @@ bool MipsAsmParser::expandLoadImm(MCInst &Inst, SMLoc IDLoc,
// | 16-bytes | 16-bytes | 16-bytes |
// |__________|__________|__________|
//
- // For any value of j that is representable as a 48-bit integer, create
- // a sequence of:
+ // For any 64-bit value that is representable as a 48-bit integer:
// li d,j => lui d,hi16(j)
// ori d,d,hi16(lo32(j))
// dsll d,d,16
@@ -1778,7 +1780,7 @@ bool MipsAsmParser::expandLoadImm(MCInst &Inst, SMLoc IDLoc,
// | 16-bytes | 16-bytes | 16-bytes | 16-bytes |
// |__________|__________|__________|__________|
//
- // For any value of j that isn't representable as a 48-bit integer.
+ // For all other values which are representable as a 64-bit integer:
// li d,j => lui d,hi16(j)
// ori d,d,lo16(hi32(j))
// dsll d,d,16
@@ -2048,13 +2050,11 @@ void MipsAsmParser::expandMemInst(MCInst &Inst, SMLoc IDLoc,
if (isLoad && IsGPR && (BaseRegNum != RegOpNum))
TmpRegNum = RegOpNum;
else {
- int AT = getATReg(IDLoc);
// At this point we need AT to perform the expansions and we exit if it is
// not available.
- if (!AT)
+ TmpRegNum = getATReg(IDLoc);
+ if (!TmpRegNum)
return;
- TmpRegNum = getReg(
- (isGP64bit()) ? Mips::GPR64RegClassID : Mips::GPR32RegClassID, AT);
}
TempInst.setOpcode(Mips::LUi);
@@ -2078,12 +2078,14 @@ void MipsAsmParser::expandMemInst(MCInst &Inst, SMLoc IDLoc,
// Prepare TempInst for next instruction.
TempInst.clear();
// Add temp register to base.
- TempInst.setOpcode(Mips::ADDu);
- TempInst.addOperand(MCOperand::CreateReg(TmpRegNum));
- TempInst.addOperand(MCOperand::CreateReg(TmpRegNum));
- TempInst.addOperand(MCOperand::CreateReg(BaseRegNum));
- Instructions.push_back(TempInst);
- TempInst.clear();
+ if (BaseRegNum != Mips::ZERO) {
+ TempInst.setOpcode(Mips::ADDu);
+ TempInst.addOperand(MCOperand::CreateReg(TmpRegNum));
+ TempInst.addOperand(MCOperand::CreateReg(TmpRegNum));
+ TempInst.addOperand(MCOperand::CreateReg(BaseRegNum));
+ Instructions.push_back(TempInst);
+ TempInst.clear();
+ }
// And finally, create original instruction with low part
// of offset and new base.
TempInst.setOpcode(Inst.getOpcode());
@@ -2383,11 +2385,15 @@ int MipsAsmParser::matchMSA128CtrlRegisterName(StringRef Name) {
return CC;
}
-int MipsAsmParser::getATReg(SMLoc Loc) {
- int AT = AssemblerOptions.back()->getATRegNum();
- if (AT == 0)
+unsigned MipsAsmParser::getATReg(SMLoc Loc) {
+ unsigned ATIndex = AssemblerOptions.back()->getATRegNum();
+ if (ATIndex == 0) {
reportParseError(Loc,
"pseudo-instruction requires $at, which is not available");
+ return 0;
+ }
+ unsigned AT = getReg(
+ (isGP64bit()) ? Mips::GPR64RegClassID : Mips::GPR32RegClassID, ATIndex);
return AT;
}
@@ -2571,7 +2577,7 @@ bool MipsAsmParser::parseRelocOperand(const MCExpr *&Res) {
if (Tok.isNot(AsmToken::Identifier))
return true;
- std::string Str = Tok.getIdentifier().str();
+ std::string Str = Tok.getIdentifier();
Parser.Lex(); // Eat the identifier.
// Now make an expression from the rest of the operand.
@@ -3579,11 +3585,7 @@ bool MipsAsmParser::parseSetAssignment() {
if (Parser.parseExpression(Value))
return reportParseError("expected valid expression after comma");
- // Check if the Name already exists as a symbol.
- MCSymbol *Sym = getContext().LookupSymbol(Name);
- if (Sym)
- return reportParseError("symbol already defined");
- Sym = getContext().GetOrCreateSymbol(Name);
+ MCSymbol *Sym = getContext().GetOrCreateSymbol(Name);
Sym->setVariableValue(Value);
return false;
@@ -4044,6 +4046,23 @@ bool MipsAsmParser::parseDirectiveOption() {
return false;
}
+/// parseInsnDirective
+/// ::= .insn
+bool MipsAsmParser::parseInsnDirective() {
+ // If this is not the end of the statement, report an error.
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ reportParseError("unexpected token, expected end of statement");
+ return false;
+ }
+
+ // The actual label marking happens in
+ // MipsELFStreamer::createPendingLabelRelocs().
+ getTargetStreamer().emitDirectiveInsn();
+
+ getParser().Lex(); // Eat EndOfStatement token.
+ return false;
+}
+
/// parseDirectiveModule
/// ::= .module oddspreg
/// ::= .module nooddspreg
@@ -4437,6 +4456,9 @@ bool MipsAsmParser::ParseDirective(AsmToken DirectiveID) {
if (IDVal == ".llvm_internal_mips_reallow_module_directive")
return parseInternalDirectiveReallowModule();
+ if (IDVal == ".insn")
+ return parseInsnDirective();
+
return true;
}
diff --git a/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp b/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp
index aad549d..e80a47b 100644
--- a/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp
+++ b/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp
@@ -77,7 +77,7 @@ void MipsInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
}
void MipsInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
- StringRef Annot) {
+ StringRef Annot, const MCSubtargetInfo &STI) {
switch (MI->getOpcode()) {
default:
break;
diff --git a/lib/Target/Mips/InstPrinter/MipsInstPrinter.h b/lib/Target/Mips/InstPrinter/MipsInstPrinter.h
index 468dc07..713f35c 100644
--- a/lib/Target/Mips/InstPrinter/MipsInstPrinter.h
+++ b/lib/Target/Mips/InstPrinter/MipsInstPrinter.h
@@ -86,7 +86,8 @@ public:
static const char *getRegisterName(unsigned RegNo);
void printRegName(raw_ostream &OS, unsigned RegNo) const override;
- void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot) override;
+ void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot,
+ const MCSubtargetInfo &STI) override;
bool printAliasInstr(const MCInst *MI, raw_ostream &OS);
void printCustomAliasOperand(const MCInst *MI, unsigned OpIdx,
diff --git a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
index acf6f21..dbcd867 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
@@ -155,7 +155,8 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
return Value;
}
-MCObjectWriter *MipsAsmBackend::createObjectWriter(raw_ostream &OS) const {
+MCObjectWriter *
+MipsAsmBackend::createObjectWriter(raw_pwrite_stream &OS) const {
return createMipsELFObjectWriter(OS,
MCELFObjectTargetWriter::getOSABI(OSType), IsLittle, Is64Bit);
}
diff --git a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h
index 243b73d..b3d5a49 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h
+++ b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h
@@ -36,7 +36,7 @@ public:
bool Is64Bit)
: MCAsmBackend(), OSType(OSType), IsLittle(IsLittle), Is64Bit(Is64Bit) {}
- MCObjectWriter *createObjectWriter(raw_ostream &OS) const override;
+ MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override;
void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
uint64_t Value, bool IsPCRel) const override;
diff --git a/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp b/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp
index a68bf16..8d9e3e3 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp
@@ -10,6 +10,7 @@
#include "MCTargetDesc/MipsBaseInfo.h"
#include "MCTargetDesc/MipsFixupKinds.h"
#include "MCTargetDesc/MipsMCTargetDesc.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCELF.h"
#include "llvm/MC/MCELFObjectWriter.h"
@@ -22,17 +23,33 @@
using namespace llvm;
namespace {
+// A helper structure based on ELFRelocationEntry, used for sorting entries in
+// the relocation table.
+struct MipsRelocationEntry {
+ MipsRelocationEntry(const ELFRelocationEntry &R)
+ : R(R), SortOffset(R.Offset), HasMatchingHi(false) {}
+ const ELFRelocationEntry R;
+ // SortOffset equals R.Offset except for the *HI16 relocations, for which it
+ // will be set based on the R.Offset of the matching *LO16 relocation.
+ int64_t SortOffset;
+ // True when this is a *LO16 relocation chosen as a match for a *HI16
+ // relocation.
+ bool HasMatchingHi;
+};
+
class MipsELFObjectWriter : public MCELFObjectTargetWriter {
public:
MipsELFObjectWriter(bool _is64Bit, uint8_t OSABI,
bool _isN64, bool IsLittleEndian);
- virtual ~MipsELFObjectWriter();
+ ~MipsELFObjectWriter() override;
unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup,
bool IsPCRel) const override;
bool needsRelocateWithSymbol(const MCSymbolData &SD,
unsigned Type) const override;
+ virtual void sortRelocs(const MCAssembler &Asm,
+ std::vector<ELFRelocationEntry> &Relocs) override;
};
}
@@ -225,6 +242,169 @@ unsigned MipsELFObjectWriter::GetRelocType(const MCValue &Target,
return Type;
}
+// Sort entries by SortOffset in descending order.
+// When several *HI16 relocs are paired with one *LO16 reloc, the second rule
+// sorts them in ascending order of R.Offset.
+static int cmpRelMips(const MipsRelocationEntry *AP,
+ const MipsRelocationEntry *BP) {
+ const MipsRelocationEntry &A = *AP;
+ const MipsRelocationEntry &B = *BP;
+ if (A.SortOffset != B.SortOffset)
+ return B.SortOffset - A.SortOffset;
+ if (A.R.Offset != B.R.Offset)
+ return A.R.Offset - B.R.Offset;
+ if (B.R.Type != A.R.Type)
+ return B.R.Type - A.R.Type;
+ //llvm_unreachable("ELFRelocs might be unstable!");
+ return 0;
+}
+
+// For the given Reloc.Type, return the matching relocation type, as in the
+// table below.
+static unsigned getMatchingLoType(const MCAssembler &Asm,
+ const ELFRelocationEntry &Reloc) {
+ unsigned Type = Reloc.Type;
+ if (Type == ELF::R_MIPS_HI16)
+ return ELF::R_MIPS_LO16;
+ if (Type == ELF::R_MICROMIPS_HI16)
+ return ELF::R_MICROMIPS_LO16;
+ if (Type == ELF::R_MIPS16_HI16)
+ return ELF::R_MIPS16_LO16;
+
+ const MCSymbolData &SD = Asm.getSymbolData(*Reloc.Symbol);
+
+ if (MCELF::GetBinding(SD) != ELF::STB_LOCAL)
+ return ELF::R_MIPS_NONE;
+
+ if (Type == ELF::R_MIPS_GOT16)
+ return ELF::R_MIPS_LO16;
+ if (Type == ELF::R_MICROMIPS_GOT16)
+ return ELF::R_MICROMIPS_LO16;
+ if (Type == ELF::R_MIPS16_GOT16)
+ return ELF::R_MIPS16_LO16;
+
+ return ELF::R_MIPS_NONE;
+}
+
+// Return true if First needs a matching *LO16, its matching *LO16 type equals
+// Second's type and both relocations are against the same symbol.
+static bool areMatchingHiAndLo(const MCAssembler &Asm,
+ const ELFRelocationEntry &First,
+ const ELFRelocationEntry &Second) {
+ return getMatchingLoType(Asm, First) != ELF::R_MIPS_NONE &&
+ getMatchingLoType(Asm, First) == Second.Type &&
+ First.Symbol && First.Symbol == Second.Symbol;
+}
+
+// Return true if MipsRelocs[Index] is a *LO16 preceded by a matching *HI16.
+static bool
+isPrecededByMatchingHi(const MCAssembler &Asm, uint32_t Index,
+ std::vector<MipsRelocationEntry> &MipsRelocs) {
+ return Index < MipsRelocs.size() - 1 &&
+ areMatchingHiAndLo(Asm, MipsRelocs[Index + 1].R, MipsRelocs[Index].R);
+}
+
+// Return true if MipsRelocs[Index] is a *LO16 not preceded by a matching *HI16
+// and not chosen by a *HI16 as a match.
+static bool isFreeLo(const MCAssembler &Asm, uint32_t Index,
+ std::vector<MipsRelocationEntry> &MipsRelocs) {
+ return Index < MipsRelocs.size() && !MipsRelocs[Index].HasMatchingHi &&
+ !isPrecededByMatchingHi(Asm, Index, MipsRelocs);
+}
+
+// Lo is chosen as a match for Hi, set their fields accordingly.
+// Mips instructions have fixed length of at least two bytes (two for
+// micromips/mips16, four for mips32/64), so we can set HI's SortOffset to
+// matching LO's Offset minus one to simplify the sorting function.
+static void setMatch(MipsRelocationEntry &Hi, MipsRelocationEntry &Lo) {
+ Lo.HasMatchingHi = true;
+ Hi.SortOffset = Lo.R.Offset - 1;
+}
+
+// We sort relocation table entries by offset, except for one additional rule
+// required by the MIPS ABI: every *HI16 relocation must be immediately followed
+// by the corresponding *LO16 relocation. We also support a GNU extension that
+// allows several *HI16s to be paired with one *LO16.
+//
+// *HI16 relocations and their matching *LO16 are:
+//
+// +---------------------------------------------+-------------------+
+// | *HI16 | matching *LO16 |
+// |---------------------------------------------+-------------------|
+// | R_MIPS_HI16, local R_MIPS_GOT16 | R_MIPS_LO16 |
+// | R_MICROMIPS_HI16, local R_MICROMIPS_GOT16 | R_MICROMIPS_LO16 |
+// | R_MIPS16_HI16, local R_MIPS16_GOT16 | R_MIPS16_LO16 |
+// +---------------------------------------------+-------------------+
+//
+// ("local R_*_GOT16" means an R_*_GOT16 relocation against a local symbol.)
+//
+// To handle *HI16 and *LO16 relocations, the linker needs a combined addend
+// ("AHL") calculated from both *HI16 ("AHI") and *LO16 ("ALO") relocations:
+// AHL = (AHI << 16) + (short)ALO;
+//
+// We reuse the sorting algorithm of GNU as, so the relocation table is emitted
+// in the same order GNU as would produce, which makes it easier to compare the
+// generated .o files.
+//
+// The logic is:
+// search the table (starting from the highest offset and going back to zero)
+// for all *HI16 relocations that don't have a matching *LO16.
+// For every such HI, find a matching LO with highest offset that isn't already
+// matched with another HI. If there are no free LOs, match it with the first
+// found (starting from lowest offset).
+// When there are more HIs matched with one LO, sort them in descending order by
+// offset.
+//
+// In other words, when searching for a matching LO:
+// - don't look for a 'better' match for the HIs that are already followed by a
+// matching LO;
+// - prefer LOs without a pair;
+// - prefer LOs with higher offset;
+void MipsELFObjectWriter::sortRelocs(const MCAssembler &Asm,
+ std::vector<ELFRelocationEntry> &Relocs) {
+ if (Relocs.size() < 2)
+ return;
+
+ // The default function sorts entries by Offset in descending order.
+ MCELFObjectTargetWriter::sortRelocs(Asm, Relocs);
+
+ // Init MipsRelocs from Relocs.
+ std::vector<MipsRelocationEntry> MipsRelocs;
+ for (unsigned I = 0, E = Relocs.size(); I != E; ++I)
+ MipsRelocs.push_back(MipsRelocationEntry(Relocs[I]));
+
+ // Find a matching LO for all HIs that need it.
+ for (int32_t I = 0, E = MipsRelocs.size(); I != E; ++I) {
+ if (getMatchingLoType(Asm, MipsRelocs[I].R) == ELF::R_MIPS_NONE ||
+ (I > 0 && isPrecededByMatchingHi(Asm, I - 1, MipsRelocs)))
+ continue;
+
+ int32_t MatchedLoIndex = -1;
+
+ // Search the list in the ascending order of Offset.
+ for (int32_t J = MipsRelocs.size() - 1, N = -1; J != N; --J) {
+ // check for a match
+ if (areMatchingHiAndLo(Asm, MipsRelocs[I].R, MipsRelocs[J].R) &&
+ (MatchedLoIndex == -1 || // first match
+ // or we already have a match,
+ // but this one has a higher offset and it's free
+ (MatchedLoIndex > J && isFreeLo(Asm, J, MipsRelocs))))
+ MatchedLoIndex = J;
+ }
+
+ if (MatchedLoIndex != -1)
+ // We have a match.
+ setMatch(MipsRelocs[I], MipsRelocs[MatchedLoIndex]);
+ }
+
+ // SortOffsets are calculated, call the sorting function.
+ array_pod_sort(MipsRelocs.begin(), MipsRelocs.end(), cmpRelMips);
+
+ // Copy sorted MipsRelocs back to Relocs.
+ for (unsigned I = 0, E = MipsRelocs.size(); I != E; ++I)
+ Relocs[I] = MipsRelocs[I].R;
+}
+
bool
MipsELFObjectWriter::needsRelocateWithSymbol(const MCSymbolData &SD,
unsigned Type) const {
@@ -264,7 +444,8 @@ MipsELFObjectWriter::needsRelocateWithSymbol(const MCSymbolData &SD,
}
}
-MCObjectWriter *llvm::createMipsELFObjectWriter(raw_ostream &OS, uint8_t OSABI,
+MCObjectWriter *llvm::createMipsELFObjectWriter(raw_pwrite_stream &OS,
+ uint8_t OSABI,
bool IsLittleEndian,
bool Is64Bit) {
MCELFObjectTargetWriter *MOTW =
diff --git a/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp b/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp
index 93f60df..6d1d9f4 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp
@@ -21,8 +21,6 @@ void MipsELFStreamer::EmitInstruction(const MCInst &Inst,
MCContext &Context = getContext();
const MCRegisterInfo *MCRegInfo = Context.getRegisterInfo();
- MipsTargetELFStreamer *ELFTargetStreamer =
- static_cast<MipsTargetELFStreamer *>(getTargetStreamer());
for (unsigned OpIndex = 0; OpIndex < Inst.getNumOperands(); ++OpIndex) {
const MCOperand &Op = Inst.getOperand(OpIndex);
@@ -34,6 +32,14 @@ void MipsELFStreamer::EmitInstruction(const MCInst &Inst,
RegInfoRecord->SetPhysRegUsed(Reg, MCRegInfo);
}
+ createPendingLabelRelocs();
+}
+
+void MipsELFStreamer::createPendingLabelRelocs() {
+ MipsTargetELFStreamer *ELFTargetStreamer =
+ static_cast<MipsTargetELFStreamer *>(getTargetStreamer());
+
+ // FIXME: Also mark labels when in MIPS16 mode.
if (ELFTargetStreamer->isMicroMipsEnabled()) {
for (auto Label : Labels) {
MCSymbolData &Data = getOrCreateSymbolData(Label);
@@ -70,7 +76,8 @@ void MipsELFStreamer::EmitMipsOptionRecords() {
}
MCELFStreamer *llvm::createMipsELFStreamer(MCContext &Context,
- MCAsmBackend &MAB, raw_ostream &OS,
+ MCAsmBackend &MAB,
+ raw_pwrite_stream &OS,
MCCodeEmitter *Emitter,
bool RelaxAll) {
return new MipsELFStreamer(Context, MAB, OS, Emitter);
diff --git a/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h b/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h
index 6b834c6..4e30901 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h
+++ b/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h
@@ -33,7 +33,7 @@ class MipsELFStreamer : public MCELFStreamer {
public:
- MipsELFStreamer(MCContext &Context, MCAsmBackend &MAB, raw_ostream &OS,
+ MipsELFStreamer(MCContext &Context, MCAsmBackend &MAB, raw_pwrite_stream &OS,
MCCodeEmitter *Emitter)
: MCELFStreamer(Context, MAB, OS, Emitter) {
@@ -65,10 +65,13 @@ public:
/// Emits all the option records stored up until the point it's called.
void EmitMipsOptionRecords();
+
+ /// Mark labels as microMIPS, if necessary for the subtarget.
+ void createPendingLabelRelocs();
};
MCELFStreamer *createMipsELFStreamer(MCContext &Context, MCAsmBackend &MAB,
- raw_ostream &OS, MCCodeEmitter *Emitter,
- bool RelaxAll);
+ raw_pwrite_stream &OS,
+ MCCodeEmitter *Emitter, bool RelaxAll);
} // namespace llvm.
#endif
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.h b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.h
index b01726d..cc40e2e 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.h
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.h
@@ -43,7 +43,7 @@ public:
MipsMCCodeEmitter(const MCInstrInfo &mcii, MCContext &Ctx_, bool IsLittle)
: MCII(mcii), Ctx(Ctx_), IsLittleEndian(IsLittle) {}
- ~MipsMCCodeEmitter() {}
+ ~MipsMCCodeEmitter() override {}
void EmitByte(unsigned char C, raw_ostream &OS) const;
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCNaCl.h b/lib/Target/Mips/MCTargetDesc/MipsMCNaCl.h
index e6b5be7..687b800 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCNaCl.h
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCNaCl.h
@@ -23,9 +23,8 @@ bool baseRegNeedsLoadStoreMask(unsigned Reg);
// This function creates an MCELFStreamer for Mips NaCl.
MCELFStreamer *createMipsNaClELFStreamer(MCContext &Context, MCAsmBackend &TAB,
- raw_ostream &OS,
- MCCodeEmitter *Emitter,
- bool RelaxAll);
+ raw_pwrite_stream &OS,
+ MCCodeEmitter *Emitter, bool RelaxAll);
}
#endif
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp
index 6f3f37b..a75d27d 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp
@@ -97,17 +97,16 @@ static MCCodeGenInfo *createMipsMCCodeGenInfo(StringRef TT, Reloc::Model RM,
return X;
}
-static MCInstPrinter *createMipsMCInstPrinter(const Target &T,
+static MCInstPrinter *createMipsMCInstPrinter(const Triple &T,
unsigned SyntaxVariant,
const MCAsmInfo &MAI,
const MCInstrInfo &MII,
- const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI) {
+ const MCRegisterInfo &MRI) {
return new MipsInstPrinter(MAI, MII, MRI);
}
static MCStreamer *createMCStreamer(const Triple &T, MCContext &Context,
- MCAsmBackend &MAB, raw_ostream &OS,
+ MCAsmBackend &MAB, raw_pwrite_stream &OS,
MCCodeEmitter *Emitter, bool RelaxAll) {
MCStreamer *S;
if (!T.isOSNaCl())
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h
index 92f394a..577a8b3 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h
@@ -27,6 +27,7 @@ class MCSubtargetInfo;
class StringRef;
class Target;
class raw_ostream;
+class raw_pwrite_stream;
extern Target TheMipsTarget;
extern Target TheMipselTarget;
@@ -53,7 +54,7 @@ MCAsmBackend *createMipsAsmBackendEL64(const Target &T,
const MCRegisterInfo &MRI, StringRef TT,
StringRef CPU);
-MCObjectWriter *createMipsELFObjectWriter(raw_ostream &OS, uint8_t OSABI,
+MCObjectWriter *createMipsELFObjectWriter(raw_pwrite_stream &OS, uint8_t OSABI,
bool IsLittleEndian, bool Is64Bit);
namespace MIPS_MC {
diff --git a/lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp b/lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp
index 1adfdf9..35348aa 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp
@@ -36,11 +36,11 @@ const unsigned LoadStoreStackMaskReg = Mips::T7;
class MipsNaClELFStreamer : public MipsELFStreamer {
public:
- MipsNaClELFStreamer(MCContext &Context, MCAsmBackend &TAB, raw_ostream &OS,
- MCCodeEmitter *Emitter)
+ MipsNaClELFStreamer(MCContext &Context, MCAsmBackend &TAB,
+ raw_pwrite_stream &OS, MCCodeEmitter *Emitter)
: MipsELFStreamer(Context, TAB, OS, Emitter), PendingCall(false) {}
- ~MipsNaClELFStreamer() {}
+ ~MipsNaClELFStreamer() override {}
private:
// Whether we started the sandboxing sequence for calls. Calls are bundled
@@ -252,7 +252,7 @@ bool baseRegNeedsLoadStoreMask(unsigned Reg) {
}
MCELFStreamer *createMipsNaClELFStreamer(MCContext &Context, MCAsmBackend &TAB,
- raw_ostream &OS,
+ raw_pwrite_stream &OS,
MCCodeEmitter *Emitter,
bool RelaxAll) {
MipsNaClELFStreamer *S = new MipsNaClELFStreamer(Context, TAB, OS, Emitter);
diff --git a/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp b/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp
index 5790a5c..cfd56c6 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp
@@ -54,6 +54,7 @@ void MipsTargetStreamer::emitDirectiveNaN2008() {}
void MipsTargetStreamer::emitDirectiveNaNLegacy() {}
void MipsTargetStreamer::emitDirectiveOptionPic0() {}
void MipsTargetStreamer::emitDirectiveOptionPic2() {}
+void MipsTargetStreamer::emitDirectiveInsn() { forbidModuleDirective(); }
void MipsTargetStreamer::emitFrame(unsigned StackReg, unsigned StackSize,
unsigned ReturnReg) {}
void MipsTargetStreamer::emitMask(unsigned CPUBitmask, int CPUTopSavedRegOff) {}
@@ -189,6 +190,11 @@ void MipsTargetAsmStreamer::emitDirectiveOptionPic2() {
OS << "\t.option\tpic2\n";
}
+void MipsTargetAsmStreamer::emitDirectiveInsn() {
+ MipsTargetStreamer::emitDirectiveInsn();
+ OS << "\t.insn\n";
+}
+
void MipsTargetAsmStreamer::emitFrame(unsigned StackReg, unsigned StackSize,
unsigned ReturnReg) {
OS << "\t.frame\t$"
@@ -507,9 +513,8 @@ void MipsTargetELFStreamer::emitAssignment(MCSymbol *Symbol,
const MCSymbol &RhsSym =
static_cast<const MCSymbolRefExpr *>(Value)->getSymbol();
MCSymbolData &Data = getStreamer().getOrCreateSymbolData(&RhsSym);
- uint8_t Type = MCELF::GetType(Data);
- if ((Type != ELF::STT_FUNC) ||
- !(MCELF::getOther(Data) & (ELF::STO_MIPS_MICROMIPS >> 2)))
+
+ if (!(MCELF::getOther(Data) & (ELF::STO_MIPS_MICROMIPS >> 2)))
return;
MCSymbolData &SymbolData = getStreamer().getOrCreateSymbolData(Symbol);
@@ -637,6 +642,12 @@ void MipsTargetELFStreamer::emitDirectiveOptionPic2() {
MCA.setELFHeaderEFlags(Flags);
}
+void MipsTargetELFStreamer::emitDirectiveInsn() {
+ MipsTargetStreamer::emitDirectiveInsn();
+ MipsELFStreamer &MEF = static_cast<MipsELFStreamer &>(Streamer);
+ MEF.createPendingLabelRelocs();
+}
+
void MipsTargetELFStreamer::emitFrame(unsigned StackReg, unsigned StackSize,
unsigned ReturnReg_) {
MCContext &Context = getStreamer().getAssembler().getContext();
diff --git a/lib/Target/Mips/Mips16FrameLowering.cpp b/lib/Target/Mips/Mips16FrameLowering.cpp
index abecfa0..5828fbd 100644
--- a/lib/Target/Mips/Mips16FrameLowering.cpp
+++ b/lib/Target/Mips/Mips16FrameLowering.cpp
@@ -143,25 +143,6 @@ bool Mips16FrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
return true;
}
-// Eliminate ADJCALLSTACKDOWN, ADJCALLSTACKUP pseudo instructions
-void Mips16FrameLowering::
-eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const {
- if (!hasReservedCallFrame(MF)) {
- int64_t Amount = I->getOperand(0).getImm();
-
- if (I->getOpcode() == Mips::ADJCALLSTACKDOWN)
- Amount = -Amount;
-
- const Mips16InstrInfo &TII =
- *static_cast<const Mips16InstrInfo *>(STI.getInstrInfo());
-
- TII.adjustStackPtr(Mips::SP, Amount, MBB, I);
- }
-
- MBB.erase(I);
-}
-
bool
Mips16FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
diff --git a/lib/Target/Mips/Mips16FrameLowering.h b/lib/Target/Mips/Mips16FrameLowering.h
index 012d558..0287e59 100644
--- a/lib/Target/Mips/Mips16FrameLowering.h
+++ b/lib/Target/Mips/Mips16FrameLowering.h
@@ -26,10 +26,6 @@ public:
void emitPrologue(MachineFunction &MF) const override;
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
- void eliminateCallFramePseudoInstr(MachineFunction &MF,
- MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const override;
-
bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
const std::vector<CalleeSavedInfo> &CSI,
diff --git a/lib/Target/Mips/Mips16InstrInfo.cpp b/lib/Target/Mips/Mips16InstrInfo.cpp
index 00d4495..a49572e 100644
--- a/lib/Target/Mips/Mips16InstrInfo.cpp
+++ b/lib/Target/Mips/Mips16InstrInfo.cpp
@@ -293,6 +293,9 @@ void Mips16InstrInfo::adjustStackPtrBigUnrestricted(
void Mips16InstrInfo::adjustStackPtr(unsigned SP, int64_t Amount,
MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const {
+ if (Amount == 0)
+ return;
+
if (isInt<16>(Amount)) // need to change to addiu sp, ....and isInt<16>
BuildAddiuSpImm(MBB, I, Amount);
else
diff --git a/lib/Target/Mips/Mips16InstrInfo.h b/lib/Target/Mips/Mips16InstrInfo.h
index f9b7387..6540b40 100644
--- a/lib/Target/Mips/Mips16InstrInfo.h
+++ b/lib/Target/Mips/Mips16InstrInfo.h
@@ -77,7 +77,7 @@ public:
/// Adjust SP by Amount bytes.
void adjustStackPtr(unsigned SP, int64_t Amount, MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const;
+ MachineBasicBlock::iterator I) const override;
/// Emit a series of instructions to load an immediate.
// This is to adjust some FrameReg. We return the new register to be used
diff --git a/lib/Target/Mips/MipsAsmPrinter.cpp b/lib/Target/Mips/MipsAsmPrinter.cpp
index 1eb3b2c..9024f21 100644
--- a/lib/Target/Mips/MipsAsmPrinter.cpp
+++ b/lib/Target/Mips/MipsAsmPrinter.cpp
@@ -260,31 +260,22 @@ void MipsAsmPrinter::printSavedRegsBitmask() {
unsigned AFGR64RegSize = Mips::AFGR64RegClass.getSize();
bool HasAFGR64Reg = false;
unsigned CSFPRegsSize = 0;
- unsigned i, e = CSI.size();
-
- // Set FPU Bitmask.
- for (i = 0; i != e; ++i) {
- unsigned Reg = CSI[i].getReg();
- if (Mips::GPR32RegClass.contains(Reg))
- break;
+ for (const auto &I : CSI) {
+ unsigned Reg = I.getReg();
unsigned RegNum = TRI->getEncodingValue(Reg);
- if (Mips::AFGR64RegClass.contains(Reg)) {
+
+ // If it's a floating point register, set the FPU Bitmask.
+ // If it's a general purpose register, set the CPU Bitmask.
+ if (Mips::FGR32RegClass.contains(Reg)) {
+ FPUBitmask |= (1 << RegNum);
+ CSFPRegsSize += FGR32RegSize;
+ } else if (Mips::AFGR64RegClass.contains(Reg)) {
FPUBitmask |= (3 << RegNum);
CSFPRegsSize += AFGR64RegSize;
HasAFGR64Reg = true;
- continue;
- }
-
- FPUBitmask |= (1 << RegNum);
- CSFPRegsSize += FGR32RegSize;
- }
-
- // Set CPU Bitmask.
- for (; i != e; ++i) {
- unsigned Reg = CSI[i].getReg();
- unsigned RegNum = TRI->getEncodingValue(Reg);
- CPUBitmask |= (1 << RegNum);
+ } else if (Mips::GPR32RegClass.contains(Reg))
+ CPUBitmask |= (1 << RegNum);
}
// FP Regs are saved right below where the virtual frame pointer points to.
diff --git a/lib/Target/Mips/MipsFastISel.cpp b/lib/Target/Mips/MipsFastISel.cpp
index 7de0081..e8e3d3d 100644
--- a/lib/Target/Mips/MipsFastISel.cpp
+++ b/lib/Target/Mips/MipsFastISel.cpp
@@ -440,7 +440,7 @@ bool MipsFastISel::computeAddress(const Value *Obj, Address &Addr) {
bool MipsFastISel::computeCallAddress(const Value *V, Address &Addr) {
const GlobalValue *GV = dyn_cast<GlobalValue>(V);
- if (GV && isa<Function>(GV) && dyn_cast<Function>(GV)->isIntrinsic())
+ if (GV && isa<Function>(GV) && cast<Function>(GV)->isIntrinsic())
return false;
if (!GV)
return false;
diff --git a/lib/Target/Mips/MipsFrameLowering.cpp b/lib/Target/Mips/MipsFrameLowering.cpp
index 8b8b019..826fbaf 100644
--- a/lib/Target/Mips/MipsFrameLowering.cpp
+++ b/lib/Target/Mips/MipsFrameLowering.cpp
@@ -131,3 +131,20 @@ uint64_t MipsFrameLowering::estimateStackSize(const MachineFunction &MF) const {
return RoundUpToAlignment(Offset, getStackAlignment());
}
+
+// Eliminate ADJCALLSTACKDOWN, ADJCALLSTACKUP pseudo instructions
+void MipsFrameLowering::
+eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ unsigned SP = STI.getABI().IsN64() ? Mips::SP_64 : Mips::SP;
+
+ if (!hasReservedCallFrame(MF)) {
+ int64_t Amount = I->getOperand(0).getImm();
+ if (I->getOpcode() == Mips::ADJCALLSTACKDOWN)
+ Amount = -Amount;
+
+ STI.getInstrInfo()->adjustStackPtr(SP, Amount, MBB, I);
+ }
+
+ MBB.erase(I);
+}
diff --git a/lib/Target/Mips/MipsFrameLowering.h b/lib/Target/Mips/MipsFrameLowering.h
index 90a8d2a..96d1e29 100644
--- a/lib/Target/Mips/MipsFrameLowering.h
+++ b/lib/Target/Mips/MipsFrameLowering.h
@@ -32,6 +32,11 @@ public:
bool hasFP(const MachineFunction &MF) const override;
+ void
+ eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const override;
+
protected:
uint64_t estimateStackSize(const MachineFunction &MF) const;
};
diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp
index e4bae03..f37737d 100644
--- a/lib/Target/Mips/MipsISelLowering.cpp
+++ b/lib/Target/Mips/MipsISelLowering.cpp
@@ -3682,6 +3682,7 @@ void MipsTargetLowering::passByValArg(
DAG.getIntPtrConstant(VA.getLocMemOffset()));
Chain = DAG.getMemcpy(Chain, DL, Dst, Src, DAG.getConstant(MemCpySize, PtrTy),
Alignment, /*isVolatile=*/false, /*AlwaysInline=*/false,
+ /*isTailCall=*/false,
MachinePointerInfo(), MachinePointerInfo());
MemOpChains.push_back(Chain);
}
diff --git a/lib/Target/Mips/MipsInstrInfo.h b/lib/Target/Mips/MipsInstrInfo.h
index 7b2b289..4589535 100644
--- a/lib/Target/Mips/MipsInstrInfo.h
+++ b/lib/Target/Mips/MipsInstrInfo.h
@@ -117,6 +117,10 @@ public:
const TargetRegisterInfo *TRI,
int64_t Offset) const = 0;
+ virtual void adjustStackPtr(unsigned SP, int64_t Amount,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const = 0;
+
/// Create an instruction which has the same operands and memory operands
/// as MI but has a new opcode.
MachineInstrBuilder genInstrWithNewOpc(unsigned NewOpc,
diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td
index c937d2b..a1fad66 100644
--- a/lib/Target/Mips/MipsInstrInfo.td
+++ b/lib/Target/Mips/MipsInstrInfo.td
@@ -186,10 +186,8 @@ def InMips16Mode : Predicate<"Subtarget->inMips16Mode()">,
AssemblerPredicate<"FeatureMips16">;
def HasCnMips : Predicate<"Subtarget->hasCnMips()">,
AssemblerPredicate<"FeatureCnMips">;
-def RelocStatic : Predicate<"TM.getRelocationModel() == Reloc::Static">,
- AssemblerPredicate<"FeatureMips32">;
-def RelocPIC : Predicate<"TM.getRelocationModel() == Reloc::PIC_">,
- AssemblerPredicate<"FeatureMips32">;
+def RelocStatic : Predicate<"TM.getRelocationModel() == Reloc::Static">;
+def RelocPIC : Predicate<"TM.getRelocationModel() == Reloc::PIC_">;
def NoNaNsFPMath : Predicate<"TM.Options.NoNaNsFPMath">;
def HasStdEnc : Predicate<"Subtarget->hasStandardEncoding()">,
AssemblerPredicate<"!FeatureMips16">;
@@ -1596,8 +1594,12 @@ def : MipsInstAlias<"b $offset", (BEQ ZERO, ZERO, brtarget:$offset), 0>;
}
def : MipsInstAlias<"bnez $rs,$offset",
(BNE GPR32Opnd:$rs, ZERO, brtarget:$offset), 0>;
+def : MipsInstAlias<"bnezl $rs,$offset",
+ (BNEL GPR32Opnd:$rs, ZERO, brtarget:$offset), 0>;
def : MipsInstAlias<"beqz $rs,$offset",
(BEQ GPR32Opnd:$rs, ZERO, brtarget:$offset), 0>;
+def : MipsInstAlias<"beqzl $rs,$offset",
+ (BEQL GPR32Opnd:$rs, ZERO, brtarget:$offset), 0>;
def : MipsInstAlias<"syscall", (SYSCALL 0), 1>;
def : MipsInstAlias<"break", (BREAK 0, 0), 1>;
diff --git a/lib/Target/Mips/MipsMachineFunction.cpp b/lib/Target/Mips/MipsMachineFunction.cpp
index 09e722d..0d1ee04 100644
--- a/lib/Target/Mips/MipsMachineFunction.cpp
+++ b/lib/Target/Mips/MipsMachineFunction.cpp
@@ -60,15 +60,7 @@ void MipsCallEntry::printCustom(raw_ostream &O) const {
#endif
}
-MipsFunctionInfo::~MipsFunctionInfo() {
- for (StringMap<const MipsCallEntry *>::iterator
- I = ExternalCallEntries.begin(), E = ExternalCallEntries.end(); I != E;
- ++I)
- delete I->getValue();
-
- for (const auto &Entry : GlobalCallEntries)
- delete Entry.second;
-}
+MipsFunctionInfo::~MipsFunctionInfo() {}
bool MipsFunctionInfo::globalBaseRegSet() const {
return GlobalBaseReg;
@@ -125,21 +117,21 @@ bool MipsFunctionInfo::isEhDataRegFI(int FI) const {
}
MachinePointerInfo MipsFunctionInfo::callPtrInfo(StringRef Name) {
- const MipsCallEntry *&E = ExternalCallEntries[Name];
+ std::unique_ptr<const MipsCallEntry> &E = ExternalCallEntries[Name];
if (!E)
- E = new MipsCallEntry(Name);
+ E = llvm::make_unique<MipsCallEntry>(Name);
- return MachinePointerInfo(E);
+ return MachinePointerInfo(E.get());
}
MachinePointerInfo MipsFunctionInfo::callPtrInfo(const GlobalValue *Val) {
- const MipsCallEntry *&E = GlobalCallEntries[Val];
+ std::unique_ptr<const MipsCallEntry> &E = GlobalCallEntries[Val];
if (!E)
- E = new MipsCallEntry(Val);
+ E = llvm::make_unique<MipsCallEntry>(Val);
- return MachinePointerInfo(E);
+ return MachinePointerInfo(E.get());
}
int MipsFunctionInfo::getMoveF64ViaSpillFI(const TargetRegisterClass *RC) {
diff --git a/lib/Target/Mips/MipsMachineFunction.h b/lib/Target/Mips/MipsMachineFunction.h
index 217f307..32436ef 100644
--- a/lib/Target/Mips/MipsMachineFunction.h
+++ b/lib/Target/Mips/MipsMachineFunction.h
@@ -144,8 +144,9 @@ private:
int MoveF64ViaSpillFI;
/// MipsCallEntry maps.
- StringMap<const MipsCallEntry *> ExternalCallEntries;
- ValueMap<const GlobalValue *, const MipsCallEntry *> GlobalCallEntries;
+ StringMap<std::unique_ptr<const MipsCallEntry>> ExternalCallEntries;
+ ValueMap<const GlobalValue *, std::unique_ptr<const MipsCallEntry>>
+ GlobalCallEntries;
};
} // end of namespace llvm
diff --git a/lib/Target/Mips/MipsOptionRecord.h b/lib/Target/Mips/MipsOptionRecord.h
index dc29cbd..746feab 100644
--- a/lib/Target/Mips/MipsOptionRecord.h
+++ b/lib/Target/Mips/MipsOptionRecord.h
@@ -52,7 +52,7 @@ public:
COP2RegClass = &(TRI->getRegClass(Mips::COP2RegClassID));
COP3RegClass = &(TRI->getRegClass(Mips::COP3RegClassID));
}
- ~MipsRegInfoRecord() {}
+ ~MipsRegInfoRecord() override {}
void EmitMipsOptionRecord() override;
void SetPhysRegUsed(unsigned Reg, const MCRegisterInfo *MCRegInfo);
diff --git a/lib/Target/Mips/MipsSEFrameLowering.cpp b/lib/Target/Mips/MipsSEFrameLowering.cpp
index 7c79c4c..23feb5c 100644
--- a/lib/Target/Mips/MipsSEFrameLowering.cpp
+++ b/lib/Target/Mips/MipsSEFrameLowering.cpp
@@ -607,26 +607,6 @@ MipsSEFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
!MFI->hasVarSizedObjects();
}
-// Eliminate ADJCALLSTACKDOWN, ADJCALLSTACKUP pseudo instructions
-void MipsSEFrameLowering::
-eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const {
- const MipsSEInstrInfo &TII =
- *static_cast<const MipsSEInstrInfo *>(STI.getInstrInfo());
-
- if (!hasReservedCallFrame(MF)) {
- int64_t Amount = I->getOperand(0).getImm();
-
- if (I->getOpcode() == Mips::ADJCALLSTACKDOWN)
- Amount = -Amount;
-
- unsigned SP = STI.isABI_N64() ? Mips::SP_64 : Mips::SP;
- TII.adjustStackPtr(SP, Amount, MBB, I);
- }
-
- MBB.erase(I);
-}
-
void MipsSEFrameLowering::
processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
RegScavenger *RS) const {
diff --git a/lib/Target/Mips/MipsSEFrameLowering.h b/lib/Target/Mips/MipsSEFrameLowering.h
index 0eca1df..22448a4 100644
--- a/lib/Target/Mips/MipsSEFrameLowering.h
+++ b/lib/Target/Mips/MipsSEFrameLowering.h
@@ -27,10 +27,6 @@ public:
void emitPrologue(MachineFunction &MF) const override;
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
- void eliminateCallFramePseudoInstr(MachineFunction &MF,
- MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const override;
-
bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
const std::vector<CalleeSavedInfo> &CSI,
diff --git a/lib/Target/Mips/MipsSEISelDAGToDAG.cpp b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
index a598c3f..6daa632 100644
--- a/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
+++ b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
@@ -948,7 +948,6 @@ SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
llvm_unreachable("Unexpected asm memory constraint");
// All memory constraints can at least accept raw pointers.
case InlineAsm::Constraint_i:
- case InlineAsm::Constraint_R:
OutOps.push_back(Op);
OutOps.push_back(CurDAG->getTargetConstant(0, MVT::i32));
return false;
@@ -961,6 +960,20 @@ SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
OutOps.push_back(Op);
OutOps.push_back(CurDAG->getTargetConstant(0, MVT::i32));
return false;
+ case InlineAsm::Constraint_R:
+ // The 'R' constraint is supposed to be much more complicated than this.
+ // However, it's becoming less useful due to architectural changes and
+ // ought to be replaced by other constraints such as 'ZC'.
+ // For now, support 9-bit signed offsets which is supportable by all
+ // subtargets for all instructions.
+ if (selectAddrRegImm9(Op, Base, Offset)) {
+ OutOps.push_back(Base);
+ OutOps.push_back(Offset);
+ return false;
+ }
+ OutOps.push_back(Op);
+ OutOps.push_back(CurDAG->getTargetConstant(0, MVT::i32));
+ return false;
case InlineAsm::Constraint_ZC:
// ZC matches whatever the pref, ll, and sc instructions can handle for the
// given subtarget.
diff --git a/lib/Target/Mips/MipsSEInstrInfo.cpp b/lib/Target/Mips/MipsSEInstrInfo.cpp
index b992579..cb38393 100644
--- a/lib/Target/Mips/MipsSEInstrInfo.cpp
+++ b/lib/Target/Mips/MipsSEInstrInfo.cpp
@@ -364,6 +364,9 @@ void MipsSEInstrInfo::adjustStackPtr(unsigned SP, int64_t Amount,
unsigned ADDu = STI.isABI_N64() ? Mips::DADDu : Mips::ADDu;
unsigned ADDiu = STI.isABI_N64() ? Mips::DADDiu : Mips::ADDiu;
+ if (Amount == 0)
+ return;
+
if (isInt<16>(Amount))// addi sp, sp, amount
BuildMI(MBB, I, DL, get(ADDiu), SP).addReg(SP).addImm(Amount);
else { // Expand immediate that doesn't fit in 16-bit.
diff --git a/lib/Target/Mips/MipsSEInstrInfo.h b/lib/Target/Mips/MipsSEInstrInfo.h
index d16fab2..bebbabf 100644
--- a/lib/Target/Mips/MipsSEInstrInfo.h
+++ b/lib/Target/Mips/MipsSEInstrInfo.h
@@ -68,7 +68,7 @@ public:
/// Adjust SP by Amount bytes.
void adjustStackPtr(unsigned SP, int64_t Amount, MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const;
+ MachineBasicBlock::iterator I) const override;
/// Emit a series of instructions to load an immediate. If NewImm is a
/// non-NULL parameter, the last instruction is not emitted, but instead
diff --git a/lib/Target/Mips/MipsTargetStreamer.h b/lib/Target/Mips/MipsTargetStreamer.h
index 1ff041d..22b0c6c 100644
--- a/lib/Target/Mips/MipsTargetStreamer.h
+++ b/lib/Target/Mips/MipsTargetStreamer.h
@@ -45,6 +45,7 @@ public:
virtual void emitDirectiveNaNLegacy();
virtual void emitDirectiveOptionPic0();
virtual void emitDirectiveOptionPic2();
+ virtual void emitDirectiveInsn();
virtual void emitFrame(unsigned StackReg, unsigned StackSize,
unsigned ReturnReg);
virtual void emitMask(unsigned CPUBitmask, int CPUTopSavedRegOff);
@@ -160,6 +161,7 @@ public:
void emitDirectiveNaNLegacy() override;
void emitDirectiveOptionPic0() override;
void emitDirectiveOptionPic2() override;
+ void emitDirectiveInsn() override;
void emitFrame(unsigned StackReg, unsigned StackSize,
unsigned ReturnReg) override;
void emitMask(unsigned CPUBitmask, int CPUTopSavedRegOff) override;
@@ -227,6 +229,7 @@ public:
void emitDirectiveNaNLegacy() override;
void emitDirectiveOptionPic0() override;
void emitDirectiveOptionPic2() override;
+ void emitDirectiveInsn() override;
void emitFrame(unsigned StackReg, unsigned StackSize,
unsigned ReturnReg) override;
void emitMask(unsigned CPUBitmask, int CPUTopSavedRegOff) override;
diff --git a/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.cpp b/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.cpp
index 80b2f62..ac92df9 100644
--- a/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.cpp
+++ b/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.cpp
@@ -28,13 +28,9 @@ using namespace llvm;
#include "NVPTXGenAsmWriter.inc"
-
NVPTXInstPrinter::NVPTXInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
- const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI)
- : MCInstPrinter(MAI, MII, MRI) {
- setAvailableFeatures(STI.getFeatureBits());
-}
+ const MCRegisterInfo &MRI)
+ : MCInstPrinter(MAI, MII, MRI) {}
void NVPTXInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
// Decode the virtual register
@@ -72,7 +68,7 @@ void NVPTXInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
}
void NVPTXInstPrinter::printInst(const MCInst *MI, raw_ostream &OS,
- StringRef Annot) {
+ StringRef Annot, const MCSubtargetInfo &STI) {
printInstruction(MI, OS);
// Next always print the annotation.
diff --git a/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.h b/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.h
index 0496964..02c5a21 100644
--- a/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.h
+++ b/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.h
@@ -25,10 +25,11 @@ class MCSubtargetInfo;
class NVPTXInstPrinter : public MCInstPrinter {
public:
NVPTXInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
- const MCRegisterInfo &MRI, const MCSubtargetInfo &STI);
+ const MCRegisterInfo &MRI);
void printRegName(raw_ostream &OS, unsigned RegNo) const override;
- void printInst(const MCInst *MI, raw_ostream &OS, StringRef Annot) override;
+ void printInst(const MCInst *MI, raw_ostream &OS, StringRef Annot,
+ const MCSubtargetInfo &STI) override;
// Autogenerated by tblgen.
void printInstruction(const MCInst *MI, raw_ostream &O);
diff --git a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp
index 2b4d864..f9e4324 100644
--- a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp
+++ b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp
@@ -58,14 +58,13 @@ static MCCodeGenInfo *createNVPTXMCCodeGenInfo(
return X;
}
-static MCInstPrinter *createNVPTXMCInstPrinter(const Target &T,
+static MCInstPrinter *createNVPTXMCInstPrinter(const Triple &T,
unsigned SyntaxVariant,
const MCAsmInfo &MAI,
const MCInstrInfo &MII,
- const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI) {
+ const MCRegisterInfo &MRI) {
if (SyntaxVariant == 0)
- return new NVPTXInstPrinter(MAI, MII, MRI, STI);
+ return new NVPTXInstPrinter(MAI, MII, MRI);
return nullptr;
}
diff --git a/lib/Target/NVPTX/NVPTX.td b/lib/Target/NVPTX/NVPTX.td
index 93fabf6..96abfa8 100644
--- a/lib/Target/NVPTX/NVPTX.td
+++ b/lib/Target/NVPTX/NVPTX.td
@@ -32,20 +32,28 @@ def SM21 : SubtargetFeature<"sm_21", "SmVersion", "21",
"Target SM 2.1">;
def SM30 : SubtargetFeature<"sm_30", "SmVersion", "30",
"Target SM 3.0">;
+def SM32 : SubtargetFeature<"sm_32", "SmVersion", "32",
+ "Target SM 3.2">;
def SM35 : SubtargetFeature<"sm_35", "SmVersion", "35",
"Target SM 3.5">;
+def SM37 : SubtargetFeature<"sm_37", "SmVersion", "37",
+ "Target SM 3.7">;
def SM50 : SubtargetFeature<"sm_50", "SmVersion", "50",
"Target SM 5.0">;
+def SM52 : SubtargetFeature<"sm_52", "SmVersion", "52",
+ "Target SM 5.2">;
+def SM53 : SubtargetFeature<"sm_53", "SmVersion", "53",
+ "Target SM 5.3">;
// PTX Versions
-def PTX30 : SubtargetFeature<"ptx30", "PTXVersion", "30",
- "Use PTX version 3.0">;
-def PTX31 : SubtargetFeature<"ptx31", "PTXVersion", "31",
- "Use PTX version 3.1">;
def PTX32 : SubtargetFeature<"ptx32", "PTXVersion", "32",
"Use PTX version 3.2">;
def PTX40 : SubtargetFeature<"ptx40", "PTXVersion", "40",
"Use PTX version 4.0">;
+def PTX41 : SubtargetFeature<"ptx41", "PTXVersion", "41",
+ "Use PTX version 4.1">;
+def PTX42 : SubtargetFeature<"ptx42", "PTXVersion", "42",
+ "Use PTX version 4.2">;
//===----------------------------------------------------------------------===//
// NVPTX supported processors.
@@ -57,8 +65,12 @@ class Proc<string Name, list<SubtargetFeature> Features>
def : Proc<"sm_20", [SM20]>;
def : Proc<"sm_21", [SM21]>;
def : Proc<"sm_30", [SM30]>;
+def : Proc<"sm_32", [SM32, PTX40]>;
def : Proc<"sm_35", [SM35]>;
-def : Proc<"sm_50", [SM50]>;
+def : Proc<"sm_37", [SM37, PTX41]>;
+def : Proc<"sm_50", [SM50, PTX40]>;
+def : Proc<"sm_52", [SM52, PTX41]>;
+def : Proc<"sm_53", [SM53, PTX42]>;
def NVPTXInstrInfo : InstrInfo {
diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
index cc58b07..9a71964 100644
--- a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
+++ b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
@@ -118,7 +118,7 @@ void NVPTXAsmPrinter::emitLineNumberAsDotLoc(const MachineInstr &MI) {
DebugLoc curLoc = MI.getDebugLoc();
- if (prevDebugLoc.isUnknown() && curLoc.isUnknown())
+ if (!prevDebugLoc && !curLoc)
return;
if (prevDebugLoc == curLoc)
@@ -126,39 +126,32 @@ void NVPTXAsmPrinter::emitLineNumberAsDotLoc(const MachineInstr &MI) {
prevDebugLoc = curLoc;
- if (curLoc.isUnknown())
+ if (!curLoc)
return;
- const MachineFunction *MF = MI.getParent()->getParent();
- //const TargetMachine &TM = MF->getTarget();
-
- const LLVMContext &ctx = MF->getFunction()->getContext();
- DIScope Scope(curLoc.getScope(ctx));
-
- assert((!Scope || Scope.isScope()) &&
- "Scope of a DebugLoc should be null or a DIScope.");
+ auto *Scope = cast_or_null<MDScope>(curLoc.getScope());
if (!Scope)
return;
- StringRef fileName(Scope.getFilename());
- StringRef dirName(Scope.getDirectory());
+ StringRef fileName(Scope->getFilename());
+ StringRef dirName(Scope->getDirectory());
SmallString<128> FullPathName = dirName;
if (!dirName.empty() && !sys::path::is_absolute(fileName)) {
sys::path::append(FullPathName, fileName);
- fileName = FullPathName.str();
+ fileName = FullPathName;
}
- if (filenameMap.find(fileName.str()) == filenameMap.end())
+ if (filenameMap.find(fileName) == filenameMap.end())
return;
// Emit the line from the source file.
if (InterleaveSrc)
- this->emitSrcInText(fileName.str(), curLoc.getLine());
+ this->emitSrcInText(fileName, curLoc.getLine());
std::stringstream temp;
- temp << "\t.loc " << filenameMap[fileName.str()] << " " << curLoc.getLine()
+ temp << "\t.loc " << filenameMap[fileName] << " " << curLoc.getLine()
<< " " << curLoc.getCol();
- OutStreamer.EmitRawText(Twine(temp.str().c_str()));
+ OutStreamer.EmitRawText(temp.str());
}
void NVPTXAsmPrinter::EmitInstruction(const MachineInstr *MI) {
@@ -641,7 +634,7 @@ static bool usedInGlobalVarDef(const Constant *C) {
return false;
if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) {
- if (GV->getName().str() == "llvm.used")
+ if (GV->getName() == "llvm.used")
return false;
return true;
}
@@ -656,7 +649,7 @@ static bool usedInGlobalVarDef(const Constant *C) {
static bool usedInOneFunc(const User *U, Function const *&oneFunc) {
if (const GlobalVariable *othergv = dyn_cast<GlobalVariable>(U)) {
- if (othergv->getName().str() == "llvm.used")
+ if (othergv->getName() == "llvm.used")
return true;
}
@@ -780,32 +773,32 @@ void NVPTXAsmPrinter::recordAndEmitFilenames(Module &M) {
DbgFinder.processModule(M);
unsigned i = 1;
- for (DICompileUnit DIUnit : DbgFinder.compile_units()) {
- StringRef Filename(DIUnit.getFilename());
- StringRef Dirname(DIUnit.getDirectory());
+ for (const MDCompileUnit *DIUnit : DbgFinder.compile_units()) {
+ StringRef Filename = DIUnit->getFilename();
+ StringRef Dirname = DIUnit->getDirectory();
SmallString<128> FullPathName = Dirname;
if (!Dirname.empty() && !sys::path::is_absolute(Filename)) {
sys::path::append(FullPathName, Filename);
- Filename = FullPathName.str();
+ Filename = FullPathName;
}
- if (filenameMap.find(Filename.str()) != filenameMap.end())
+ if (filenameMap.find(Filename) != filenameMap.end())
continue;
- filenameMap[Filename.str()] = i;
- OutStreamer.EmitDwarfFileDirective(i, "", Filename.str());
+ filenameMap[Filename] = i;
+ OutStreamer.EmitDwarfFileDirective(i, "", Filename);
++i;
}
- for (DISubprogram SP : DbgFinder.subprograms()) {
- StringRef Filename(SP.getFilename());
- StringRef Dirname(SP.getDirectory());
+ for (MDSubprogram *SP : DbgFinder.subprograms()) {
+ StringRef Filename = SP->getFilename();
+ StringRef Dirname = SP->getDirectory();
SmallString<128> FullPathName = Dirname;
if (!Dirname.empty() && !sys::path::is_absolute(Filename)) {
sys::path::append(FullPathName, Filename);
- Filename = FullPathName.str();
+ Filename = FullPathName;
}
- if (filenameMap.find(Filename.str()) != filenameMap.end())
+ if (filenameMap.find(Filename) != filenameMap.end())
continue;
- filenameMap[Filename.str()] = i;
+ filenameMap[Filename] = i;
++i;
}
}
@@ -1011,7 +1004,7 @@ void NVPTXAsmPrinter::emitLinkageDirective(const GlobalValue *V,
msg.append("Error: ");
msg.append("Symbol ");
if (V->hasName())
- msg.append(V->getName().str());
+ msg.append(V->getName());
msg.append("has unsupported appending linkage type");
llvm_unreachable(msg.c_str());
} else if (!V->hasInternalLinkage() &&
@@ -1147,7 +1140,7 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,
const Function *demotedFunc = nullptr;
if (!processDemoted && canDemoteGlobalVar(GVar, demotedFunc)) {
- O << "// " << GVar->getName().str() << " has been demoted\n";
+ O << "// " << GVar->getName() << " has been demoted\n";
if (localDecls.find(demotedFunc) != localDecls.end())
localDecls[demotedFunc].push_back(GVar);
else {
@@ -1195,9 +1188,10 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,
// The frontend adds zero-initializer to variables that don't have an
// initial value, so skip warning for this case.
if (!GVar->getInitializer()->isNullValue()) {
- std::string warnMsg = "initial value of '" + GVar->getName().str() +
- "' is not allowed in addrspace(" +
- llvm::utostr_32(PTy->getAddressSpace()) + ")";
+ std::string warnMsg =
+ ("initial value of '" + GVar->getName() +
+ "' is not allowed in addrspace(" +
+ Twine(llvm::utostr_32(PTy->getAddressSpace())) + ")").str();
report_fatal_error(warnMsg.c_str());
}
}
@@ -1771,12 +1765,11 @@ void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes,
case Type::IntegerTyID: {
const Type *ETy = CPV->getType();
if (ETy == Type::getInt8Ty(CPV->getContext())) {
- unsigned char c =
- (unsigned char)(dyn_cast<ConstantInt>(CPV))->getZExtValue();
+ unsigned char c = (unsigned char)cast<ConstantInt>(CPV)->getZExtValue();
ptr = &c;
aggBuffer->addBytes(ptr, 1, Bytes);
} else if (ETy == Type::getInt16Ty(CPV->getContext())) {
- short int16 = (short)(dyn_cast<ConstantInt>(CPV))->getZExtValue();
+ short int16 = (short)cast<ConstantInt>(CPV)->getZExtValue();
ptr = (unsigned char *)&int16;
aggBuffer->addBytes(ptr, 2, Bytes);
} else if (ETy == Type::getInt32Ty(CPV->getContext())) {
@@ -2086,7 +2079,7 @@ void NVPTXAsmPrinter::printMemOperand(const MachineInstr *MI, int opNum,
void NVPTXAsmPrinter::emitSrcInText(StringRef filename, unsigned line) {
std::stringstream temp;
- LineReader *reader = this->getReader(filename.str());
+ LineReader *reader = this->getReader(filename);
temp << "\n//";
temp << filename.str();
temp << ":";
@@ -2094,7 +2087,7 @@ void NVPTXAsmPrinter::emitSrcInText(StringRef filename, unsigned line) {
temp << " ";
temp << reader->readLine(line);
temp << "\n";
- this->OutStreamer.EmitRawText(Twine(temp.str()));
+ this->OutStreamer.EmitRawText(temp.str());
}
LineReader *NVPTXAsmPrinter::getReader(std::string filename) {
diff --git a/lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp b/lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp
index 6d7c99c..ae63cae 100644
--- a/lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp
+++ b/lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp
@@ -132,9 +132,8 @@ bool NVPTXFavorNonGenericAddrSpaces::hoistAddrSpaceCastFromGEP(
} else {
// GEP is a constant expression.
Constant *NewGEPCE = ConstantExpr::getGetElementPtr(
- cast<Constant>(Cast->getOperand(0)),
- Indices,
- GEP->isInBounds());
+ GEP->getSourceElementType(), cast<Constant>(Cast->getOperand(0)),
+ Indices, GEP->isInBounds());
GEP->replaceAllUsesWith(
ConstantExpr::getAddrSpaceCast(NewGEPCE, GEP->getType()));
}
diff --git a/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp b/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp
index 850c020..6fd09c4 100644
--- a/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp
+++ b/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp
@@ -347,6 +347,7 @@ Value *GenericToNVVM::remapConstantExpr(Module *M, Function *F, ConstantExpr *C,
NewOperands[0],
makeArrayRef(&NewOperands[1], NumOperands - 1))
: Builder.CreateInBoundsGEP(
+ cast<GEPOperator>(C)->getSourceElementType(),
NewOperands[0],
makeArrayRef(&NewOperands[1], NumOperands - 1));
case Instruction::Select:
diff --git a/lib/Target/NVPTX/NVPTXISelLowering.cpp b/lib/Target/NVPTX/NVPTXISelLowering.cpp
index ff74e6e..8b06657 100644
--- a/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -3893,7 +3893,7 @@ static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1,
const SDNode *left = N0.getOperand(0).getNode();
const SDNode *right = N0.getOperand(1).getNode();
- if (dyn_cast<ConstantSDNode>(left) || dyn_cast<ConstantSDNode>(right))
+ if (isa<ConstantSDNode>(left) || isa<ConstantSDNode>(right))
opIsLive = true;
if (!opIsLive)
diff --git a/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp b/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp
index 578401a..6ab0fad 100644
--- a/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp
+++ b/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp
@@ -70,8 +70,8 @@ static void convertTransferToLoop(
// srcAddr and dstAddr are expected to be pointer types,
// so no check is made here.
- unsigned srcAS = dyn_cast<PointerType>(srcAddr->getType())->getAddressSpace();
- unsigned dstAS = dyn_cast<PointerType>(dstAddr->getType())->getAddressSpace();
+ unsigned srcAS = cast<PointerType>(srcAddr->getType())->getAddressSpace();
+ unsigned dstAS = cast<PointerType>(dstAddr->getType())->getAddressSpace();
// Cast pointers to (char *)
srcAddr = builder.CreateBitCast(srcAddr, Type::getInt8PtrTy(Context, srcAS));
@@ -84,9 +84,11 @@ static void convertTransferToLoop(
ind->addIncoming(ConstantInt::get(indType, 0), origBB);
// load from srcAddr+ind
- Value *val = loop.CreateLoad(loop.CreateGEP(srcAddr, ind), srcVolatile);
+ Value *val = loop.CreateLoad(loop.CreateGEP(loop.getInt8Ty(), srcAddr, ind),
+ srcVolatile);
// store at dstAddr+ind
- loop.CreateStore(val, loop.CreateGEP(dstAddr, ind), dstVolatile);
+ loop.CreateStore(val, loop.CreateGEP(loop.getInt8Ty(), dstAddr, ind),
+ dstVolatile);
// The value for ind coming from backedge is (ind + 1)
Value *newind = loop.CreateAdd(ind, ConstantInt::get(indType, 1));
@@ -106,7 +108,7 @@ static void convertMemSetToLoop(Instruction *splitAt, Value *dstAddr,
origBB->getTerminator()->setSuccessor(0, loopBB);
IRBuilder<> builder(origBB, origBB->getTerminator());
- unsigned dstAS = dyn_cast<PointerType>(dstAddr->getType())->getAddressSpace();
+ unsigned dstAS = cast<PointerType>(dstAddr->getType())->getAddressSpace();
// Cast pointer to the type of value getting stored
dstAddr =
@@ -116,7 +118,7 @@ static void convertMemSetToLoop(Instruction *splitAt, Value *dstAddr,
PHINode *ind = loop.CreatePHI(len->getType(), 0);
ind->addIncoming(ConstantInt::get(len->getType(), 0), origBB);
- loop.CreateStore(val, loop.CreateGEP(dstAddr, ind), false);
+ loop.CreateStore(val, loop.CreateGEP(val->getType(), dstAddr, ind), false);
Value *newind = loop.CreateAdd(ind, ConstantInt::get(len->getType(), 1));
ind->addIncoming(newind, loopBB);
diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.h b/lib/Target/NVPTX/NVPTXTargetMachine.h
index b8df5af..2cd10e8 100644
--- a/lib/Target/NVPTX/NVPTXTargetMachine.h
+++ b/lib/Target/NVPTX/NVPTXTargetMachine.h
@@ -52,7 +52,7 @@ public:
TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
// Emission of machine code through MCJIT is not supported.
- bool addPassesToEmitMC(PassManagerBase &, MCContext *&, raw_ostream &,
+ bool addPassesToEmitMC(PassManagerBase &, MCContext *&, raw_pwrite_stream &,
bool = true) override {
return true;
}
diff --git a/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp b/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
index b8af04d..dc81802 100644
--- a/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
+++ b/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
@@ -8,6 +8,7 @@
//===----------------------------------------------------------------------===//
#include "NVPTXTargetTransformInfo.h"
+#include "NVPTXUtilities.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
@@ -19,6 +20,75 @@ using namespace llvm;
#define DEBUG_TYPE "NVPTXtti"
+// Whether the given intrinsic reads threadIdx.x/y/z.
+static bool readsThreadIndex(const IntrinsicInst *II) {
+ switch (II->getIntrinsicID()) {
+ default: return false;
+ case Intrinsic::nvvm_read_ptx_sreg_tid_x:
+ case Intrinsic::nvvm_read_ptx_sreg_tid_y:
+ case Intrinsic::nvvm_read_ptx_sreg_tid_z:
+ return true;
+ }
+}
+
+static bool readsLaneId(const IntrinsicInst *II) {
+ return II->getIntrinsicID() == Intrinsic::ptx_read_laneid;
+}
+
+// Whether the given intrinsic is an atomic instruction in PTX.
+static bool isNVVMAtomic(const IntrinsicInst *II) {
+ switch (II->getIntrinsicID()) {
+ default: return false;
+ case Intrinsic::nvvm_atomic_load_add_f32:
+ case Intrinsic::nvvm_atomic_load_inc_32:
+ case Intrinsic::nvvm_atomic_load_dec_32:
+ return true;
+ }
+}
+
+bool NVPTXTTIImpl::isSourceOfDivergence(const Value *V) {
+ // Without inter-procedural analysis, we conservatively assume that arguments
+ // to __device__ functions are divergent.
+ if (const Argument *Arg = dyn_cast<Argument>(V))
+ return !isKernelFunction(*Arg->getParent());
+
+ if (const Instruction *I = dyn_cast<Instruction>(V)) {
+ // Without pointer analysis, we conservatively assume values loaded from
+ // generic or local address space are divergent.
+ if (const LoadInst *LI = dyn_cast<LoadInst>(I)) {
+ unsigned AS = LI->getPointerAddressSpace();
+ return AS == ADDRESS_SPACE_GENERIC || AS == ADDRESS_SPACE_LOCAL;
+ }
+ // Atomic instructions may cause divergence. Atomic instructions are
+ // executed sequentially across all threads in a warp. Therefore, an earlier
+ // executed thread may see different memory inputs than a later executed
+ // thread. For example, suppose *a = 0 initially.
+ //
+ // atom.global.add.s32 d, [a], 1
+ //
+ // returns 0 for the first thread that enters the critical region, and 1 for
+ // the second thread.
+ if (I->isAtomic())
+ return true;
+ if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
+ // Instructions that read threadIdx are obviously divergent.
+ if (readsThreadIndex(II) || readsLaneId(II))
+ return true;
+ // Handle the NVPTX atomic intrinsics that cannot be represented as an
+ // atomic IR instruction.
+ if (isNVVMAtomic(II))
+ return true;
+ }
+ // Conservatively consider the return value of function calls as divergent.
+ // We could analyze callees with bodies more precisely using
+ // inter-procedural analysis.
+ if (isa<CallInst>(I))
+ return true;
+ }
+
+ return false;
+}
+
unsigned NVPTXTTIImpl::getArithmeticInstrCost(
unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info,
TTI::OperandValueKind Opd2Info, TTI::OperandValueProperties Opd1PropInfo,
diff --git a/lib/Target/NVPTX/NVPTXTargetTransformInfo.h b/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
index bf21e88..4280888 100644
--- a/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
+++ b/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
@@ -61,6 +61,8 @@ public:
bool hasBranchDivergence() { return true; }
+ bool isSourceOfDivergence(const Value *V);
+
unsigned getArithmeticInstrCost(
unsigned Opcode, Type *Ty,
TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
diff --git a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
index 99a1633..90ab7a5 100644
--- a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
+++ b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
@@ -1071,6 +1071,58 @@ void PPCAsmParser::ProcessInstruction(MCInst &Inst,
Inst = TmpInst;
break;
}
+ case PPC::RLWINMbm:
+ case PPC::RLWINMobm: {
+ unsigned MB, ME;
+ int64_t BM = Inst.getOperand(3).getImm();
+ if (!isRunOfOnes(BM, MB, ME))
+ break;
+
+ MCInst TmpInst;
+ TmpInst.setOpcode(Opcode == PPC::RLWINMbm ? PPC::RLWINM : PPC::RLWINMo);
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(1));
+ TmpInst.addOperand(Inst.getOperand(2));
+ TmpInst.addOperand(MCOperand::CreateImm(MB));
+ TmpInst.addOperand(MCOperand::CreateImm(ME));
+ Inst = TmpInst;
+ break;
+ }
+ case PPC::RLWIMIbm:
+ case PPC::RLWIMIobm: {
+ unsigned MB, ME;
+ int64_t BM = Inst.getOperand(3).getImm();
+ if (!isRunOfOnes(BM, MB, ME))
+ break;
+
+ MCInst TmpInst;
+ TmpInst.setOpcode(Opcode == PPC::RLWIMIbm ? PPC::RLWIMI : PPC::RLWIMIo);
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(0)); // The tied operand.
+ TmpInst.addOperand(Inst.getOperand(1));
+ TmpInst.addOperand(Inst.getOperand(2));
+ TmpInst.addOperand(MCOperand::CreateImm(MB));
+ TmpInst.addOperand(MCOperand::CreateImm(ME));
+ Inst = TmpInst;
+ break;
+ }
+ case PPC::RLWNMbm:
+ case PPC::RLWNMobm: {
+ unsigned MB, ME;
+ int64_t BM = Inst.getOperand(3).getImm();
+ if (!isRunOfOnes(BM, MB, ME))
+ break;
+
+ MCInst TmpInst;
+ TmpInst.setOpcode(Opcode == PPC::RLWNMbm ? PPC::RLWNM : PPC::RLWNMo);
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(1));
+ TmpInst.addOperand(Inst.getOperand(2));
+ TmpInst.addOperand(MCOperand::CreateImm(MB));
+ TmpInst.addOperand(MCOperand::CreateImm(ME));
+ Inst = TmpInst;
+ break;
+ }
}
}
diff --git a/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp b/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
index a9f5fc7..5cbf3d9 100644
--- a/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
+++ b/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
@@ -25,7 +25,7 @@ class PPCDisassembler : public MCDisassembler {
public:
PPCDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx)
: MCDisassembler(STI, Ctx) {}
- virtual ~PPCDisassembler() {}
+ ~PPCDisassembler() override {}
DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size,
ArrayRef<uint8_t> Bytes, uint64_t Address,
diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
index 311a4f2..1576544 100644
--- a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
+++ b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
@@ -51,7 +51,7 @@ void PPCInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
}
void PPCInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
- StringRef Annot) {
+ StringRef Annot, const MCSubtargetInfo &STI) {
// Check for slwi/srwi mnemonics.
if (MI->getOpcode() == PPC::RLWINM) {
unsigned char SH = MI->getOperand(2).getImm();
diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h
index 8718743..eca37eb 100644
--- a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h
+++ b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h
@@ -32,7 +32,8 @@ public:
}
void printRegName(raw_ostream &OS, unsigned RegNo) const override;
- void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot) override;
+ void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot,
+ const MCSubtargetInfo &STI) override;
// Autogenerated by tblgen.
void printInstruction(const MCInst *MI, raw_ostream &O);
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
index bea88a2..420c5c8 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
@@ -208,7 +208,7 @@ namespace {
public:
DarwinPPCAsmBackend(const Target &T) : PPCAsmBackend(T, false) { }
- MCObjectWriter *createObjectWriter(raw_ostream &OS) const override {
+ MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override {
bool is64 = getPointerSize() == 8;
return createPPCMachObjectWriter(
OS,
@@ -224,8 +224,7 @@ namespace {
ELFPPCAsmBackend(const Target &T, bool IsLittleEndian, uint8_t OSABI) :
PPCAsmBackend(T, IsLittleEndian), OSABI(OSABI) { }
-
- MCObjectWriter *createObjectWriter(raw_ostream &OS) const override {
+ MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override {
bool is64 = getPointerSize() == 8;
return createPPCELFObjectWriter(OS, is64, isLittleEndian(), OSABI);
}
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
index b817394..3e3489f 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
@@ -412,7 +412,7 @@ bool PPCELFObjectWriter::needsRelocateWithSymbol(const MCSymbolData &SD,
}
}
-MCObjectWriter *llvm::createPPCELFObjectWriter(raw_ostream &OS,
+MCObjectWriter *llvm::createPPCELFObjectWriter(raw_pwrite_stream &OS,
bool Is64Bit,
bool IsLittleEndian,
uint8_t OSABI) {
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
index b9f0afb..725b47b 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
@@ -44,7 +44,7 @@ public:
: MCII(mcii), CTX(ctx),
IsLittleEndian(ctx.getAsmInfo()->isLittleEndian()) {}
- ~PPCMCCodeEmitter() {}
+ ~PPCMCCodeEmitter() override {}
unsigned getDirectBrEncoding(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
index 2f7a768..423e427 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
@@ -238,14 +238,12 @@ createObjectTargetStreamer(MCStreamer &S, const MCSubtargetInfo &STI) {
return new PPCTargetMachOStreamer(S);
}
-static MCInstPrinter *createPPCMCInstPrinter(const Target &T,
+static MCInstPrinter *createPPCMCInstPrinter(const Triple &T,
unsigned SyntaxVariant,
const MCAsmInfo &MAI,
const MCInstrInfo &MII,
- const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI) {
- bool isDarwin = Triple(STI.getTargetTriple()).isOSDarwin();
- return new PPCInstPrinter(MAI, MII, MRI, isDarwin);
+ const MCRegisterInfo &MRI) {
+ return new PPCInstPrinter(MAI, MII, MRI, T.isOSDarwin());
}
extern "C" void LLVMInitializePowerPCTargetMC() {
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
index 8b1e3b4..5f2117c 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
@@ -18,6 +18,7 @@
#undef PPC
#include "llvm/Support/DataTypes.h"
+#include "llvm/Support/MathExtras.h"
namespace llvm {
class MCAsmBackend;
@@ -29,6 +30,7 @@ class MCRegisterInfo;
class MCSubtargetInfo;
class Target;
class StringRef;
+class raw_pwrite_stream;
class raw_ostream;
extern Target ThePPC32Target;
@@ -42,15 +44,42 @@ MCCodeEmitter *createPPCMCCodeEmitter(const MCInstrInfo &MCII,
MCAsmBackend *createPPCAsmBackend(const Target &T, const MCRegisterInfo &MRI,
StringRef TT, StringRef CPU);
-/// createPPCELFObjectWriter - Construct an PPC ELF object writer.
-MCObjectWriter *createPPCELFObjectWriter(raw_ostream &OS,
- bool Is64Bit,
- bool IsLittleEndian,
- uint8_t OSABI);
-/// createPPCELFObjectWriter - Construct a PPC Mach-O object writer.
-MCObjectWriter *createPPCMachObjectWriter(raw_ostream &OS, bool Is64Bit,
+/// Construct a PPC ELF object writer.
+MCObjectWriter *createPPCELFObjectWriter(raw_pwrite_stream &OS, bool Is64Bit,
+ bool IsLittleEndian, uint8_t OSABI);
+/// Construct a PPC Mach-O object writer.
+MCObjectWriter *createPPCMachObjectWriter(raw_pwrite_stream &OS, bool Is64Bit,
uint32_t CPUType,
uint32_t CPUSubtype);
+
+/// Returns true iff Val consists of one contiguous run of 1s with any number of
+/// 0s on either side. The 1s are allowed to wrap from LSB to MSB, so
+/// 0x000FFF0, 0x0000FFFF, and 0xFF0000FF are all runs. 0x0F0F0000 is not,
+/// since all 1s are not contiguous.
+static inline bool isRunOfOnes(unsigned Val, unsigned &MB, unsigned &ME) {
+ if (!Val)
+ return false;
+
+ if (isShiftedMask_32(Val)) {
+ // look for the first non-zero bit
+ MB = countLeadingZeros(Val);
+ // look for the first zero bit after the run of ones
+ ME = countLeadingZeros((Val - 1) ^ Val);
+ return true;
+ } else {
+ Val = ~Val; // invert mask
+ if (isShiftedMask_32(Val)) {
+ // effectively look for the first zero bit
+ ME = countLeadingZeros(Val) - 1;
+ // effectively look for the first one bit after the run of zeros
+ MB = countLeadingZeros((Val - 1) ^ Val) + 1;
+ return true;
+ }
+ }
+ // no run present
+ return false;
+}
+
} // End llvm namespace
// Generated files will use "namespace PPC". To avoid symbol clash,
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp
index f7259b9..44e69b7 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp
@@ -378,8 +378,8 @@ void PPCMachObjectWriter::RecordPPCRelocation(
Writer->addRelocation(RelSymbol, Fragment->getParent(), MRE);
}
-MCObjectWriter *llvm::createPPCMachObjectWriter(raw_ostream &OS, bool Is64Bit,
- uint32_t CPUType,
+MCObjectWriter *llvm::createPPCMachObjectWriter(raw_pwrite_stream &OS,
+ bool Is64Bit, uint32_t CPUType,
uint32_t CPUSubtype) {
return createMachObjectWriter(
new PPCMachObjectWriter(Is64Bit, CPUType, CPUSubtype), OS,
diff --git a/lib/Target/PowerPC/PPC.td b/lib/Target/PowerPC/PPC.td
index f175f6d..1a02bcc 100644
--- a/lib/Target/PowerPC/PPC.td
+++ b/lib/Target/PowerPC/PPC.td
@@ -86,6 +86,10 @@ def FeatureISEL : SubtargetFeature<"isel","HasISEL", "true",
"Enable the isel instruction">;
def FeaturePOPCNTD : SubtargetFeature<"popcntd","HasPOPCNTD", "true",
"Enable the popcnt[dw] instructions">;
+def FeatureBPERMD : SubtargetFeature<"bpermd", "HasBPERMD", "true",
+ "Enable the bpermd instruction">;
+def FeatureExtDiv : SubtargetFeature<"extdiv", "HasExtDiv", "true",
+ "Enable extended divide instructions">;
def FeatureLDBRX : SubtargetFeature<"ldbrx","HasLDBRX", "true",
"Enable the ldbrx instruction">;
def FeatureCMPB : SubtargetFeature<"cmpb", "HasCMPB", "true",
@@ -118,6 +122,10 @@ def FeatureP8Crypto : SubtargetFeature<"crypto", "HasP8Crypto", "true",
def FeatureP8Vector : SubtargetFeature<"power8-vector", "HasP8Vector", "true",
"Enable POWER8 vector instructions",
[FeatureVSX, FeatureP8Altivec]>;
+def FeatureDirectMove :
+ SubtargetFeature<"direct-move", "HasDirectMove", "true",
+ "Enable Power8 direct move instructions",
+ [FeatureVSX]>;
def FeaturePartwordAtomic : SubtargetFeature<"partword-atomics",
"HasPartwordAtomics", "true",
"Enable l[bh]arx and st[bh]cx.">;
@@ -133,6 +141,38 @@ def DeprecatedMFTB : SubtargetFeature<"", "DeprecatedMFTB", "true",
def DeprecatedDST : SubtargetFeature<"", "DeprecatedDST", "true",
"Treat vector data stream cache control instructions as deprecated">;
+/* Since new processors generally contain a superset of features of those that
+ came before them, the idea is to make implementations of new processors
+ less error prone and easier to read.
+ Namely:
+ list<SubtargetFeature> Power8FeatureList = ...
+ list<SubtargetFeature> FutureProcessorSpecificFeatureList =
+ [ features that Power8 does not support ]
+ list<SubtargetFeature> FutureProcessorFeatureList =
+ !listconcat(Power8FeatureList, FutureProcessorSpecificFeatureList)
+
+ Makes it explicit and obvious what is new in FutureProcessor vs. Power8 as
+ well as providing a single point of definition if the feature set will be
+ used elsewhere.
+*/
+def ProcessorFeatures {
+ list<SubtargetFeature> Power7FeatureList =
+ [DirectivePwr7, FeatureAltivec, FeatureVSX,
+ FeatureMFOCRF, FeatureFCPSGN, FeatureFSqrt, FeatureFRE,
+ FeatureFRES, FeatureFRSQRTE, FeatureFRSQRTES,
+ FeatureRecipPrec, FeatureSTFIWX, FeatureLFIWAX,
+ FeatureFPRND, FeatureFPCVT, FeatureISEL,
+ FeaturePOPCNTD, FeatureCMPB, FeatureLDBRX,
+ Feature64Bit /*, Feature64BitRegs */,
+ FeatureBPERMD, FeatureExtDiv,
+ DeprecatedMFTB, DeprecatedDST];
+ list<SubtargetFeature> Power8SpecificFeatures =
+ [DirectivePwr8, FeatureP8Altivec, FeatureP8Vector, FeatureP8Crypto,
+ FeatureHTM, FeatureDirectMove, FeatureICBT, FeaturePartwordAtomic];
+ list<SubtargetFeature> Power8FeatureList =
+ !listconcat(Power7FeatureList, Power8SpecificFeatures);
+}
+
// Note: Future features to add when support is extended to more
// recent ISA levels:
//
@@ -243,33 +283,6 @@ def : Processor<"7450", G4PlusItineraries, [Directive7400, FeatureAltivec,
def : Processor<"g4+", G4PlusItineraries, [Directive7400, FeatureAltivec,
FeatureFRES, FeatureFRSQRTE]>;
-/* Since new processors generally contain a superset of features of those that
- came before them, the idea is to make implementations of new processors
- less error prone and easier to read.
- Namely:
- list<SubtargetFeature> Power8FeatureList = ...
- list<SubtargetFeature> FutureProcessorSpecificFeatureList =
- [ features that Power8 does not support ]
- list<SubtargetFeature> FutureProcessorFeatureList =
- !listconcat(Power8FeatureList, FutureProcessorSpecificFeatureList)
-
- Makes it explicit and obvious what is new in FutureProcesor vs. Power8 as
- well as providing a single point of definition if the feature set will be
- used elsewhere.
-
-*/
-def ProcessorFeatures {
- list<SubtargetFeature> Power8FeatureList =
- [DirectivePwr8, FeatureAltivec, FeatureP8Altivec, FeatureVSX,
- FeatureP8Vector, FeatureMFOCRF, FeatureFCPSGN, FeatureFSqrt,
- FeatureFRE, FeatureFRES, FeatureFRSQRTE, FeatureFRSQRTES,
- FeatureRecipPrec, FeatureSTFIWX, FeatureLFIWAX, FeatureHTM,
- FeatureFPRND, FeatureFPCVT, FeatureISEL,
- FeaturePOPCNTD, FeatureCMPB, FeatureLDBRX, FeatureP8Crypto,
- Feature64Bit /*, Feature64BitRegs */, FeatureICBT,
- FeaturePartwordAtomic, DeprecatedMFTB, DeprecatedDST];
-}
-
def : ProcessorModel<"970", G5Model,
[Directive970, FeatureAltivec,
FeatureMFOCRF, FeatureFSqrt,
@@ -339,15 +352,7 @@ def : ProcessorModel<"pwr6x", G5Model,
FeatureSTFIWX, FeatureLFIWAX, FeatureCMPB,
FeatureFPRND, Feature64Bit,
DeprecatedMFTB, DeprecatedDST]>;
-def : ProcessorModel<"pwr7", P7Model,
- [DirectivePwr7, FeatureAltivec, FeatureVSX,
- FeatureMFOCRF, FeatureFCPSGN, FeatureFSqrt, FeatureFRE,
- FeatureFRES, FeatureFRSQRTE, FeatureFRSQRTES,
- FeatureRecipPrec, FeatureSTFIWX, FeatureLFIWAX,
- FeatureFPRND, FeatureFPCVT, FeatureISEL,
- FeaturePOPCNTD, FeatureCMPB, FeatureLDBRX,
- Feature64Bit /*, Feature64BitRegs */, FeaturePartwordAtomic,
- DeprecatedMFTB, DeprecatedDST]>;
+def : ProcessorModel<"pwr7", P7Model, ProcessorFeatures.Power7FeatureList>;
def : ProcessorModel<"pwr8", P8Model, ProcessorFeatures.Power8FeatureList>;
def : Processor<"ppc", G3Itineraries, [Directive32]>;
def : ProcessorModel<"ppc64", G5Model,
diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp
index cd60906..383a1e2 100644
--- a/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -1105,25 +1105,6 @@ bool PPCLinuxAsmPrinter::doFinalization(Module &M) {
}
}
- MachineModuleInfoELF &MMIELF =
- MMI->getObjFileInfo<MachineModuleInfoELF>();
-
- MachineModuleInfoELF::SymbolListTy Stubs = MMIELF.GetGVStubList();
- if (!Stubs.empty()) {
- OutStreamer.SwitchSection(getObjFileLowering().getDataSection());
- for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
- // L_foo$stub:
- OutStreamer.EmitLabel(Stubs[i].first);
- // .long _foo
- OutStreamer.EmitValue(MCSymbolRefExpr::Create(Stubs[i].second.getPointer(),
- OutContext),
- isPPC64 ? 8 : 4/*size*/);
- }
-
- Stubs.clear();
- OutStreamer.AddBlankLine();
- }
-
return AsmPrinter::doFinalization(M);
}
diff --git a/lib/Target/PowerPC/PPCFastISel.cpp b/lib/Target/PowerPC/PPCFastISel.cpp
index fbd7b6d..002616b 100644
--- a/lib/Target/PowerPC/PPCFastISel.cpp
+++ b/lib/Target/PowerPC/PPCFastISel.cpp
@@ -958,6 +958,8 @@ unsigned PPCFastISel::PPCMoveToFPReg(MVT SrcVT, unsigned SrcReg,
}
// Attempt to fast-select an integer-to-floating-point conversion.
+// FIXME: Once fast-isel has better support for VSX, conversions using
+// direct moves should be implemented.
bool PPCFastISel::SelectIToFP(const Instruction *I, bool IsSigned) {
MVT DstVT;
Type *DstTy = I->getType();
@@ -1065,6 +1067,8 @@ unsigned PPCFastISel::PPCMoveToIntReg(const Instruction *I, MVT VT,
}
// Attempt to fast-select a floating-point-to-integer conversion.
+// FIXME: Once fast-isel has better support for VSX, conversions using
+// direct moves should be implemented.
bool PPCFastISel::SelectFPToI(const Instruction *I, bool IsSigned) {
MVT DstVT, SrcVT;
Type *DstTy = I->getType();
@@ -1444,6 +1448,9 @@ bool PPCFastISel::fastLowerCall(CallLoweringInfo &CLI) {
else if (!isTypeLegal(RetTy, RetVT) && RetVT != MVT::i16 &&
RetVT != MVT::i8)
return false;
+ else if (RetVT == MVT::i1 && PPCSubTarget->useCRBits())
+ // We can't handle boolean returns when CR bits are in use.
+ return false;
// FIXME: No multi-register return values yet.
if (RetVT != MVT::isVoid && RetVT != MVT::i8 && RetVT != MVT::i16 &&
diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index 3ac8e94..4f8d01b 100644
--- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -105,13 +105,6 @@ namespace {
return CurDAG->getTargetConstant(Imm, PPCLowering->getPointerTy());
}
- /// isRunOfOnes - Returns true iff Val consists of one contiguous run of 1s
- /// with any number of 0s on either side. The 1s are allowed to wrap from
- /// LSB to MSB, so 0x000FFF0, 0x0000FFFF, and 0xFF0000FF are all runs.
- /// 0x0F0F0000 is not, since all 1s are not contiguous.
- static bool isRunOfOnes(unsigned Val, unsigned &MB, unsigned &ME);
-
-
/// isRotateAndMask - Returns true if Mask and Shift can be folded into a
/// rotate and mask opcode and mask operation.
static bool isRotateAndMask(SDNode *N, unsigned Mask, bool isShiftMask,
@@ -418,30 +411,6 @@ SDNode *PPCDAGToDAGISel::getFrameIndex(SDNode *SN, SDNode *N, unsigned Offset) {
getSmallIPtrImm(Offset));
}
-bool PPCDAGToDAGISel::isRunOfOnes(unsigned Val, unsigned &MB, unsigned &ME) {
- if (!Val)
- return false;
-
- if (isShiftedMask_32(Val)) {
- // look for the first non-zero bit
- MB = countLeadingZeros(Val);
- // look for the first zero bit after the run of ones
- ME = countLeadingZeros((Val - 1) ^ Val);
- return true;
- } else {
- Val = ~Val; // invert mask
- if (isShiftedMask_32(Val)) {
- // effectively look for the first zero bit
- ME = countLeadingZeros(Val) - 1;
- // effectively look for the first one bit after the run of zeros
- MB = countLeadingZeros((Val - 1) ^ Val) + 1;
- return true;
- }
- }
- // no run present
- return false;
-}
-
bool PPCDAGToDAGISel::isRotateAndMask(SDNode *N, unsigned Mask,
bool isShiftMask, unsigned &SH,
unsigned &MB, unsigned &ME) {
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index 871531e..4c0b6a6 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -996,6 +996,9 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::EH_SJLJ_SETJMP: return "PPCISD::EH_SJLJ_SETJMP";
case PPCISD::EH_SJLJ_LONGJMP: return "PPCISD::EH_SJLJ_LONGJMP";
case PPCISD::MFOCRF: return "PPCISD::MFOCRF";
+ case PPCISD::MFVSR: return "PPCISD::MFVSR";
+ case PPCISD::MTVSRA: return "PPCISD::MTVSRA";
+ case PPCISD::MTVSRZ: return "PPCISD::MTVSRZ";
case PPCISD::VCMP: return "PPCISD::VCMP";
case PPCISD::VCMPo: return "PPCISD::VCMPo";
case PPCISD::LBRX: return "PPCISD::LBRX";
@@ -1287,22 +1290,6 @@ bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) {
return true;
}
-/// isAllNegativeZeroVector - Returns true if all elements of build_vector
-/// are -0.0.
-bool PPC::isAllNegativeZeroVector(SDNode *N) {
- BuildVectorSDNode *BV = cast<BuildVectorSDNode>(N);
-
- APInt APVal, APUndef;
- unsigned BitSize;
- bool HasAnyUndefs;
-
- if (BV->isConstantSplat(APVal, APUndef, BitSize, HasAnyUndefs, 32, true))
- if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N->getOperand(0)))
- return CFP->getValueAPF().isNegZero();
-
- return false;
-}
-
/// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the
/// specified isSplatShuffleMask VECTOR_SHUFFLE mask.
unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize,
@@ -2234,7 +2221,7 @@ SDValue PPCTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG,
// 2*sizeof(char) + 2 Byte alignment + 2*sizeof(char*) = 12 Byte
return DAG.getMemcpy(Op.getOperand(0), Op,
Op.getOperand(1), Op.getOperand(2),
- DAG.getConstant(12, MVT::i32), 8, false, true,
+ DAG.getConstant(12, MVT::i32), 8, false, true, false,
MachinePointerInfo(), MachinePointerInfo());
}
@@ -3821,7 +3808,7 @@ CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
SDLoc dl) {
SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32);
return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
- false, false, MachinePointerInfo(),
+ false, false, false, MachinePointerInfo(),
MachinePointerInfo());
}
@@ -5927,8 +5914,46 @@ void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI,
RLI.MPI = MPI;
}
+/// \brief Custom lowers floating point to integer conversions to use
+/// the direct move instructions available in ISA 2.07 to avoid the
+/// need for load/store combinations.
+SDValue PPCTargetLowering::LowerFP_TO_INTDirectMove(SDValue Op,
+ SelectionDAG &DAG,
+ SDLoc dl) const {
+ assert(Op.getOperand(0).getValueType().isFloatingPoint());
+ SDValue Src = Op.getOperand(0);
+
+ if (Src.getValueType() == MVT::f32)
+ Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
+
+ SDValue Tmp;
+ switch (Op.getSimpleValueType().SimpleTy) {
+ default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
+ case MVT::i32:
+ Tmp = DAG.getNode(
+ Op.getOpcode() == ISD::FP_TO_SINT
+ ? PPCISD::FCTIWZ
+ : (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ),
+ dl, MVT::f64, Src);
+ Tmp = DAG.getNode(PPCISD::MFVSR, dl, MVT::i32, Tmp);
+ break;
+ case MVT::i64:
+ assert((Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT()) &&
+ "i64 FP_TO_UINT is supported only with FPCVT");
+ Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
+ PPCISD::FCTIDUZ,
+ dl, MVT::f64, Src);
+ Tmp = DAG.getNode(PPCISD::MFVSR, dl, MVT::i64, Tmp);
+ break;
+ }
+ return Tmp;
+}
+
SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
SDLoc dl) const {
+ if (Subtarget.hasDirectMove() && Subtarget.isPPC64())
+ return LowerFP_TO_INTDirectMove(Op, DAG, dl);
+
ReuseLoadInfo RLI;
LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
@@ -6006,6 +6031,38 @@ void PPCTargetLowering::spliceIntoChain(SDValue ResChain,
DAG.UpdateNodeOperands(TF.getNode(), ResChain, NewResChain);
}
+/// \brief Custom lowers integer to floating point conversions to use
+/// the direct move instructions available in ISA 2.07 to avoid the
+/// need for load/store combinations.
+SDValue PPCTargetLowering::LowerINT_TO_FPDirectMove(SDValue Op,
+ SelectionDAG &DAG,
+ SDLoc dl) const {
+ assert((Op.getValueType() == MVT::f32 ||
+ Op.getValueType() == MVT::f64) &&
+ "Invalid floating point type as target of conversion");
+ assert(Subtarget.hasFPCVT() &&
+ "Int to FP conversions with direct moves require FPCVT");
+ SDValue FP;
+ SDValue Src = Op.getOperand(0);
+ bool SinglePrec = Op.getValueType() == MVT::f32;
+ bool WordInt = Src.getSimpleValueType().SimpleTy == MVT::i32;
+ bool Signed = Op.getOpcode() == ISD::SINT_TO_FP;
+ unsigned ConvOp = Signed ? (SinglePrec ? PPCISD::FCFIDS : PPCISD::FCFID) :
+ (SinglePrec ? PPCISD::FCFIDUS : PPCISD::FCFIDU);
+
+ if (WordInt) {
+ FP = DAG.getNode(Signed ? PPCISD::MTVSRA : PPCISD::MTVSRZ,
+ dl, MVT::f64, Src);
+ FP = DAG.getNode(ConvOp, dl, SinglePrec ? MVT::f32 : MVT::f64, FP);
+ }
+ else {
+ FP = DAG.getNode(PPCISD::MTVSRA, dl, MVT::f64, Src);
+ FP = DAG.getNode(ConvOp, dl, SinglePrec ? MVT::f32 : MVT::f64, FP);
+ }
+
+ return FP;
+}
+
SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
SelectionDAG &DAG) const {
SDLoc dl(Op);
@@ -6041,6 +6098,11 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
DAG.getConstantFP(1.0, Op.getValueType()),
DAG.getConstantFP(0.0, Op.getValueType()));
+ // If we have direct moves, we can do the whole conversion and skip the
+ // store/load sequence; however, without FPCVT we can't do most conversions.
+ if (Subtarget.hasDirectMove() && Subtarget.isPPC64() && Subtarget.hasFPCVT())
+ return LowerINT_TO_FPDirectMove(Op, DAG, dl);
+
assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) &&
"UINT_TO_FP is supported only with FPCVT");
@@ -6609,7 +6671,8 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
unsigned SplatBitSize;
bool HasAnyUndefs;
if (! BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
- HasAnyUndefs, 0, true) || SplatBitSize > 32)
+ HasAnyUndefs, 0, !Subtarget.isLittleEndian()) ||
+ SplatBitSize > 32)
return SDValue();
unsigned SplatBits = APSplatBits.getZExtValue();
@@ -6676,22 +6739,6 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
}
- // The remaining cases assume either big endian element order or
- // a splat-size that equates to the element size of the vector
- // to be built. An example that doesn't work for little endian is
- // {0, -1, 0, -1, 0, -1, 0, -1} which has a splat size of 32 bits
- // and a vector element size of 16 bits. The code below will
- // produce the vector in big endian element order, which for little
- // endian is {-1, 0, -1, 0, -1, 0, -1, 0}.
-
- // For now, just avoid these optimizations in that case.
- // FIXME: Develop correct optimizations for LE with mismatched
- // splat and element sizes.
-
- if (Subtarget.isLittleEndian() &&
- SplatSize != Op.getValueType().getVectorElementType().getSizeInBits())
- return SDValue();
-
// Check to see if this is a wide variety of vsplti*, binop self cases.
static const signed char SplatCsts[] = {
-1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7,
@@ -7733,6 +7780,7 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
return;
}
case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT:
// LowerFP_TO_INT() can only handle f32 and f64.
if (N->getOperand(0).getValueType() == MVT::ppcf128)
return;
@@ -11023,21 +11071,23 @@ EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size,
bool IsMemset, bool ZeroMemset,
bool MemcpyStrSrc,
MachineFunction &MF) const {
- const Function *F = MF.getFunction();
- // When expanding a memset, require at least two QPX instructions to cover
- // the cost of loading the value to be stored from the constant pool.
- if (Subtarget.hasQPX() && Size >= 32 && (!IsMemset || Size >= 64) &&
- (!SrcAlign || SrcAlign >= 32) && (!DstAlign || DstAlign >= 32) &&
- !F->hasFnAttribute(Attribute::NoImplicitFloat)) {
- return MVT::v4f64;
- }
-
- // We should use Altivec/VSX loads and stores when available. For unaligned
- // addresses, unaligned VSX loads are only fast starting with the P8.
- if (Subtarget.hasAltivec() && Size >= 16 &&
- (((!SrcAlign || SrcAlign >= 16) && (!DstAlign || DstAlign >= 16)) ||
- ((IsMemset && Subtarget.hasVSX()) || Subtarget.hasP8Vector())))
- return MVT::v4i32;
+ if (getTargetMachine().getOptLevel() != CodeGenOpt::None) {
+ const Function *F = MF.getFunction();
+ // When expanding a memset, require at least two QPX instructions to cover
+ // the cost of loading the value to be stored from the constant pool.
+ if (Subtarget.hasQPX() && Size >= 32 && (!IsMemset || Size >= 64) &&
+ (!SrcAlign || SrcAlign >= 32) && (!DstAlign || DstAlign >= 32) &&
+ !F->hasFnAttribute(Attribute::NoImplicitFloat)) {
+ return MVT::v4f64;
+ }
+
+ // We should use Altivec/VSX loads and stores when available. For unaligned
+ // addresses, unaligned VSX loads are only fast starting with the P8.
+ if (Subtarget.hasAltivec() && Size >= 16 &&
+ (((!SrcAlign || SrcAlign >= 16) && (!DstAlign || DstAlign >= 16)) ||
+ ((IsMemset && Subtarget.hasVSX()) || Subtarget.hasP8Vector())))
+ return MVT::v4i32;
+ }
if (Subtarget.isPPC64()) {
return MVT::i64;
diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h
index 8afd7ef..7e2ebd4 100644
--- a/lib/Target/PowerPC/PPCISelLowering.h
+++ b/lib/Target/PowerPC/PPCISelLowering.h
@@ -119,6 +119,15 @@ namespace llvm {
/// resultant GPR. Bits corresponding to other CR regs are undefined.
MFOCRF,
+ /// Direct move from a VSX register to a GPR
+ MFVSR,
+
+ /// Direct move from a GPR to a VSX register (algebraic)
+ MTVSRA,
+
+ /// Direct move from a GPR to a VSX register (zero)
+ MTVSRZ,
+
// FIXME: Remove these once the ANDI glue bug is fixed:
/// i1 = ANDIo_1_[EQ|GT]_BIT(i32 or i64 x) - Represents the result of the
/// eq or gt bit of CR0 after executing andi. x, 1. This is used to
@@ -368,10 +377,6 @@ namespace llvm {
/// VSPLTB/VSPLTH/VSPLTW.
bool isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize);
- /// isAllNegativeZeroVector - Returns true if all elements of build_vector
- /// are -0.0.
- bool isAllNegativeZeroVector(SDNode *N);
-
/// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the
/// specified isSplatShuffleMask VECTOR_SHUFFLE mask.
unsigned getVSPLTImmediate(SDNode *N, unsigned EltSize, SelectionDAG &DAG);
@@ -649,6 +654,10 @@ namespace llvm {
void LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI,
SelectionDAG &DAG, SDLoc dl) const;
+ SDValue LowerFP_TO_INTDirectMove(SDValue Op, SelectionDAG &DAG,
+ SDLoc dl) const;
+ SDValue LowerINT_TO_FPDirectMove(SDValue Op, SelectionDAG &DAG,
+ SDLoc dl) const;
SDValue getFramePointerFrameIndex(SelectionDAG & DAG) const;
SDValue getReturnAddrFrameIndex(SelectionDAG & DAG) const;
diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td
index 183d088..d1d67cb 100644
--- a/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -603,6 +603,10 @@ defm CNTLZD : XForm_11r<31, 58, (outs g8rc:$rA), (ins g8rc:$rS),
def POPCNTD : XForm_11<31, 506, (outs g8rc:$rA), (ins g8rc:$rS),
"popcntd $rA, $rS", IIC_IntGeneral,
[(set i64:$rA, (ctpop i64:$rS))]>;
+def BPERMD : XForm_6<31, 252, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB),
+ "bpermd $rA, $rS, $rB", IIC_IntGeneral,
+ [(set i64:$rA, (int_ppc_bpermd g8rc:$rS, g8rc:$rB))]>,
+ isPPC64, Requires<[HasBPERMD]>;
let isCodeGenOnly = 1, isCommutable = 1 in
def CMPB8 : XForm_6<31, 508, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB),
@@ -616,14 +620,30 @@ def POPCNTW : XForm_11<31, 378, (outs gprc:$rA), (ins gprc:$rS),
"popcntw $rA, $rS", IIC_IntGeneral,
[(set i32:$rA, (ctpop i32:$rS))]>;
-defm DIVD : XOForm_1r<31, 489, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
- "divd", "$rT, $rA, $rB", IIC_IntDivD,
- [(set i64:$rT, (sdiv i64:$rA, i64:$rB))]>, isPPC64,
- PPC970_DGroup_First, PPC970_DGroup_Cracked;
-defm DIVDU : XOForm_1r<31, 457, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
- "divdu", "$rT, $rA, $rB", IIC_IntDivD,
- [(set i64:$rT, (udiv i64:$rA, i64:$rB))]>, isPPC64,
- PPC970_DGroup_First, PPC970_DGroup_Cracked;
+defm DIVD : XOForm_1rcr<31, 489, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
+ "divd", "$rT, $rA, $rB", IIC_IntDivD,
+ [(set i64:$rT, (sdiv i64:$rA, i64:$rB))]>, isPPC64;
+defm DIVDU : XOForm_1rcr<31, 457, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
+ "divdu", "$rT, $rA, $rB", IIC_IntDivD,
+ [(set i64:$rT, (udiv i64:$rA, i64:$rB))]>, isPPC64;
+def DIVDE : XOForm_1<31, 425, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
+ "divde $rT, $rA, $rB", IIC_IntDivD,
+ [(set i64:$rT, (int_ppc_divde g8rc:$rA, g8rc:$rB))]>,
+ isPPC64, Requires<[HasExtDiv]>;
+let Defs = [CR0] in
+def DIVDEo : XOForm_1<31, 425, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
+ "divde. $rT, $rA, $rB", IIC_IntDivD,
+ []>, isDOT, PPC970_DGroup_Cracked, PPC970_DGroup_First,
+ isPPC64, Requires<[HasExtDiv]>;
+def DIVDEU : XOForm_1<31, 393, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
+ "divdeu $rT, $rA, $rB", IIC_IntDivD,
+ [(set i64:$rT, (int_ppc_divdeu g8rc:$rA, g8rc:$rB))]>,
+ isPPC64, Requires<[HasExtDiv]>;
+let Defs = [CR0] in
+def DIVDEUo : XOForm_1<31, 393, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
+ "divdeu. $rT, $rA, $rB", IIC_IntDivD,
+ []>, isDOT, PPC970_DGroup_Cracked, PPC970_DGroup_First,
+ isPPC64, Requires<[HasExtDiv]>;
let isCommutable = 1 in
defm MULLD : XOForm_1r<31, 233, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
"mulld", "$rT, $rA, $rB", IIC_IntMulHD,
diff --git a/lib/Target/PowerPC/PPCInstrFormats.td b/lib/Target/PowerPC/PPCInstrFormats.td
index b7a7a1f..43c2158 100644
--- a/lib/Target/PowerPC/PPCInstrFormats.td
+++ b/lib/Target/PowerPC/PPCInstrFormats.td
@@ -764,6 +764,12 @@ class XX1Form<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
let Inst{31} = XT{5};
}
+class XX1_RS6_RD5_XO<bits<6> opcode, bits<10> xo, dag OOL, dag IOL,
+ string asmstr, InstrItinClass itin, list<dag> pattern>
+ : XX1Form<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
+ let B = 0;
+}
+
class XX2Form<bits<6> opcode, bits<9> xo, dag OOL, dag IOL, string asmstr,
InstrItinClass itin, list<dag> pattern>
: I<opcode, OOL, IOL, asmstr, itin> {
diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td
index 5eff156..8aecb65 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/lib/Target/PowerPC/PPCInstrInfo.td
@@ -726,6 +726,8 @@ def HasICBT : Predicate<"PPCSubTarget->hasICBT()">;
def HasPartwordAtomics : Predicate<"PPCSubTarget->hasPartwordAtomics()">;
def NoNaNsFPMath : Predicate<"TM.Options.NoNaNsFPMath">;
def NaNsFPMath : Predicate<"!TM.Options.NoNaNsFPMath">;
+def HasBPERMD : Predicate<"PPCSubTarget->hasBPERMD()">;
+def HasExtDiv : Predicate<"PPCSubTarget->hasExtDiv()">;
//===----------------------------------------------------------------------===//
// PowerPC Multiclass Definitions.
@@ -802,6 +804,23 @@ multiclass XOForm_1r<bits<6> opcode, bits<9> xo, bit oe, dag OOL, dag IOL,
}
}
+// Multiclass for instructions for which the non-record form is not cracked
+// and the record form is cracked (e.g. divw, mullw, etc.)
+multiclass XOForm_1rcr<bits<6> opcode, bits<9> xo, bit oe, dag OOL, dag IOL,
+ string asmbase, string asmstr, InstrItinClass itin,
+ list<dag> pattern> {
+ let BaseName = asmbase in {
+ def NAME : XOForm_1<opcode, xo, oe, OOL, IOL,
+ !strconcat(asmbase, !strconcat(" ", asmstr)), itin,
+ pattern>, RecFormRel;
+ let Defs = [CR0] in
+ def o : XOForm_1<opcode, xo, oe, OOL, IOL,
+ !strconcat(asmbase, !strconcat(". ", asmstr)), itin,
+ []>, isDOT, RecFormRel, PPC970_DGroup_First,
+ PPC970_DGroup_Cracked;
+ }
+}
+
multiclass XOForm_1rc<bits<6> opcode, bits<9> xo, bit oe, dag OOL, dag IOL,
string asmbase, string asmstr, InstrItinClass itin,
list<dag> pattern> {
@@ -2300,14 +2319,30 @@ defm ADDC : XOForm_1rc<31, 10, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
[(set i32:$rT, (addc i32:$rA, i32:$rB))]>,
PPC970_DGroup_Cracked;
-defm DIVW : XOForm_1r<31, 491, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
- "divw", "$rT, $rA, $rB", IIC_IntDivW,
- [(set i32:$rT, (sdiv i32:$rA, i32:$rB))]>,
- PPC970_DGroup_First, PPC970_DGroup_Cracked;
-defm DIVWU : XOForm_1r<31, 459, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
- "divwu", "$rT, $rA, $rB", IIC_IntDivW,
- [(set i32:$rT, (udiv i32:$rA, i32:$rB))]>,
- PPC970_DGroup_First, PPC970_DGroup_Cracked;
+defm DIVW : XOForm_1rcr<31, 491, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
+ "divw", "$rT, $rA, $rB", IIC_IntDivW,
+ [(set i32:$rT, (sdiv i32:$rA, i32:$rB))]>;
+defm DIVWU : XOForm_1rcr<31, 459, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
+ "divwu", "$rT, $rA, $rB", IIC_IntDivW,
+ [(set i32:$rT, (udiv i32:$rA, i32:$rB))]>;
+def DIVWE : XOForm_1<31, 427, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
+ "divwe $rT, $rA, $rB", IIC_IntDivW,
+ [(set i32:$rT, (int_ppc_divwe gprc:$rA, gprc:$rB))]>,
+ Requires<[HasExtDiv]>;
+let Defs = [CR0] in
+def DIVWEo : XOForm_1<31, 427, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
+ "divwe. $rT, $rA, $rB", IIC_IntDivW,
+ []>, isDOT, PPC970_DGroup_Cracked, PPC970_DGroup_First,
+ Requires<[HasExtDiv]>;
+def DIVWEU : XOForm_1<31, 395, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
+ "divweu $rT, $rA, $rB", IIC_IntDivW,
+ [(set i32:$rT, (int_ppc_divweu gprc:$rA, gprc:$rB))]>,
+ Requires<[HasExtDiv]>;
+let Defs = [CR0] in
+def DIVWEUo : XOForm_1<31, 395, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
+ "divweu. $rT, $rA, $rB", IIC_IntDivW,
+ []>, isDOT, PPC970_DGroup_Cracked, PPC970_DGroup_First,
+ Requires<[HasExtDiv]>;
let isCommutable = 1 in {
defm MULHW : XOForm_1r<31, 75, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
"mulhw", "$rT, $rA, $rB", IIC_IntMulHW,
@@ -3726,6 +3761,19 @@ def : InstAlias<"rotld. $rA, $rS, $rB", (RLDCLo g8rc:$rA, g8rc:$rS, gprc:$rB, 0)
def : InstAlias<"clrldi $rA, $rS, $n", (RLDICL g8rc:$rA, g8rc:$rS, 0, u6imm:$n)>;
def : InstAlias<"clrldi. $rA, $rS, $n", (RLDICLo g8rc:$rA, g8rc:$rS, 0, u6imm:$n)>;
+def RLWINMbm : PPCAsmPseudo<"rlwinm $rA, $rS, $n, $b",
+ (ins g8rc:$rA, g8rc:$rS, u5imm:$n, i32imm:$b)>;
+def RLWINMobm : PPCAsmPseudo<"rlwinm. $rA, $rS, $n, $b",
+ (ins g8rc:$rA, g8rc:$rS, u5imm:$n, i32imm:$b)>;
+def RLWIMIbm : PPCAsmPseudo<"rlwimi $rA, $rS, $n, $b",
+ (ins g8rc:$rA, g8rc:$rS, u5imm:$n, i32imm:$b)>;
+def RLWIMIobm : PPCAsmPseudo<"rlwimi. $rA, $rS, $n, $b",
+ (ins g8rc:$rA, g8rc:$rS, u5imm:$n, i32imm:$b)>;
+def RLWNMbm : PPCAsmPseudo<"rlwnm $rA, $rS, $n, $b",
+ (ins g8rc:$rA, g8rc:$rS, u5imm:$n, i32imm:$b)>;
+def RLWNMobm : PPCAsmPseudo<"rlwnm. $rA, $rS, $n, $b",
+ (ins g8rc:$rA, g8rc:$rS, u5imm:$n, i32imm:$b)>;
+
// These generic branch instruction forms are used for the assembler parser only.
// Defs and Uses are conservative, since we don't know the BO value.
let PPC970_Unit = 7 in {
diff --git a/lib/Target/PowerPC/PPCInstrVSX.td b/lib/Target/PowerPC/PPCInstrVSX.td
index ec04da4..a98e58f 100644
--- a/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/lib/Target/PowerPC/PPCInstrVSX.td
@@ -41,6 +41,9 @@ def PPClxvd2x : SDNode<"PPCISD::LXVD2X", SDT_PPClxvd2x,
def PPCstxvd2x : SDNode<"PPCISD::STXVD2X", SDT_PPCstxvd2x,
[SDNPHasChain, SDNPMayStore]>;
def PPCxxswapd : SDNode<"PPCISD::XXSWAPD", SDT_PPCxxswapd, [SDNPHasChain]>;
+def PPCmfvsr : SDNode<"PPCISD::MFVSR", SDTUnaryOp, []>;
+def PPCmtvsra : SDNode<"PPCISD::MTVSRA", SDTUnaryOp, []>;
+def PPCmtvsrz : SDNode<"PPCISD::MTVSRZ", SDTUnaryOp, []>;
multiclass XX3Form_Rcr<bits<6> opcode, bits<7> xo, dag OOL, dag IOL,
string asmbase, string asmstr, InstrItinClass itin,
@@ -946,6 +949,7 @@ def : Pat<(int_ppc_vsx_xvdivdp v2f64:$A, v2f64:$B),
when the elements are larger than i32.
*/
def HasP8Vector : Predicate<"PPCSubTarget->hasP8Vector()">;
+def HasDirectMove : Predicate<"PPCSubTarget->hasDirectMove()">;
let Predicates = [HasP8Vector] in {
let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
let isCommutable = 1 in {
@@ -965,3 +969,24 @@ def XXLORC : XX3Form<60, 170,
[(set v4i32:$XT, (or v4i32:$XA, (vnot_ppc v4i32:$XB)))]>;
} // AddedComplexity = 500
} // HasP8Vector
+
+let Predicates = [HasDirectMove, HasVSX] in {
+// VSX direct move instructions
+def MFVSRD : XX1_RS6_RD5_XO<31, 51, (outs g8rc:$rA), (ins vsfrc:$XT),
+ "mfvsrd $rA, $XT", IIC_VecGeneral,
+ [(set i64:$rA, (PPCmfvsr f64:$XT))]>,
+ Requires<[In64BitMode]>;
+def MFVSRWZ : XX1_RS6_RD5_XO<31, 115, (outs gprc:$rA), (ins vsfrc:$XT),
+ "mfvsrwz $rA, $XT", IIC_VecGeneral,
+ [(set i32:$rA, (PPCmfvsr f64:$XT))]>;
+def MTVSRD : XX1_RS6_RD5_XO<31, 179, (outs vsfrc:$XT), (ins g8rc:$rA),
+ "mtvsrd $XT, $rA", IIC_VecGeneral,
+ [(set f64:$XT, (PPCmtvsra i64:$rA))]>,
+ Requires<[In64BitMode]>;
+def MTVSRWA : XX1_RS6_RD5_XO<31, 211, (outs vsfrc:$XT), (ins gprc:$rA),
+ "mtvsrwa $XT, $rA", IIC_VecGeneral,
+ [(set f64:$XT, (PPCmtvsra i32:$rA))]>;
+def MTVSRWZ : XX1_RS6_RD5_XO<31, 243, (outs vsfrc:$XT), (ins gprc:$rA),
+ "mtvsrwz $XT, $rA", IIC_VecGeneral,
+ [(set f64:$XT, (PPCmtvsrz i32:$rA))]>;
+} // HasDirectMove, HasVSX
diff --git a/lib/Target/PowerPC/PPCLoopDataPrefetch.cpp b/lib/Target/PowerPC/PPCLoopDataPrefetch.cpp
index 005bcaf..2947c66 100644
--- a/lib/Target/PowerPC/PPCLoopDataPrefetch.cpp
+++ b/lib/Target/PowerPC/PPCLoopDataPrefetch.cpp
@@ -14,6 +14,7 @@
#define DEBUG_TYPE "ppc-loop-data-prefetch"
#include "PPC.h"
#include "llvm/Transforms/Scalar.h"
+#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CodeMetrics.h"
@@ -110,11 +111,9 @@ bool PPCLoopDataPrefetch::runOnFunction(Function &F) {
bool MadeChange = false;
- for (LoopInfo::iterator I = LI->begin(), E = LI->end();
- I != E; ++I) {
- Loop *L = *I;
- MadeChange |= runOnLoop(L);
- }
+ for (auto I = LI->begin(), IE = LI->end(); I != IE; ++I)
+ for (auto L = df_begin(*I), LE = df_end(*I); L != LE; ++L)
+ MadeChange |= runOnLoop(*L);
return MadeChange;
}
diff --git a/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp b/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp
index 092a4ef..b6e7799 100644
--- a/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp
+++ b/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp
@@ -22,6 +22,7 @@
#define DEBUG_TYPE "ppc-loop-preinc-prep"
#include "PPC.h"
#include "PPCTargetMachine.h"
+#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
@@ -143,11 +144,9 @@ bool PPCLoopPreIncPrep::runOnFunction(Function &F) {
bool MadeChange = false;
- for (LoopInfo::iterator I = LI->begin(), E = LI->end();
- I != E; ++I) {
- Loop *L = *I;
- MadeChange |= runOnLoop(L);
- }
+ for (auto I = LI->begin(), IE = LI->end(); I != IE; ++I)
+ for (auto L = df_begin(*I), LE = df_end(*I); L != LE; ++L)
+ MadeChange |= runOnLoop(*L);
return MadeChange;
}
@@ -159,16 +158,15 @@ bool PPCLoopPreIncPrep::runOnLoop(Loop *L) {
if (!L->empty())
return MadeChange;
+ DEBUG(dbgs() << "PIP: Examining: " << *L << "\n");
+
BasicBlock *Header = L->getHeader();
const PPCSubtarget *ST =
TM ? TM->getSubtargetImpl(*Header->getParent()) : nullptr;
- unsigned HeaderLoopPredCount = 0;
- for (pred_iterator PI = pred_begin(Header), PE = pred_end(Header);
- PI != PE; ++PI) {
- ++HeaderLoopPredCount;
- }
+ unsigned HeaderLoopPredCount =
+ std::distance(pred_begin(Header), pred_end(Header));
// Collect buckets of comparable addresses used by loads and stores.
typedef std::multimap<const SCEV *, Instruction *, SCEVLess> Bucket;
@@ -205,9 +203,13 @@ bool PPCLoopPreIncPrep::runOnLoop(Loop *L) {
if (L->isLoopInvariant(PtrValue))
continue;
- const SCEV *LSCEV = SE->getSCEV(PtrValue);
- if (!isa<SCEVAddRecExpr>(LSCEV))
+ const SCEV *LSCEV = SE->getSCEVAtScope(PtrValue, L);
+ if (const SCEVAddRecExpr *LARSCEV = dyn_cast<SCEVAddRecExpr>(LSCEV)) {
+ if (LARSCEV->getLoop() != L)
+ continue;
+ } else {
continue;
+ }
bool FoundBucket = false;
for (unsigned i = 0, e = Buckets.size(); i != e; ++i)
@@ -236,11 +238,16 @@ bool PPCLoopPreIncPrep::runOnLoop(Loop *L) {
// returns a value (which might contribute to determining the loop's
// iteration space), insert a new preheader for the loop.
if (!LoopPredecessor ||
- !LoopPredecessor->getTerminator()->getType()->isVoidTy())
+ !LoopPredecessor->getTerminator()->getType()->isVoidTy()) {
LoopPredecessor = InsertPreheaderForLoop(L, this);
+ if (LoopPredecessor)
+ MadeChange = true;
+ }
if (!LoopPredecessor)
return MadeChange;
+ DEBUG(dbgs() << "PIP: Found " << Buckets.size() << " buckets\n");
+
SmallSet<BasicBlock *, 16> BBChanged;
for (unsigned i = 0, e = Buckets.size(); i != e; ++i) {
// The base address of each bucket is transformed into a phi and the others
@@ -251,6 +258,10 @@ bool PPCLoopPreIncPrep::runOnLoop(Loop *L) {
if (!BasePtrSCEV->isAffine())
continue;
+ DEBUG(dbgs() << "PIP: Transforming: " << *BasePtrSCEV << "\n");
+ assert(BasePtrSCEV->getLoop() == L &&
+ "AddRec for the wrong loop?");
+
Instruction *MemI = Buckets[i].begin()->second;
Value *BasePtr = GetPointerOperand(MemI);
assert(BasePtr && "No pointer operand");
@@ -271,6 +282,8 @@ bool PPCLoopPreIncPrep::runOnLoop(Loop *L) {
if (!isSafeToExpand(BasePtrStartSCEV, *SE))
continue;
+ DEBUG(dbgs() << "PIP: New start is: " << *BasePtrStartSCEV << "\n");
+
PHINode *NewPHI = PHINode::Create(I8PtrTy, HeaderLoopPredCount,
MemI->hasName() ? MemI->getName() + ".phi" : "",
Header->getFirstNonPHI());
diff --git a/lib/Target/PowerPC/PPCMCInstLower.cpp b/lib/Target/PowerPC/PPCMCInstLower.cpp
index 0965cb3..6df89fe 100644
--- a/lib/Target/PowerPC/PPCMCInstLower.cpp
+++ b/lib/Target/PowerPC/PPCMCInstLower.cpp
@@ -66,7 +66,7 @@ static MCSymbol *GetSymbolFromOperand(const MachineOperand &MO, AsmPrinter &AP){
unsigned OrigLen = Name.size() - PrefixLen;
Name += Suffix;
- MCSymbol *Sym = Ctx.GetOrCreateSymbol(Name.str());
+ MCSymbol *Sym = Ctx.GetOrCreateSymbol(Name);
StringRef OrigName = StringRef(Name).substr(PrefixLen, OrigLen);
// If the target flags on the operand changes the name of the symbol, do that
diff --git a/lib/Target/PowerPC/PPCSubtarget.cpp b/lib/Target/PowerPC/PPCSubtarget.cpp
index ed88803..f313b0a 100644
--- a/lib/Target/PowerPC/PPCSubtarget.cpp
+++ b/lib/Target/PowerPC/PPCSubtarget.cpp
@@ -21,7 +21,6 @@
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Host.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Target/TargetMachine.h"
#include <cstdlib>
@@ -83,6 +82,8 @@ void PPCSubtarget::initializeEnvironment() {
HasFPCVT = false;
HasISEL = false;
HasPOPCNTD = false;
+ HasBPERMD = false;
+ HasExtDiv = false;
HasCMPB = false;
HasLDBRX = false;
IsBookE = false;
@@ -96,6 +97,7 @@ void PPCSubtarget::initializeEnvironment() {
HasICBT = false;
HasInvariantFunctionDescriptors = false;
HasPartwordAtomics = false;
+ HasDirectMove = false;
IsQPXStackUnaligned = false;
HasHTM = false;
}
@@ -110,11 +112,6 @@ void PPCSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
else
CPUName = "generic";
}
-#if (defined(__APPLE__) || defined(__linux__)) && \
- (defined(__ppc__) || defined(__powerpc__))
- if (CPUName == "generic")
- CPUName = sys::getHostCPUName();
-#endif
// Initialize scheduling itinerary for the specified CPU.
InstrItins = getInstrItineraryForCPU(CPUName);
diff --git a/lib/Target/PowerPC/PPCSubtarget.h b/lib/Target/PowerPC/PPCSubtarget.h
index b4c1bb1..8d95508 100644
--- a/lib/Target/PowerPC/PPCSubtarget.h
+++ b/lib/Target/PowerPC/PPCSubtarget.h
@@ -101,6 +101,8 @@ protected:
bool HasFPCVT;
bool HasISEL;
bool HasPOPCNTD;
+ bool HasBPERMD;
+ bool HasExtDiv;
bool HasCMPB;
bool HasLDBRX;
bool IsBookE;
@@ -115,6 +117,7 @@ protected:
bool HasICBT;
bool HasInvariantFunctionDescriptors;
bool HasPartwordAtomics;
+ bool HasDirectMove;
bool HasHTM;
/// When targeting QPX running a stock PPC64 Linux kernel where the stack
@@ -225,6 +228,8 @@ public:
bool hasMFOCRF() const { return HasMFOCRF; }
bool hasISEL() const { return HasISEL; }
bool hasPOPCNTD() const { return HasPOPCNTD; }
+ bool hasBPERMD() const { return HasBPERMD; }
+ bool hasExtDiv() const { return HasExtDiv; }
bool hasCMPB() const { return HasCMPB; }
bool hasLDBRX() const { return HasLDBRX; }
bool isBookE() const { return IsBookE; }
@@ -239,6 +244,7 @@ public:
return HasInvariantFunctionDescriptors;
}
bool hasPartwordAtomics() const { return HasPartwordAtomics; }
+ bool hasDirectMove() const { return HasDirectMove; }
bool isQPXStackUnaligned() const { return IsQPXStackUnaligned; }
unsigned getPlatformStackAlignment() const {
diff --git a/lib/Target/PowerPC/PPCTargetStreamer.h b/lib/Target/PowerPC/PPCTargetStreamer.h
index 6493713..8aaf5e1 100644
--- a/lib/Target/PowerPC/PPCTargetStreamer.h
+++ b/lib/Target/PowerPC/PPCTargetStreamer.h
@@ -16,7 +16,7 @@ namespace llvm {
class PPCTargetStreamer : public MCTargetStreamer {
public:
PPCTargetStreamer(MCStreamer &S);
- virtual ~PPCTargetStreamer();
+ ~PPCTargetStreamer() override;
virtual void emitTCEntry(const MCSymbol &S) = 0;
virtual void emitMachine(StringRef CPU) = 0;
virtual void emitAbiVersion(int AbiVersion) = 0;
diff --git a/lib/Target/PowerPC/README.txt b/lib/Target/PowerPC/README.txt
index dfe988f..01233ae 100644
--- a/lib/Target/PowerPC/README.txt
+++ b/lib/Target/PowerPC/README.txt
@@ -622,6 +622,25 @@ void foo() {
__asm__("" ::: "cr2");
}
+//===-------------------------------------------------------------------------===
+The naming convention for instruction formats is very haphazard.
+We have agreed on a naming scheme as follows:
+
+<INST_form>{_<OP_type><OP_len>}+
+
+Where:
+INST_form is the instruction format (X-form, etc.)
+OP_type is the operand type - one of OPC (opcode), RD (register destination),
+ RS (register source),
+ RDp (destination register pair),
+ RSp (source register pair), IM (immediate),
+ XO (extended opcode)
+OP_len is the length of the operand in bits
+
+VSX register operands would be of length 6 (split across two fields),
+condition register fields of length 3.
+We would not need to denote reserved fields in the names of instruction formats.
+
//===----------------------------------------------------------------------===//
Instruction fusion was introduced in ISA 2.06 and more opportunities added in
diff --git a/lib/Target/PowerPC/README_ALTIVEC.txt b/lib/Target/PowerPC/README_ALTIVEC.txt
index 43d87d3..1d5b092 100644
--- a/lib/Target/PowerPC/README_ALTIVEC.txt
+++ b/lib/Target/PowerPC/README_ALTIVEC.txt
@@ -277,7 +277,7 @@ This will generate the following instruction sequence:
This will almost certainly cause a load-hit-store hazard.
Since val is a value parameter, it should not need to be saved onto
the stack, unless it's being done set up the vector register. Instead,
-it would be better to splat teh value into a vector register, and then
+it would be better to splat the value into a vector register, and then
remove the (dead) stores to the stack.
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/R600/AMDGPU.td b/lib/Target/R600/AMDGPU.td
index e5d5ce2..2eb805e 100644
--- a/lib/Target/R600/AMDGPU.td
+++ b/lib/Target/R600/AMDGPU.td
@@ -133,6 +133,20 @@ class SubtargetFeatureLocalMemorySize <int Value> : SubtargetFeature<
!cast<string>(Value),
"The size of local memory in bytes">;
+def FeatureGCN : SubtargetFeature<"gcn",
+ "IsGCN",
+ "true",
+ "GCN or newer GPU">;
+
+def FeatureGCN1Encoding : SubtargetFeature<"gcn1-encoding",
+ "GCN1Encoding",
+ "true",
+ "Encoding format for SI and CI">;
+
+def FeatureGCN3Encoding : SubtargetFeature<"gcn3-encoding",
+ "GCN3Encoding",
+ "true",
+ "Encoding format for VI">;
class SubtargetFeatureGeneration <string Value,
list<SubtargetFeature> Implies> :
SubtargetFeature <Value, "Gen", "AMDGPUSubtarget::"#Value,
@@ -158,15 +172,17 @@ def FeatureNorthernIslands : SubtargetFeatureGeneration<"NORTHERN_ISLANDS",
def FeatureSouthernIslands : SubtargetFeatureGeneration<"SOUTHERN_ISLANDS",
[Feature64BitPtr, FeatureFP64, FeatureLocalMemorySize32768,
- FeatureWavefrontSize64]>;
+ FeatureWavefrontSize64, FeatureGCN, FeatureGCN1Encoding]>;
def FeatureSeaIslands : SubtargetFeatureGeneration<"SEA_ISLANDS",
[Feature64BitPtr, FeatureFP64, FeatureLocalMemorySize65536,
- FeatureWavefrontSize64, FeatureFlatAddressSpace]>;
+ FeatureWavefrontSize64, FeatureGCN, FeatureFlatAddressSpace,
+ FeatureGCN1Encoding]>;
def FeatureVolcanicIslands : SubtargetFeatureGeneration<"VOLCANIC_ISLANDS",
[Feature64BitPtr, FeatureFP64, FeatureLocalMemorySize65536,
- FeatureWavefrontSize64, FeatureFlatAddressSpace]>;
+ FeatureWavefrontSize64, FeatureFlatAddressSpace, FeatureGCN,
+ FeatureGCN3Encoding]>;
//===----------------------------------------------------------------------===//
@@ -197,8 +213,10 @@ def NullALU : InstrItinClass;
class PredicateControl {
Predicate SubtargetPredicate;
+ list<Predicate> AssemblerPredicates = [];
list<Predicate> OtherPredicates = [];
list<Predicate> Predicates = !listconcat([SubtargetPredicate],
+ AssemblerPredicates,
OtherPredicates);
}
diff --git a/lib/Target/R600/AMDGPUAsmPrinter.cpp b/lib/Target/R600/AMDGPUAsmPrinter.cpp
index d911014..b3480b4 100644
--- a/lib/Target/R600/AMDGPUAsmPrinter.cpp
+++ b/lib/Target/R600/AMDGPUAsmPrinter.cpp
@@ -17,6 +17,7 @@
//
#include "AMDGPUAsmPrinter.h"
+#include "InstPrinter/AMDGPUInstPrinter.h"
#include "AMDGPU.h"
#include "AMDKernelCodeT.h"
#include "AMDGPUSubtarget.h"
@@ -574,3 +575,24 @@ void AMDGPUAsmPrinter::EmitAmdKernelCodeT(const MachineFunction &MF,
OutStreamer.EmitBytes(StringRef((char*)&header, sizeof(header)));
}
+
+bool AMDGPUAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant,
+ const char *ExtraCode, raw_ostream &O) {
+ if (ExtraCode && ExtraCode[0]) {
+ if (ExtraCode[1] != 0)
+ return true; // Unknown modifier.
+
+ switch (ExtraCode[0]) {
+ default:
+ // See if this is a generic print operand
+ return AsmPrinter::PrintAsmOperand(MI, OpNo, AsmVariant, ExtraCode, O);
+ case 'r':
+ break;
+ }
+ }
+
+ AMDGPUInstPrinter::printRegOperand(MI->getOperand(OpNo).getReg(), O,
+ *TM.getSubtargetImpl(*MF->getFunction())->getRegisterInfo());
+ return false;
+}
diff --git a/lib/Target/R600/AMDGPUAsmPrinter.h b/lib/Target/R600/AMDGPUAsmPrinter.h
index 58ffb1e..1acff3a 100644
--- a/lib/Target/R600/AMDGPUAsmPrinter.h
+++ b/lib/Target/R600/AMDGPUAsmPrinter.h
@@ -99,6 +99,10 @@ public:
void EmitEndOfAsmFile(Module &M) override;
+ bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode,
+ raw_ostream &O) override;
+
protected:
std::vector<std::string> DisasmLines, HexLines;
size_t DisasmLineMaxLen;
diff --git a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
index 7341cd9..def252a 100644
--- a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
+++ b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
@@ -345,7 +345,7 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
unsigned NOps = N->getNumOperands();
for (unsigned i = 0; i < NOps; i++) {
// XXX: Why is this here?
- if (dyn_cast<RegisterSDNode>(N->getOperand(i))) {
+ if (isa<RegisterSDNode>(N->getOperand(i))) {
IsRegSeq = false;
break;
}
diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp
index 62a33fa..7c5235d 100644
--- a/lib/Target/R600/AMDGPUISelLowering.cpp
+++ b/lib/Target/R600/AMDGPUISelLowering.cpp
@@ -126,6 +126,8 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM,
setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
setOperationAction(ISD::FRINT, MVT::f32, Legal);
setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
+ setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
+ setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
setOperationAction(ISD::FROUND, MVT::f32, Custom);
setOperationAction(ISD::FROUND, MVT::f64, Custom);
@@ -1685,14 +1687,8 @@ void AMDGPUTargetLowering::LowerUDIVREM64(SDValue Op,
const unsigned bitPos = halfBitWidth - i - 1;
SDValue POS = DAG.getConstant(bitPos, HalfVT);
// Get value of high bit
- // TODO: Remove the BFE part when the optimization is fixed
- SDValue HBit;
- if (halfBitWidth == 32 && Subtarget->hasBFE()) {
- HBit = DAG.getNode(AMDGPUISD::BFE_U32, DL, HalfVT, LHS_Lo, POS, one);
- } else {
- HBit = DAG.getNode(ISD::SRL, DL, HalfVT, LHS_Lo, POS);
- HBit = DAG.getNode(ISD::AND, DL, HalfVT, HBit, one);
- }
+ SDValue HBit = DAG.getNode(ISD::SRL, DL, HalfVT, LHS_Lo, POS);
+ HBit = DAG.getNode(ISD::AND, DL, HalfVT, HBit, one);
HBit = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, HBit);
// Shift
diff --git a/lib/Target/R600/AMDGPUInstructions.td b/lib/Target/R600/AMDGPUInstructions.td
index 4d08201..eeb7f3f 100644
--- a/lib/Target/R600/AMDGPUInstructions.td
+++ b/lib/Target/R600/AMDGPUInstructions.td
@@ -358,7 +358,7 @@ def atomic_load_umax_local : local_binary_atomic_op<atomic_load_umax>;
def mskor_global : PatFrag<(ops node:$val, node:$ptr),
(AMDGPUstore_mskor node:$val, node:$ptr), [{
- return dyn_cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
+ return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
}]>;
@@ -389,7 +389,7 @@ def flat_store : PatFrag<(ops node:$val, node:$ptr),
def mskor_flat : PatFrag<(ops node:$val, node:$ptr),
(AMDGPUstore_mskor node:$val, node:$ptr), [{
- return dyn_cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::FLAT_ADDRESS;
+ return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::FLAT_ADDRESS;
}]>;
class global_binary_atomic_op<SDNode atomic_op> : PatFrag<
diff --git a/lib/Target/R600/AMDGPUMCInstLower.cpp b/lib/Target/R600/AMDGPUMCInstLower.cpp
index f047ed0..7e274a9 100644
--- a/lib/Target/R600/AMDGPUMCInstLower.cpp
+++ b/lib/Target/R600/AMDGPUMCInstLower.cpp
@@ -124,7 +124,8 @@ void AMDGPUAsmPrinter::EmitInstruction(const MachineInstr *MI) {
AMDGPUInstPrinter InstPrinter(*TM.getMCAsmInfo(),
*MF->getSubtarget().getInstrInfo(),
*MF->getSubtarget().getRegisterInfo());
- InstPrinter.printInst(&TmpInst, DisasmStream, StringRef());
+ InstPrinter.printInst(&TmpInst, DisasmStream, StringRef(),
+ MF->getSubtarget());
// Disassemble instruction/operands to hex representation.
SmallVector<MCFixup, 4> Fixups;
diff --git a/lib/Target/R600/AMDGPUPromoteAlloca.cpp b/lib/Target/R600/AMDGPUPromoteAlloca.cpp
index 175dcd8..6d5f94e 100644
--- a/lib/Target/R600/AMDGPUPromoteAlloca.cpp
+++ b/lib/Target/R600/AMDGPUPromoteAlloca.cpp
@@ -366,8 +366,8 @@ void AMDGPUPromoteAlloca::visitAlloca(AllocaInst &I) {
Function *F = Call->getCalledFunction();
FunctionType *NewType = FunctionType::get(Call->getType(), ArgTypes,
F->isVarArg());
- Constant *C = Mod->getOrInsertFunction(StringRef(F->getName().str() + ".local"), NewType,
- F->getAttributes());
+ Constant *C = Mod->getOrInsertFunction((F->getName() + ".local").str(),
+ NewType, F->getAttributes());
Function *NewF = cast<Function>(C);
Call->setCalledFunction(NewF);
continue;
diff --git a/lib/Target/R600/AMDGPUSubtarget.cpp b/lib/Target/R600/AMDGPUSubtarget.cpp
index 0ead652..259224a 100644
--- a/lib/Target/R600/AMDGPUSubtarget.cpp
+++ b/lib/Target/R600/AMDGPUSubtarget.cpp
@@ -71,6 +71,7 @@ AMDGPUSubtarget::AMDGPUSubtarget(StringRef TT, StringRef GPU, StringRef FS,
EnablePromoteAlloca(false), EnableIfCvt(true), EnableLoadStoreOpt(false),
WavefrontSize(0), CFALUBug(false), LocalMemorySize(0),
EnableVGPRSpilling(false), SGPRInitBug(false),
+ IsGCN(false), GCN1Encoding(false), GCN3Encoding(false),
FrameLowering(TargetFrameLowering::StackGrowsUp,
64 * 16, // Maximum stack alignment (long16)
0),
diff --git a/lib/Target/R600/AMDGPUSubtarget.h b/lib/Target/R600/AMDGPUSubtarget.h
index 403a3e4..aeb0817 100644
--- a/lib/Target/R600/AMDGPUSubtarget.h
+++ b/lib/Target/R600/AMDGPUSubtarget.h
@@ -71,6 +71,9 @@ private:
int LocalMemorySize;
bool EnableVGPRSpilling;
bool SGPRInitBug;
+ bool IsGCN;
+ bool GCN1Encoding;
+ bool GCN3Encoding;
AMDGPUFrameLowering FrameLowering;
std::unique_ptr<AMDGPUTargetLowering> TLInfo;
diff --git a/lib/Target/R600/AMDILCFGStructurizer.cpp b/lib/Target/R600/AMDILCFGStructurizer.cpp
index ee6551b..c9b25a1 100644
--- a/lib/Target/R600/AMDILCFGStructurizer.cpp
+++ b/lib/Target/R600/AMDILCFGStructurizer.cpp
@@ -623,7 +623,7 @@ DebugLoc AMDGPUCFGStructurizer::getLastDebugLocInBB(MachineBasicBlock *MBB) {
for (MachineBasicBlock::iterator It = MBB->begin(); It != MBB->end();
++It) {
MachineInstr *instr = &(*It);
- if (!instr->getDebugLoc().isUnknown())
+ if (instr->getDebugLoc())
DL = instr->getDebugLoc();
}
return DL;
diff --git a/lib/Target/R600/AsmParser/AMDGPUAsmParser.cpp b/lib/Target/R600/AsmParser/AMDGPUAsmParser.cpp
index 49f0f23..aaf9b32 100644
--- a/lib/Target/R600/AsmParser/AMDGPUAsmParser.cpp
+++ b/lib/Target/R600/AsmParser/AMDGPUAsmParser.cpp
@@ -8,6 +8,8 @@
//===----------------------------------------------------------------------===//
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "SIDefines.h"
+#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/STLExtras.h"
@@ -27,76 +29,105 @@
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Debug.h"
using namespace llvm;
namespace {
-class AMDGPUAsmParser : public MCTargetAsmParser {
- MCSubtargetInfo &STI;
- MCAsmParser &Parser;
-
-
- /// @name Auto-generated Match Functions
- /// {
-
-#define GET_ASSEMBLER_HEADER
-#include "AMDGPUGenAsmMatcher.inc"
-
- /// }
-
-public:
- AMDGPUAsmParser(MCSubtargetInfo &STI, MCAsmParser &Parser,
- const MCInstrInfo &MII, const MCTargetOptions &Options)
- : MCTargetAsmParser(), STI(STI), Parser(Parser) {
- setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
- }
- bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
- bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
- OperandVector &Operands, MCStreamer &Out,
- uint64_t &ErrorInfo,
- bool MatchingInlineAsm) override;
- bool ParseDirective(AsmToken DirectiveID) override;
- OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic);
- bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
- SMLoc NameLoc, OperandVector &Operands) override;
-
- bool parseCnt(int64_t &IntVal);
- OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
-};
+struct OptionalOperand;
class AMDGPUOperand : public MCParsedAsmOperand {
enum KindTy {
Token,
- Immediate
+ Immediate,
+ Register,
+ Expression
} Kind;
+ SMLoc StartLoc, EndLoc;
+
public:
AMDGPUOperand(enum KindTy K) : MCParsedAsmOperand(), Kind(K) {}
+ MCContext *Ctx;
+
+ enum ImmTy {
+ ImmTyNone,
+ ImmTyDSOffset0,
+ ImmTyDSOffset1,
+ ImmTyGDS,
+ ImmTyOffset,
+ ImmTyGLC,
+ ImmTySLC,
+ ImmTyTFE,
+ ImmTyClamp,
+ ImmTyOMod
+ };
+
struct TokOp {
const char *Data;
unsigned Length;
};
struct ImmOp {
+ bool IsFPImm;
+ ImmTy Type;
int64_t Val;
};
+ struct RegOp {
+ unsigned RegNo;
+ int Modifiers;
+ const MCRegisterInfo *TRI;
+ };
+
union {
TokOp Tok;
ImmOp Imm;
+ RegOp Reg;
+ const MCExpr *Expr;
};
void addImmOperands(MCInst &Inst, unsigned N) const {
Inst.addOperand(MCOperand::CreateImm(getImm()));
}
- void addRegOperands(MCInst &Inst, unsigned N) const {
- llvm_unreachable("addRegOperands");
- }
+
StringRef getToken() const {
return StringRef(Tok.Data, Tok.Length);
}
+
+ void addRegOperands(MCInst &Inst, unsigned N) const {
+ Inst.addOperand(MCOperand::CreateReg(getReg()));
+ }
+
+ void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
+ if (isReg())
+ addRegOperands(Inst, N);
+ else
+ addImmOperands(Inst, N);
+ }
+
+ void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
+ Inst.addOperand(MCOperand::CreateImm(Reg.Modifiers));
+ addRegOperands(Inst, N);
+ }
+
+ void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
+ if (isImm())
+ addImmOperands(Inst, N);
+ else {
+ assert(isExpr());
+ Inst.addOperand(MCOperand::CreateExpr(Expr));
+ }
+ }
+
+ bool defaultTokenHasSuffix() const {
+ StringRef Token(Tok.Data, Tok.Length);
+
+ return Token.endswith("_e32") || Token.endswith("_e64");
+ }
+
bool isToken() const override {
return Kind == Token;
}
@@ -105,52 +136,369 @@ public:
return Kind == Immediate;
}
+  /// \returns true if this operand is an immediate whose value is either an
+  /// integer in the range [-16, 64] or the 32-bit pattern of one of the
+  /// floating-point values 0.0, +/-0.5, +/-1.0, +/-2.0 or +/-4.0.
+  bool isInlineImm() const {
+    // Imm is a union member; only read it once we know it is the active one.
+    if (!isImm())
+      return false;
+
+    if (Imm.Val <= 64 && Imm.Val >= -16)
+      return true;
+
+    // TODO: Add 0.5pi for VI
+    float F = BitsToFloat(Imm.Val);
+    return F == 0.0 || F == 0.5 || F == -0.5 || F == 1.0 || F == -1.0 ||
+           F == 2.0 || F == -2.0 || F == 4.0 || F == -4.0;
+  }
+
+ bool isDSOffset0() const {
+ assert(isImm());
+ return Imm.Type == ImmTyDSOffset0;
+ }
+
+ bool isDSOffset1() const {
+ assert(isImm());
+ return Imm.Type == ImmTyDSOffset1;
+ }
+
int64_t getImm() const {
return Imm.Val;
}
+ enum ImmTy getImmTy() const {
+ assert(isImm());
+ return Imm.Type;
+ }
+
bool isReg() const override {
- return false;
+ return Kind == Register && Reg.Modifiers == -1;
+ }
+
+ bool isRegWithInputMods() const {
+ return Kind == Register && Reg.Modifiers != -1;
+ }
+
+ void setModifiers(unsigned Mods) {
+ assert(isReg());
+ Reg.Modifiers = Mods;
}
unsigned getReg() const override {
- return 0;
+ return Reg.RegNo;
+ }
+
+ bool isRegOrImm() const {
+ return isReg() || isImm();
+ }
+
+ bool isRegClass(unsigned RCID) const {
+ return Reg.TRI->getRegClass(RCID).contains(getReg());
+ }
+
+ bool isSCSrc32() const {
+ return isInlineImm() || (isReg() && isRegClass(AMDGPU::SReg_32RegClassID));
+ }
+
+ bool isSSrc32() const {
+ return isImm() || (isReg() && isRegClass(AMDGPU::SReg_32RegClassID));
+ }
+
+ bool isSSrc64() const {
+ return isImm() || isInlineImm() ||
+ (isReg() && isRegClass(AMDGPU::SReg_64RegClassID));
+ }
+
+ bool isVCSrc32() const {
+ return isInlineImm() || (isReg() && isRegClass(AMDGPU::VS_32RegClassID));
+ }
+
+ bool isVCSrc64() const {
+ return isInlineImm() || (isReg() && isRegClass(AMDGPU::VS_64RegClassID));
+ }
+
+ bool isVSrc32() const {
+ return isImm() || (isReg() && isRegClass(AMDGPU::VS_32RegClassID));
+ }
+
+ bool isVSrc64() const {
+ return isImm() || (isReg() && isRegClass(AMDGPU::VS_64RegClassID));
}
bool isMem() const override {
return false;
}
+ bool isExpr() const {
+ return Kind == Expression;
+ }
+
+ bool isSoppBrTarget() const {
+ return isExpr() || isImm();
+ }
+
SMLoc getStartLoc() const override {
- return SMLoc();
+ return StartLoc;
}
SMLoc getEndLoc() const override {
- return SMLoc();
+ return EndLoc;
}
void print(raw_ostream &OS) const override { }
- static std::unique_ptr<AMDGPUOperand> CreateImm(int64_t Val) {
+ static std::unique_ptr<AMDGPUOperand> CreateImm(int64_t Val, SMLoc Loc,
+ enum ImmTy Type = ImmTyNone,
+ bool IsFPImm = false) {
auto Op = llvm::make_unique<AMDGPUOperand>(Immediate);
Op->Imm.Val = Val;
+ Op->Imm.IsFPImm = IsFPImm;
+ Op->Imm.Type = Type;
+ Op->StartLoc = Loc;
+ Op->EndLoc = Loc;
return Op;
}
- static std::unique_ptr<AMDGPUOperand> CreateToken(StringRef Str, SMLoc Loc) {
+ static std::unique_ptr<AMDGPUOperand> CreateToken(StringRef Str, SMLoc Loc,
+ bool HasExplicitEncodingSize = true) {
auto Res = llvm::make_unique<AMDGPUOperand>(Token);
Res->Tok.Data = Str.data();
Res->Tok.Length = Str.size();
+ Res->StartLoc = Loc;
+ Res->EndLoc = Loc;
return Res;
}
+ static std::unique_ptr<AMDGPUOperand> CreateReg(unsigned RegNo, SMLoc S,
+ SMLoc E,
+ const MCRegisterInfo *TRI) {
+ auto Op = llvm::make_unique<AMDGPUOperand>(Register);
+ Op->Reg.RegNo = RegNo;
+ Op->Reg.TRI = TRI;
+ Op->Reg.Modifiers = -1;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ return Op;
+ }
+
+ static std::unique_ptr<AMDGPUOperand> CreateExpr(const class MCExpr *Expr, SMLoc S) {
+ auto Op = llvm::make_unique<AMDGPUOperand>(Expression);
+ Op->Expr = Expr;
+ Op->StartLoc = S;
+ Op->EndLoc = S;
+ return Op;
+ }
+
+ bool isDSOffset() const;
+ bool isDSOffset01() const;
bool isSWaitCnt() const;
+ bool isMubufOffset() const;
};
+/// Target assembly parser: turns textual operands into AMDGPUOperand
+/// objects and hands them to the auto-generated instruction matcher.
+class AMDGPUAsmParser : public MCTargetAsmParser {
+ MCSubtargetInfo &STI;
+ const MCInstrInfo &MII;
+ MCAsmParser &Parser;
+
+ // Encoding size requested by the mnemonic suffix of the instruction being
+ // parsed: ParseInstruction sets 32 for "_e32", 64 for "_e64", 0 otherwise.
+ unsigned ForcedEncodingSize;
+ /// @name Auto-generated Match Functions
+ /// {
+
+#define GET_ASSEMBLER_HEADER
+#include "AMDGPUGenAsmMatcher.inc"
+
+ /// }
+
+public:
+ // Stores the references it is given and publishes the available feature
+ // set computed from the subtarget's feature bits.
+ AMDGPUAsmParser(MCSubtargetInfo &STI, MCAsmParser &_Parser,
+ const MCInstrInfo &MII,
+ const MCTargetOptions &Options)
+ : MCTargetAsmParser(), STI(STI), MII(MII), Parser(_Parser),
+ ForcedEncodingSize(0){
+
+ if (!STI.getFeatureBits()) {
+ // Set default features.
+ STI.ToggleFeature("SOUTHERN_ISLANDS");
+ }
+
+ setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
+ }
+
+ unsigned getForcedEncodingSize() const {
+ return ForcedEncodingSize;
+ }
+
+ void setForcedEncodingSize(unsigned Size) {
+ ForcedEncodingSize = Size;
+ }
+
+ bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
+ unsigned checkTargetMatchPredicate(MCInst &Inst) override;
+ bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
+ OperandVector &Operands, MCStreamer &Out,
+ uint64_t &ErrorInfo,
+ bool MatchingInlineAsm) override;
+ bool ParseDirective(AsmToken DirectiveID) override;
+ OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic);
+ bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
+ SMLoc NameLoc, OperandVector &Operands) override;
+
+ // Generic helpers for optional operands written as "<prefix>:<int>" or as
+ // a bare name / "no"-prefixed name.
+ OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int,
+ int64_t Default = 0);
+ OperandMatchResultTy parseIntWithPrefix(const char *Prefix,
+ OperandVector &Operands,
+ enum AMDGPUOperand::ImmTy ImmTy =
+ AMDGPUOperand::ImmTyNone);
+ OperandMatchResultTy parseNamedBit(const char *Name, OperandVector &Operands,
+ enum AMDGPUOperand::ImmTy ImmTy =
+ AMDGPUOperand::ImmTyNone);
+ OperandMatchResultTy parseOptionalOps(
+ const ArrayRef<OptionalOperand> &OptionalOps,
+ OperandVector &Operands);
+
+
+ // DS instruction support.
+ void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
+ void cvtDS(MCInst &Inst, const OperandVector &Operands);
+ OperandMatchResultTy parseDSOptionalOps(OperandVector &Operands);
+ OperandMatchResultTy parseDSOff01OptionalOps(OperandVector &Operands);
+ OperandMatchResultTy parseDSOffsetOptional(OperandVector &Operands);
+
+ // s_waitcnt and SOPP branch-target support.
+ bool parseCnt(int64_t &IntVal);
+ OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
+ OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
+
+ // MUBUF instruction support.
+ void cvtMubuf(MCInst &Inst, const OperandVector &Operands);
+ OperandMatchResultTy parseOffset(OperandVector &Operands);
+ OperandMatchResultTy parseMubufOptionalOps(OperandVector &Operands);
+ OperandMatchResultTy parseGLC(OperandVector &Operands);
+ OperandMatchResultTy parseSLC(OperandVector &Operands);
+ OperandMatchResultTy parseTFE(OperandVector &Operands);
+
+ OperandMatchResultTy parseDMask(OperandVector &Operands);
+ OperandMatchResultTy parseUNorm(OperandVector &Operands);
+ OperandMatchResultTy parseR128(OperandVector &Operands);
+
+ // VOP3 instruction support.
+ void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
+ OperandMatchResultTy parseVOP3OptionalOps(OperandVector &Operands);
+};
+
+/// One row of an optional-operand table consumed by parseOptionalOps.
+struct OptionalOperand {
+ // Spelling of the operand in assembly ("<Name>" or "<Name>:<int>").
+ const char *Name;
+ // Immediate tag attached to the operand that gets created.
+ AMDGPUOperand::ImmTy Type;
+ // True: parsed with parseNamedBit; false: parsed with parseIntWithPrefix.
+ bool IsBit;
+ // Value used by parseIntWithPrefix when the statement has already ended.
+ int64_t Default;
+ // Optional hook run on the parsed value; a false result makes
+ // parseOptionalOps report a parse failure. May be null.
+ bool (*ConvertResult)(int64_t&);
+};
+
+}
+
+/// Map a register kind and a width to the ID of the matching register class.
+///
+/// \p RegWidth counts 32-bit registers (ParseRegister passes 1 for a single
+/// register and hi - lo + 1 for a range). Widths without a class are a
+/// programming error and hit llvm_unreachable.
+static unsigned getRegClass(bool IsVgpr, unsigned RegWidth) {
+  switch (RegWidth) {
+  case 1:
+    return IsVgpr ? AMDGPU::VGPR_32RegClassID : AMDGPU::SGPR_32RegClassID;
+  case 2:
+    return IsVgpr ? AMDGPU::VReg_64RegClassID : AMDGPU::SGPR_64RegClassID;
+  case 3:
+    // Only the vector side lists a three-register class.
+    if (IsVgpr)
+      return AMDGPU::VReg_96RegClassID;
+    break;
+  case 4:
+    return IsVgpr ? AMDGPU::VReg_128RegClassID : AMDGPU::SReg_128RegClassID;
+  case 8:
+    return IsVgpr ? AMDGPU::VReg_256RegClassID : AMDGPU::SReg_256RegClassID;
+  case 16:
+    return IsVgpr ? AMDGPU::VReg_512RegClassID : AMDGPU::SReg_512RegClassID;
+  default:
+    break;
+  }
+
+  llvm_unreachable("Unknown register width");
+}
+
+/// Look up a named (non-indexed) register by its assembly spelling.
+///
+/// \returns the register number, or 0 when \p RegName is not in the table.
+static unsigned getRegForName(const StringRef &RegName) {
+  static const struct {
+    const char *Name;
+    unsigned Reg;
+  } NamedRegs[] = {
+    {"exec", AMDGPU::EXEC},
+    {"vcc", AMDGPU::VCC},
+    {"flat_scr", AMDGPU::FLAT_SCR},
+    {"m0", AMDGPU::M0},
+    {"scc", AMDGPU::SCC},
+    {"flat_scr_lo", AMDGPU::FLAT_SCR_LO},
+    {"flat_scr_hi", AMDGPU::FLAT_SCR_HI},
+    {"vcc_lo", AMDGPU::VCC_LO},
+    {"vcc_hi", AMDGPU::VCC_HI},
+    {"exec_lo", AMDGPU::EXEC_LO},
+    {"exec_hi", AMDGPU::EXEC_HI},
+  };
+
+  for (const auto &Entry : NamedRegs)
+    if (RegName.equals(Entry.Name))
+      return Entry.Reg;
+
+  return 0;
 }
+/// Parse a register reference starting at the current token.
+///
+/// Accepted forms are the named registers known to getRegForName, a single
+/// 32-bit register written s<N> or v<N>, and a register range written
+/// s[<lo>:<hi>] or v[<lo>:<hi>].
+///
+/// \param RegNo    receives the parsed register number.
+/// \param StartLoc receives the location of the first token.
+/// \param EndLoc   receives the end location of the first token.
+/// \returns false on success, true when the input is not a valid register.
 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) {
- return true;
+  const AsmToken Tok = Parser.getTok();
+  StartLoc = Tok.getLoc();
+  EndLoc = Tok.getEndLoc();
+  const StringRef &RegName = Tok.getString();
+  RegNo = getRegForName(RegName);
+
+  if (RegNo) {
+    Parser.Lex();
+    return false;
+  }
+
+  // Match vgprs and sgprs. Guard against an empty token before indexing.
+  if (RegName.empty() || (RegName[0] != 's' && RegName[0] != 'v'))
+    return true;
+
+  bool IsVgpr = RegName[0] == 'v';
+  unsigned RegWidth;
+  unsigned RegIndexInClass;
+  if (RegName.size() > 1) {
+    // We have a 32-bit register
+    RegWidth = 1;
+    if (RegName.substr(1).getAsInteger(10, RegIndexInClass))
+      return true;
+    Parser.Lex();
+  } else {
+    // We have a register greater than 32-bits.
+
+    int64_t RegLo, RegHi;
+    Parser.Lex();
+    if (getLexer().isNot(AsmToken::LBrac))
+      return true;
+
+    Parser.Lex();
+    if (getParser().parseAbsoluteExpression(RegLo))
+      return true;
+
+    if (getLexer().isNot(AsmToken::Colon))
+      return true;
+
+    Parser.Lex();
+    if (getParser().parseAbsoluteExpression(RegHi))
+      return true;
+
+    if (getLexer().isNot(AsmToken::RBrac))
+      return true;
+
+    Parser.Lex();
+
+    // The bounds come straight from the source text. Reject negative,
+    // reversed or oversized ranges while still in 64-bit arithmetic, so the
+    // narrowing below cannot wrap and the width can never be zero.
+    if (RegLo < 0 || RegHi < RegLo || RegHi - RegLo >= 16 ||
+        static_cast<unsigned>(RegHi) != RegHi)
+      return true;
+    RegWidth = (RegHi - RegLo) + 1;
+
+    // getRegClass treats an unknown width as unreachable, so widths without
+    // a register class must be diagnosed here as ordinary parse failures.
+    const bool HasRegClass = RegWidth == 1 || RegWidth == 2 ||
+                             RegWidth == 4 || RegWidth == 8 ||
+                             RegWidth == 16 || (IsVgpr && RegWidth == 3);
+    if (!HasRegClass)
+      return true;
+
+    if (IsVgpr) {
+      // VGPR registers aren't aligned.
+      RegIndexInClass = RegLo;
+    } else {
+      // SGPR registers are aligned. Max alignment is 4 dwords.
+      const unsigned Alignment = std::min(RegWidth, 4u);
+      // A misaligned start would otherwise be rounded down silently and
+      // name a different register than the one that was written.
+      if (RegLo % Alignment != 0)
+        return true;
+      RegIndexInClass = RegLo / Alignment;
+    }
+  }
+
+  const MCRegisterInfo *TRC = getContext().getRegisterInfo();
+  unsigned RC = getRegClass(IsVgpr, RegWidth);
+  // Valid indices are [0, getNumRegs()); the count itself is out of range.
+  if (RegIndexInClass >= TRC->getRegClass(RC).getNumRegs())
+    return true;
+  RegNo = TRC->getRegClass(RC).getRegister(RegIndexInClass);
+  return false;
+}
+
+/// Veto a candidate match whose encoding disagrees with the forced size.
+///
+/// With a forced size of 32 an instruction flagged VOP3 is rejected; with a
+/// forced size of 64 an instruction not flagged VOP3 is rejected. Any other
+/// forced size accepts every candidate.
+unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
+  const bool IsVOP3 =
+      (MII.get(Inst.getOpcode()).TSFlags & SIInstrFlags::VOP3) != 0;
+
+  switch (getForcedEncodingSize()) {
+  case 32:
+    return IsVOP3 ? Match_InvalidOperand : Match_Success;
+  case 64:
+    return IsVOP3 ? Match_Success : Match_InvalidOperand;
+  default:
+    return Match_Success;
+  }
 }
@@ -162,22 +510,30 @@ bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
MCInst Inst;
switch (MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm)) {
- case Match_Success:
- Inst.setLoc(IDLoc);
- Out.EmitInstruction(Inst, STI);
- return false;
- case Match_MissingFeature:
- return Error(IDLoc, "instruction use requires an option to be enabled");
- case Match_MnemonicFail:
- return Error(IDLoc, "unrecognized instruction mnemonic");
- case Match_InvalidOperand: {
- if (ErrorInfo != ~0ULL) {
- if (ErrorInfo >= Operands.size())
- return Error(IDLoc, "too few operands for instruction");
+ default: break;
+ case Match_Success:
+ Inst.setLoc(IDLoc);
+ Out.EmitInstruction(Inst, STI);
+ return false;
+ case Match_MissingFeature:
+ return Error(IDLoc, "missing feature");
+
+ case Match_MnemonicFail:
+ return Error(IDLoc, "unrecognized instruction mnemonic");
+
+ case Match_InvalidOperand: {
+ SMLoc ErrorLoc = IDLoc;
+ if (ErrorInfo != ~0ULL) {
+ if (ErrorInfo >= Operands.size()) {
+ return Error(IDLoc, "too few operands for instruction");
+ }
+ ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
+ if (ErrorLoc == SMLoc())
+ ErrorLoc = IDLoc;
+ }
+ return Error(ErrorLoc, "invalid operand for instruction");
}
- return Error(IDLoc, "invalid operand for instruction");
- }
}
llvm_unreachable("Implement any new match types added!");
}
@@ -186,6 +542,19 @@ bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
return true;
}
+/// \returns true if any operand parsed so far is a register carrying input
+/// modifiers, or an immediate tagged as an omod or clamp value.
+static bool operandsHaveModifiers(const OperandVector &Operands) {
+  for (const auto &Parsed : Operands) {
+    const AMDGPUOperand &Op = static_cast<const AMDGPUOperand &>(*Parsed);
+    if (Op.isRegWithInputMods())
+      return true;
+
+    // getImmTy is only meaningful for immediates.
+    if (!Op.isImm())
+      continue;
+
+    const AMDGPUOperand::ImmTy Ty = Op.getImmTy();
+    if (Ty == AMDGPUOperand::ImmTyOMod || Ty == AMDGPUOperand::ImmTyClamp)
+      return true;
+  }
+  return false;
+}
+
AMDGPUAsmParser::OperandMatchResultTy
AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
@@ -194,17 +563,104 @@ AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
// If we successfully parsed the operand or if there as an error parsing,
// we are done.
- if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail)
+ //
+ // If we are parsing after we reach EndOfStatement then this means we
+ // are appending default values to the Operands list. This is only done
+ // by custom parser, so we shouldn't continue on to the generic parsing.
+ if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
+ getLexer().is(AsmToken::EndOfStatement))
return ResTy;
+ bool Negate = false, Abs = false;
+ if (getLexer().getKind()== AsmToken::Minus) {
+ Parser.Lex();
+ Negate = true;
+ }
+
+ if (getLexer().getKind() == AsmToken::Pipe) {
+ Parser.Lex();
+ Abs = true;
+ }
+
switch(getLexer().getKind()) {
case AsmToken::Integer: {
+ SMLoc S = Parser.getTok().getLoc();
int64_t IntVal;
if (getParser().parseAbsoluteExpression(IntVal))
return MatchOperand_ParseFail;
- Operands.push_back(AMDGPUOperand::CreateImm(IntVal));
+ APInt IntVal32(32, IntVal);
+ if (IntVal32.getSExtValue() != IntVal) {
+ Error(S, "invalid immediate: only 32-bit values are legal");
+ return MatchOperand_ParseFail;
+ }
+
+ IntVal = IntVal32.getSExtValue();
+ if (Negate)
+ IntVal *= -1;
+ Operands.push_back(AMDGPUOperand::CreateImm(IntVal, S));
return MatchOperand_Success;
}
+ case AsmToken::Real: {
+ // FIXME: We should emit an error if a double precision floating-point
+ // value is used. I'm not sure the best way to detect this.
+ SMLoc S = Parser.getTok().getLoc();
+ int64_t IntVal;
+ if (getParser().parseAbsoluteExpression(IntVal))
+ return MatchOperand_ParseFail;
+
+ APFloat F((float)BitsToDouble(IntVal));
+ if (Negate)
+ F.changeSign();
+ Operands.push_back(
+ AMDGPUOperand::CreateImm(F.bitcastToAPInt().getZExtValue(), S));
+ return MatchOperand_Success;
+ }
+ case AsmToken::Identifier: {
+ SMLoc S, E;
+ unsigned RegNo;
+ if (!ParseRegister(RegNo, S, E)) {
+
+ bool HasModifiers = operandsHaveModifiers(Operands);
+ unsigned Modifiers = 0;
+
+ if (Negate)
+ Modifiers |= 0x1;
+
+ if (Abs) {
+ if (getLexer().getKind() != AsmToken::Pipe)
+ return MatchOperand_ParseFail;
+ Parser.Lex();
+ Modifiers |= 0x2;
+ }
+
+ if (Modifiers && !HasModifiers) {
+ // We are adding a modifier to src1 or src2 and previous sources
+ // don't have modifiers, so we need to go back and set empty modifiers
+ // for each previous source.
+ for (unsigned PrevRegIdx = Operands.size() - 1; PrevRegIdx > 1;
+ --PrevRegIdx) {
+
+ AMDGPUOperand &RegOp = ((AMDGPUOperand&)*Operands[PrevRegIdx]);
+ RegOp.setModifiers(0);
+ }
+ }
+
+
+ Operands.push_back(AMDGPUOperand::CreateReg(
+ RegNo, S, E, getContext().getRegisterInfo()));
+
+ if (HasModifiers || Modifiers) {
+ AMDGPUOperand &RegOp = ((AMDGPUOperand&)*Operands[Operands.size() - 1]);
+ RegOp.setModifiers(Modifiers);
+
+ }
+ } else {
+ Operands.push_back(AMDGPUOperand::CreateToken(Parser.getTok().getString(),
+ S));
+ Parser.Lex();
+ }
+ return MatchOperand_Success;
+ }
default:
return MatchOperand_NoMatch;
}
@@ -213,22 +669,282 @@ AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
StringRef Name,
SMLoc NameLoc, OperandVector &Operands) {
+
+ // Clear any forced encodings from the previous instruction.
+ setForcedEncodingSize(0);
+
+ if (Name.endswith("_e64"))
+ setForcedEncodingSize(64);
+ else if (Name.endswith("_e32"))
+ setForcedEncodingSize(32);
+
// Add the instruction mnemonic
Operands.push_back(AMDGPUOperand::CreateToken(Name, NameLoc));
- if (getLexer().is(AsmToken::EndOfStatement))
- return false;
+ while (!getLexer().is(AsmToken::EndOfStatement)) {
+ AMDGPUAsmParser::OperandMatchResultTy Res = parseOperand(Operands, Name);
+
+ // Eat the comma or space if there is one.
+ if (getLexer().is(AsmToken::Comma))
+ Parser.Lex();
- AMDGPUAsmParser::OperandMatchResultTy Res = parseOperand(Operands, Name);
- switch (Res) {
- case MatchOperand_Success: return false;
- case MatchOperand_ParseFail: return Error(NameLoc,
- "Failed parsing operand");
- case MatchOperand_NoMatch: return Error(NameLoc, "Not a valid operand");
+ switch (Res) {
+ case MatchOperand_Success: break;
+ case MatchOperand_ParseFail: return Error(getLexer().getLoc(),
+ "failed parsing operand.");
+ case MatchOperand_NoMatch: return Error(getLexer().getLoc(),
+ "not a valid operand.");
+ }
}
- return true;
+
+ // Once we reach end of statement, continue parsing so we can add default
+ // values for optional arguments.
+ AMDGPUAsmParser::OperandMatchResultTy Res;
+ while ((Res = parseOperand(Operands, Name)) != MatchOperand_NoMatch) {
+ if (Res != MatchOperand_Success)
+ return Error(getLexer().getLoc(), "failed parsing operand.");
+ }
+ return false;
+}
+
+//===----------------------------------------------------------------------===//
+// Utility functions
+//===----------------------------------------------------------------------===//
+
+/// Parse an operand written as "<Prefix>:<integer>".
+///
+/// At end of statement nothing is consumed and \p Int is set to \p Default.
+///
+/// \returns MatchOperand_NoMatch when the current token is not the
+/// identifier \p Prefix, MatchOperand_ParseFail when the prefix is present
+/// but not followed by ':' and an integer expression, and
+/// MatchOperand_Success otherwise.
+AMDGPUAsmParser::OperandMatchResultTy
+AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &Int,
+                                    int64_t Default) {
+  // A statement that has already ended means this is a defaulted argument.
+  if (getLexer().is(AsmToken::EndOfStatement)) {
+    Int = Default;
+    return MatchOperand_Success;
+  }
+
+  if (getLexer().isNot(AsmToken::Identifier))
+    return MatchOperand_NoMatch;
+
+  if (!Parser.getTok().getString().equals(Prefix))
+    return MatchOperand_NoMatch;
+
+  // From here on the prefix has matched, so any problem is a hard failure.
+  Parser.Lex();
+  if (getLexer().isNot(AsmToken::Colon))
+    return MatchOperand_ParseFail;
+
+  Parser.Lex();
+  if (getLexer().isNot(AsmToken::Integer))
+    return MatchOperand_ParseFail;
+
+  if (getParser().parseAbsoluteExpression(Int))
+    return MatchOperand_ParseFail;
+
+  return MatchOperand_Success;
+}
+
+/// Parse "<Prefix>:<integer>" and, on success, append the value to
+/// \p Operands as an immediate tagged with \p ImmTy.
+///
+/// Any result other than success is passed through and leaves \p Operands
+/// untouched.
+AMDGPUAsmParser::OperandMatchResultTy
+AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
+                                    enum AMDGPUOperand::ImmTy ImmTy) {
+  // Capture the location before any token is consumed.
+  const SMLoc Loc = Parser.getTok().getLoc();
+
+  int64_t Parsed = 0;
+  const AMDGPUAsmParser::OperandMatchResultTy Status =
+      parseIntWithPrefix(Prefix, Parsed);
+  if (Status == MatchOperand_Success)
+    Operands.push_back(AMDGPUOperand::CreateImm(Parsed, Loc, ImmTy));
+
+  return Status;
+}
+
+/// Parse an optional single-bit operand.
+///
+/// The bit is written "<Name>" to set it and "no<Name>" to clear it. At end
+/// of statement nothing is consumed and a cleared bit is appended as the
+/// default.
+///
+/// \returns MatchOperand_NoMatch, leaving \p Operands untouched, when the
+/// current token is neither spelling; MatchOperand_Success after appending
+/// an immediate tagged with \p ImmTy otherwise.
+AMDGPUAsmParser::OperandMatchResultTy
+AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands,
+                               enum AMDGPUOperand::ImmTy ImmTy) {
+  int64_t Bit = 0;
+  SMLoc S = Parser.getTok().getLoc();
+
+  // If the statement has already ended this is a defaulted argument: keep
+  // Bit at 0 and consume nothing. Otherwise the token must name the bit.
+  if (getLexer().isNot(AsmToken::EndOfStatement)) {
+    if (getLexer().isNot(AsmToken::Identifier))
+      return MatchOperand_NoMatch;
+
+    StringRef Tok = Parser.getTok().getString();
+    if (Tok == Name) {
+      Bit = 1;
+    } else if (Tok.startswith("no") && Tok.substr(2) == Name) {
+      // Require exactly "no" + Name. A prefix/suffix test alone would also
+      // accept unrelated identifiers that merely start with "no" and end
+      // with Name.
+      Bit = 0;
+    } else {
+      return MatchOperand_NoMatch;
+    }
+    Parser.Lex();
+  }
+
+  Operands.push_back(AMDGPUOperand::CreateImm(Bit, S, ImmTy));
+  return MatchOperand_Success;
+}
+
+/// \returns true if \p Operands already holds the optional operand \p OOp,
+/// either as an immediate with the same tag or as a token with the same
+/// spelling.
+static bool operandsHasOptionalOp(const OperandVector &Operands,
+                                  const OptionalOperand &OOp) {
+  for (const auto &Entry : Operands) {
+    const AMDGPUOperand &Candidate =
+        static_cast<const AMDGPUOperand &>(*Entry);
+
+    if (Candidate.isImm() && Candidate.getImmTy() == OOp.Type)
+      return true;
+
+    if (Candidate.isToken() && Candidate.getToken() == OOp.Name)
+      return true;
+  }
+  return false;
+}
+
+/// Try to parse one optional operand out of the table \p OptionalOps.
+///
+/// Table entries already present in \p Operands are skipped. The first
+/// remaining entry that matches the input decides the result, so each call
+/// appends at most one operand. Because the underlying parsers succeed with
+/// a default value at end of statement, repeated calls there append the
+/// defaults for the entries that were not written.
+///
+/// \returns MatchOperand_NoMatch when no remaining entry matches.
+AMDGPUAsmParser::OperandMatchResultTy
+AMDGPUAsmParser::parseOptionalOps(const ArrayRef<OptionalOperand> &OptionalOps,
+ OperandVector &Operands) {
+ SMLoc S = Parser.getTok().getLoc();
+ for (const OptionalOperand &Op : OptionalOps) {
+ // Never parse the same optional operand twice.
+ if (operandsHasOptionalOp(Operands, Op))
+ continue;
+ AMDGPUAsmParser::OperandMatchResultTy Res;
+ int64_t Value;
+ if (Op.IsBit) {
+ // parseNamedBit appends the operand itself on success.
+ Res = parseNamedBit(Op.Name, Operands, Op.Type);
+ if (Res == MatchOperand_NoMatch)
+ continue;
+ return Res;
+ }
+
+ Res = parseIntWithPrefix(Op.Name, Value, Op.Default);
+
+ if (Res == MatchOperand_NoMatch)
+ continue;
+
+ if (Res != MatchOperand_Success)
+ return Res;
+
+ // Give the table entry a chance to validate or rewrite the value.
+ if (Op.ConvertResult && !Op.ConvertResult(Value)) {
+ return MatchOperand_ParseFail;
+ }
+
+ Operands.push_back(AMDGPUOperand::CreateImm(Value, S, Op.Type));
+ return MatchOperand_Success;
+ }
+ return MatchOperand_NoMatch;
+}
+
+//===----------------------------------------------------------------------===//
+// ds
+//===----------------------------------------------------------------------===//
+
+// Optional operands of DS instructions that take a single offset.
+// Columns: Name, Type, IsBit, Default, ConvertResult.
+static const OptionalOperand DSOptionalOps [] = {
+ {"offset", AMDGPUOperand::ImmTyOffset, false, 0, nullptr},
+ {"gds", AMDGPUOperand::ImmTyGDS, true, 0, nullptr}
+};
+
+// Optional operands of DS instructions that take separate offset0 and
+// offset1 values. Columns: Name, Type, IsBit, Default, ConvertResult.
+static const OptionalOperand DSOptionalOpsOff01 [] = {
+ {"offset0", AMDGPUOperand::ImmTyDSOffset0, false, 0, nullptr},
+ {"offset1", AMDGPUOperand::ImmTyDSOffset1, false, 0, nullptr},
+ {"gds", AMDGPUOperand::ImmTyGDS, true, 0, nullptr}
+};
+
+/// Parse one optional operand from the DSOptionalOps table (offset, gds).
+AMDGPUAsmParser::OperandMatchResultTy
+AMDGPUAsmParser::parseDSOptionalOps(OperandVector &Operands) {
+ return parseOptionalOps(DSOptionalOps, Operands);
+}
+/// Parse one optional operand from the DSOptionalOpsOff01 table
+/// (offset0, offset1, gds).
+AMDGPUAsmParser::OperandMatchResultTy
+AMDGPUAsmParser::parseDSOff01OptionalOps(OperandVector &Operands) {
+ return parseOptionalOps(DSOptionalOpsOff01, Operands);
+}
+
+/// Parse an "offset:<integer>" operand, substituting a zero offset when the
+/// current token does not match the prefix.
+///
+/// A parse failure from the prefixed form is returned unchanged.
+AMDGPUAsmParser::OperandMatchResultTy
+AMDGPUAsmParser::parseDSOffsetOptional(OperandVector &Operands) {
+  const SMLoc Loc = Parser.getTok().getLoc();
+  const AMDGPUAsmParser::OperandMatchResultTy Status =
+      parseIntWithPrefix("offset", Operands, AMDGPUOperand::ImmTyOffset);
+  if (Status != MatchOperand_NoMatch)
+    return Status;
+
+  // Nothing matched: append the default offset in its place.
+  Operands.push_back(
+      AMDGPUOperand::CreateImm(0, Loc, AMDGPUOperand::ImmTyOffset));
+  return MatchOperand_Success;
+}
+
+/// \returns true for an immediate whose value passes isUInt<16>.
+bool AMDGPUOperand::isDSOffset() const {
+ return isImm() && isUInt<16>(getImm());
+}
+
+/// \returns true for an immediate whose value passes isUInt<8>.
+bool AMDGPUOperand::isDSOffset01() const {
+ return isImm() && isUInt<8>(getImm());
+}
+
+/// Build the MCInst operand list for a DS instruction with two offsets.
+///
+/// Register operands are added in source order, followed by the offset0,
+/// offset1 and gds immediates, followed by the M0 register.
+void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
+ const OperandVector &Operands) {
+
+ // Maps an immediate tag to the index of the operand carrying it.
+ std::map<enum AMDGPUOperand::ImmTy, unsigned> OptionalIdx;
+
+ // Index 0 is skipped; ParseInstruction stores the mnemonic token there.
+ for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
+ AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
+
+ // Add the register arguments
+ if (Op.isReg()) {
+ Op.addRegOperands(Inst, 1);
+ continue;
+ }
+
+ // Handle optional arguments
+ // NOTE(review): getImmTy asserts isImm(), so every non-register operand
+ // is assumed to be an immediate here -- confirm against the matcher.
+ OptionalIdx[Op.getImmTy()] = i;
+ }
+
+ // NOTE(review): a tag that was never recorded makes operator[] yield
+ // index 0, i.e. the mnemonic token. Presumably the optional-operand
+ // parser always appends defaults so all three are present -- confirm.
+ unsigned Offset0Idx = OptionalIdx[AMDGPUOperand::ImmTyDSOffset0];
+ unsigned Offset1Idx = OptionalIdx[AMDGPUOperand::ImmTyDSOffset1];
+ unsigned GDSIdx = OptionalIdx[AMDGPUOperand::ImmTyGDS];
+
+ ((AMDGPUOperand &)*Operands[Offset0Idx]).addImmOperands(Inst, 1); // offset0
+ ((AMDGPUOperand &)*Operands[Offset1Idx]).addImmOperands(Inst, 1); // offset1
+ ((AMDGPUOperand &)*Operands[GDSIdx]).addImmOperands(Inst, 1); // gds
+ Inst.addOperand(MCOperand::CreateReg(AMDGPU::M0)); // m0
 }
+/// Build the MCInst operand list for a DS instruction with one offset.
+///
+/// Register operands are added in source order, followed by the offset
+/// immediate, the gds immediate (unless a literal "gds" token was seen),
+/// and finally the M0 register.
+void AMDGPUAsmParser::cvtDS(MCInst &Inst, const OperandVector &Operands) {
+
+ // Maps an immediate tag to the index of the operand carrying it.
+ std::map<enum AMDGPUOperand::ImmTy, unsigned> OptionalIdx;
+ // Set when "gds" appears as a plain token rather than as an immediate.
+ bool GDSOnly = false;
+
+ // Index 0 is skipped; ParseInstruction stores the mnemonic token there.
+ for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
+ AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
+
+ // Add the register arguments
+ if (Op.isReg()) {
+ Op.addRegOperands(Inst, 1);
+ continue;
+ }
+
+ if (Op.isToken() && Op.getToken() == "gds") {
+ GDSOnly = true;
+ continue;
+ }
+
+ // Handle optional arguments
+ // NOTE(review): getImmTy asserts isImm(), so any remaining operand is
+ // assumed to be an immediate here -- confirm against the matcher.
+ OptionalIdx[Op.getImmTy()] = i;
+ }
+
+ // NOTE(review): a missing tag makes operator[] yield index 0, the
+ // mnemonic token; presumably defaults are always appended -- confirm.
+ unsigned OffsetIdx = OptionalIdx[AMDGPUOperand::ImmTyOffset];
+ ((AMDGPUOperand &)*Operands[OffsetIdx]).addImmOperands(Inst, 1); // offset
+
+ if (!GDSOnly) {
+ unsigned GDSIdx = OptionalIdx[AMDGPUOperand::ImmTyGDS];
+ ((AMDGPUOperand &)*Operands[GDSIdx]).addImmOperands(Inst, 1); // gds
+ }
+ Inst.addOperand(MCOperand::CreateReg(AMDGPU::M0)); // m0
+}
+
+
//===----------------------------------------------------------------------===//
// s_waitcnt
//===----------------------------------------------------------------------===//
@@ -283,6 +999,7 @@ AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
// expcnt [6:4]
// lgkmcnt [10:8]
int64_t CntVal = 0x77f;
+ SMLoc S = Parser.getTok().getLoc();
switch(getLexer().getKind()) {
default: return MatchOperand_ParseFail;
@@ -299,7 +1016,7 @@ AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
} while(getLexer().isNot(AsmToken::EndOfStatement));
break;
}
- Operands.push_back(AMDGPUOperand::CreateImm(CntVal));
+ Operands.push_back(AMDGPUOperand::CreateImm(CntVal, S));
return MatchOperand_Success;
}
@@ -307,6 +1024,245 @@ bool AMDGPUOperand::isSWaitCnt() const {
return isImm();
}
+//===----------------------------------------------------------------------===//
+// sopp branch targets
+//===----------------------------------------------------------------------===//
+
+/// Parse a SOPP branch target.
+///
+/// An Integer token is parsed as an absolute expression and pushed as an
+/// immediate operand. An Identifier token is turned into a symbol reference
+/// expression operand and consumed. Any other token kind is a parse failure.
+AMDGPUAsmParser::OperandMatchResultTy
+AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
+  SMLoc StartLoc = Parser.getTok().getLoc();
+  const auto Kind = getLexer().getKind();
+
+  if (Kind == AsmToken::Integer) {
+    int64_t Value;
+    if (getParser().parseAbsoluteExpression(Value))
+      return MatchOperand_ParseFail;
+    Operands.push_back(AMDGPUOperand::CreateImm(Value, StartLoc));
+    return MatchOperand_Success;
+  }
+
+  if (Kind == AsmToken::Identifier) {
+    auto Sym = getContext().GetOrCreateSymbol(Parser.getTok().getString());
+    auto SymRef = MCSymbolRefExpr::Create(Sym, getContext());
+    Operands.push_back(AMDGPUOperand::CreateExpr(SymRef, StartLoc));
+    Parser.Lex(); // consume the identifier
+    return MatchOperand_Success;
+  }
+
+  return MatchOperand_ParseFail;
+}
+
+//===----------------------------------------------------------------------===//
+// mubuf
+//===----------------------------------------------------------------------===//
+
+static const OptionalOperand MubufOptionalOps [] = { // Table handed to parseOptionalOps by parseMubufOptionalOps.
+ {"offset", AMDGPUOperand::ImmTyOffset, false, 0, nullptr}, // NOTE(review): field meanings come from OptionalOperand, declared outside this view
+ {"glc", AMDGPUOperand::ImmTyGLC, true, 0, nullptr},
+ {"slc", AMDGPUOperand::ImmTySLC, true, 0, nullptr},
+ {"tfe", AMDGPUOperand::ImmTyTFE, true, 0, nullptr} // cvtMubuf asserts that exactly four typed immediates were recorded
+};
+
+/// Parse the optional MUBUF operands by handing the MubufOptionalOps table to
+/// parseOptionalOps.
+AMDGPUAsmParser::OperandMatchResultTy
+AMDGPUAsmParser::parseMubufOptionalOps(OperandVector &Operands) {
+  const OperandMatchResultTy Res = parseOptionalOps(MubufOptionalOps, Operands);
+  return Res;
+}
+
+/// Parse an "offset" operand; delegates to parseIntWithPrefix with the
+/// "offset" prefix.
+AMDGPUAsmParser::OperandMatchResultTy
+AMDGPUAsmParser::parseOffset(OperandVector &Operands) {
+  const OperandMatchResultTy Res = parseIntWithPrefix("offset", Operands);
+  return Res;
+}
+
+/// Parse the "glc" flag; delegates to parseNamedBit.
+AMDGPUAsmParser::OperandMatchResultTy
+AMDGPUAsmParser::parseGLC(OperandVector &Operands) {
+  const OperandMatchResultTy Res = parseNamedBit("glc", Operands);
+  return Res;
+}
+
+/// Parse the "slc" flag; delegates to parseNamedBit.
+AMDGPUAsmParser::OperandMatchResultTy
+AMDGPUAsmParser::parseSLC(OperandVector &Operands) {
+  const OperandMatchResultTy Res = parseNamedBit("slc", Operands);
+  return Res;
+}
+
+/// Parse the "tfe" flag; delegates to parseNamedBit.
+AMDGPUAsmParser::OperandMatchResultTy
+AMDGPUAsmParser::parseTFE(OperandVector &Operands) {
+  const OperandMatchResultTy Res = parseNamedBit("tfe", Operands);
+  return Res;
+}
+
+/// Returns true when this operand is an immediate whose value passes
+/// isUInt<12>, i.e. fits an unsigned 12-bit field.
+bool AMDGPUOperand::isMubufOffset() const {
+  if (!isImm())
+    return false;
+  return isUInt<12>(getImm());
+}
+
+/// Build the MCInst operands for a MUBUF instruction.
+///
+/// Registers and untyped immediates (ImmTyNone) are appended in source order,
+/// tokens contribute nothing, and the remaining typed immediates are appended
+/// afterwards in the fixed order offset, glc, slc, tfe.
+void AMDGPUAsmParser::cvtMubuf(MCInst &Inst,
+                               const OperandVector &Operands) {
+  // Downcast helper for the parsed operand stored at position Idx.
+  auto OperandAt = [&Operands](unsigned Idx) -> AMDGPUOperand & {
+    return (AMDGPUOperand &)*Operands[Idx];
+  };
+
+  // Position in Operands of each typed immediate, keyed by its ImmTy.
+  std::map<enum AMDGPUOperand::ImmTy, unsigned> ImmPos;
+
+  const unsigned NumOperands = Operands.size();
+  for (unsigned Pos = 1; Pos != NumOperands; ++Pos) {
+    AMDGPUOperand &Cur = OperandAt(Pos);
+
+    if (Cur.isReg()) {
+      Cur.addRegOperands(Inst, 1);
+    } else if (Cur.isImm() && Cur.getImmTy() == AMDGPUOperand::ImmTyNone) {
+      // soffset written as an immediate rather than a register.
+      Cur.addImmOperands(Inst, 1);
+    } else if (Cur.isToken()) {
+      // Tokens like 'offen' are sometimes hard-coded into the asm string;
+      // there are no MCInst operands for them.
+    } else {
+      assert(Cur.isImm());
+      ImmPos[Cur.getImmTy()] = Pos;
+    }
+  }
+
+  // All four optional operands must have been recorded by the loop above.
+  assert(ImmPos.size() == 4);
+
+  const unsigned OffsetPos = ImmPos[AMDGPUOperand::ImmTyOffset];
+  const unsigned GLCPos = ImmPos[AMDGPUOperand::ImmTyGLC];
+  const unsigned SLCPos = ImmPos[AMDGPUOperand::ImmTySLC];
+  const unsigned TFEPos = ImmPos[AMDGPUOperand::ImmTyTFE];
+
+  OperandAt(OffsetPos).addImmOperands(Inst, 1);
+  OperandAt(GLCPos).addImmOperands(Inst, 1);
+  OperandAt(SLCPos).addImmOperands(Inst, 1);
+  OperandAt(TFEPos).addImmOperands(Inst, 1);
+}
+
+//===----------------------------------------------------------------------===//
+// mimg
+//===----------------------------------------------------------------------===//
+
+/// Parse a "dmask" operand; delegates to parseIntWithPrefix with the "dmask"
+/// prefix.
+AMDGPUAsmParser::OperandMatchResultTy
+AMDGPUAsmParser::parseDMask(OperandVector &Operands) {
+  const OperandMatchResultTy Res = parseIntWithPrefix("dmask", Operands);
+  return Res;
+}
+
+/// Parse the "unorm" flag; delegates to parseNamedBit.
+AMDGPUAsmParser::OperandMatchResultTy
+AMDGPUAsmParser::parseUNorm(OperandVector &Operands) {
+  const OperandMatchResultTy Res = parseNamedBit("unorm", Operands);
+  return Res;
+}
+
+/// Parse the "r128" flag; delegates to parseNamedBit.
+AMDGPUAsmParser::OperandMatchResultTy
+AMDGPUAsmParser::parseR128(OperandVector &Operands) {
+  const OperandMatchResultTy Res = parseNamedBit("r128", Operands);
+  return Res;
+}
+
+//===----------------------------------------------------------------------===//
+// vop3
+//===----------------------------------------------------------------------===//
+
+/// Convert a "mul:N" value in place.
+///
+/// Accepts 1, 2 and 4 and rewrites them to 0, 1 and 2 respectively (N >> 1).
+/// Any other value is rejected and left unmodified.
+///
+/// \returns true when \p Mul was accepted and rewritten.
+static bool ConvertOmodMul(int64_t &Mul) {
+  switch (Mul) {
+  case 1:
+  case 2:
+  case 4:
+    Mul >>= 1;
+    return true;
+  default:
+    return false;
+  }
+}
+
+/// Convert a "div:N" value in place.
+///
+/// Accepts 1 (rewritten to 0) and 2 (rewritten to 3). Any other value is
+/// rejected and left unmodified.
+///
+/// \returns true when \p Div was accepted and rewritten.
+static bool ConvertOmodDiv(int64_t &Div) {
+  switch (Div) {
+  case 1:
+    Div = 0;
+    return true;
+  case 2:
+    Div = 3;
+    return true;
+  default:
+    return false;
+  }
+}
+
+static const OptionalOperand VOP3OptionalOps [] = { // Table handed to parseOptionalOps by parseVOP3OptionalOps.
+ {"clamp", AMDGPUOperand::ImmTyClamp, true, 0, nullptr}, // NOTE(review): field meanings come from OptionalOperand, declared outside this view
+ {"mul", AMDGPUOperand::ImmTyOMod, false, 1, ConvertOmodMul}, // "mul" and "div" both produce an ImmTyOMod operand
+ {"div", AMDGPUOperand::ImmTyOMod, false, 1, ConvertOmodDiv}, // each has its own value converter
+};
+
+/// Decide from the operands parsed so far whether the instruction should be
+/// treated as VOP3.
+///
+/// Returns true when any of the following holds:
+///  - operandsHaveModifiers(Operands) is true;
+///  - the operand at index 1 is a register in SGPR_64;
+///  - there are at least five parsed operands;
+///  - there are exactly four parsed operands and the one at index 3 is a
+///    register in SReg_32 or SReg_64.
+static bool isVOP3(OperandVector &Operands) {
+  if (operandsHaveModifiers(Operands))
+    return true;
+
+  // Only look at the destination when one has actually been parsed, so a
+  // short operand list cannot index past the end of the vector.
+  if (Operands.size() >= 2) {
+    AMDGPUOperand &DstOp = ((AMDGPUOperand&)*Operands[1]);
+
+    if (DstOp.isReg() && DstOp.isRegClass(AMDGPU::SGPR_64RegClassID))
+      return true;
+  }
+
+  if (Operands.size() >= 5)
+    return true;
+
+  if (Operands.size() > 3) {
+    AMDGPUOperand &Src1Op = ((AMDGPUOperand&)*Operands[3]);
+    // Test the operand kind with isReg(), as done for DstOp above. Calling
+    // getReg() here would read the register field of an operand that may be
+    // an immediate or a token.
+    if (Src1Op.isReg() && (Src1Op.isRegClass(AMDGPU::SReg_32RegClassID) ||
+                           Src1Op.isRegClass(AMDGPU::SReg_64RegClassID)))
+      return true;
+  }
+  return false;
+}
+
+/// Parse the optional VOP3 operands (clamp / mul / div).
+///
+/// Parsing is attempted only when the operands already carry modifiers, the
+/// instruction looks like VOP3, the statement has not ended yet, or a 64-bit
+/// encoding was forced; otherwise MatchOperand_NoMatch is returned. When an
+/// optional operand is parsed successfully and no operand carried modifiers
+/// beforehand, every register operand from index 2 onwards gets its modifiers
+/// set to 0.
+AMDGPUAsmParser::OperandMatchResultTy
+AMDGPUAsmParser::parseVOP3OptionalOps(OperandVector &Operands) {
+  // operandsHaveModifiers() may answer differently once an operand has been
+  // parsed, so capture the answer up front.
+  const bool HadModifiers = operandsHaveModifiers(Operands);
+  const bool LooksLikeVOP3 = isVOP3(Operands);
+
+  if (!HadModifiers && !LooksLikeVOP3 &&
+      !getLexer().isNot(AsmToken::EndOfStatement) &&
+      getForcedEncodingSize() != 64)
+    return MatchOperand_NoMatch;
+
+  AMDGPUAsmParser::OperandMatchResultTy Result =
+      parseOptionalOps(VOP3OptionalOps, Operands);
+
+  if (HadModifiers || Result != MatchOperand_Success)
+    return Result;
+
+  // A modifier operand was just added, so all earlier register operands need
+  // modifiers as well.
+  const unsigned NumOperands = Operands.size();
+  for (unsigned Pos = 2; Pos != NumOperands; ++Pos) {
+    AMDGPUOperand &Cur = ((AMDGPUOperand&)*Operands[Pos]);
+    if (Cur.isReg())
+      Cur.setModifiers(0);
+  }
+  return Result;
+}
+
+/// Build the MCInst operands for a VOP3 instruction.
+///
+/// The operand at index 1 is always appended as a register. Without
+/// modifiers, every later operand is appended as a register-or-immediate.
+/// With modifiers, register-with-input-modifier operands are appended in
+/// source order and the clamp and omod immediates follow, in that order.
+void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
+  // Downcast helper for the parsed operand stored at position Idx.
+  auto OperandAt = [&Operands](unsigned Idx) -> AMDGPUOperand & {
+    return (AMDGPUOperand &)*Operands[Idx];
+  };
+
+  OperandAt(1).addRegOperands(Inst, 1);
+
+  if (!operandsHaveModifiers(Operands)) {
+    const unsigned NumOperands = Operands.size();
+    for (unsigned Pos = 2; Pos != NumOperands; ++Pos)
+      OperandAt(Pos).addRegOrImmOperands(Inst, 1);
+    return;
+  }
+
+  // Position in Operands of each typed immediate, keyed by its ImmTy.
+  std::map<enum AMDGPUOperand::ImmTy, unsigned> ImmPos;
+
+  const unsigned NumOperands = Operands.size();
+  for (unsigned Pos = 2; Pos != NumOperands; ++Pos) {
+    AMDGPUOperand &Cur = OperandAt(Pos);
+    if (Cur.isRegWithInputMods())
+      Cur.addRegWithInputModsOperands(Inst, 2);
+    else
+      ImmPos[Cur.getImmTy()] = Pos;
+  }
+
+  // NOTE(review): operator[] yields index 0 for an ImmTy that was never
+  // recorded -- confirm the parser always supplies clamp and omod.
+  const unsigned ClampPos = ImmPos[AMDGPUOperand::ImmTyClamp];
+  const unsigned OModPos = ImmPos[AMDGPUOperand::ImmTyOMod];
+
+  OperandAt(ClampPos).addImmOperands(Inst, 1);
+  OperandAt(OModPos).addImmOperands(Inst, 1);
+}
+
/// Force static initialization.
extern "C" void LLVMInitializeR600AsmParser() {
RegisterMCAsmParser<AMDGPUAsmParser> A(TheAMDGPUTarget);
diff --git a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp
index d62fd3f..279c3eb 100644
--- a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp
+++ b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp
@@ -20,7 +20,7 @@
using namespace llvm;
void AMDGPUInstPrinter::printInst(const MCInst *MI, raw_ostream &OS,
- StringRef Annot) {
+ StringRef Annot, const MCSubtargetInfo &STI) {
OS.flush();
printInstruction(MI, OS);
@@ -89,14 +89,18 @@ void AMDGPUInstPrinter::printDSOffset(const MCInst *MI, unsigned OpNo,
void AMDGPUInstPrinter::printDSOffset0(const MCInst *MI, unsigned OpNo,
raw_ostream &O) {
- O << " offset0:";
- printU8ImmDecOperand(MI, OpNo, O);
+ if (MI->getOperand(OpNo).getImm()) {
+ O << " offset0:";
+ printU8ImmDecOperand(MI, OpNo, O);
+ }
}
void AMDGPUInstPrinter::printDSOffset1(const MCInst *MI, unsigned OpNo,
raw_ostream &O) {
- O << " offset1:";
- printU8ImmDecOperand(MI, OpNo, O);
+ if (MI->getOperand(OpNo).getImm()) {
+ O << " offset1:";
+ printU8ImmDecOperand(MI, OpNo, O);
+ }
}
void AMDGPUInstPrinter::printGDS(const MCInst *MI, unsigned OpNo,
@@ -123,7 +127,8 @@ void AMDGPUInstPrinter::printTFE(const MCInst *MI, unsigned OpNo,
O << " tfe";
}
-void AMDGPUInstPrinter::printRegOperand(unsigned reg, raw_ostream &O) {
+void AMDGPUInstPrinter::printRegOperand(unsigned reg, raw_ostream &O,
+ const MCRegisterInfo &MRI) {
switch (reg) {
case AMDGPU::VCC:
O << "vcc";
@@ -293,7 +298,7 @@ void AMDGPUInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
break;
default:
- printRegOperand(Op.getReg(), O);
+ printRegOperand(Op.getReg(), O, MRI);
break;
}
} else if (Op.isImm()) {
diff --git a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h
index 5289718..14fb511 100644
--- a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h
+++ b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h
@@ -29,7 +29,10 @@ public:
void printInstruction(const MCInst *MI, raw_ostream &O);
static const char *getRegisterName(unsigned RegNo);
- void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot) override;
+ void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot,
+ const MCSubtargetInfo &STI) override;
+ static void printRegOperand(unsigned RegNo, raw_ostream &O,
+ const MCRegisterInfo &MRI);
private:
void printU8ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
diff --git a/lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp b/lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp
index d0c634f..f33e692 100644
--- a/lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp
+++ b/lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp
@@ -24,7 +24,7 @@ namespace {
class AMDGPUMCObjectWriter : public MCObjectWriter {
public:
- AMDGPUMCObjectWriter(raw_ostream &OS) : MCObjectWriter(OS, true) { }
+ AMDGPUMCObjectWriter(raw_pwrite_stream &OS) : MCObjectWriter(OS, true) {}
void ExecutePostLayoutBinding(MCAssembler &Asm,
const MCAsmLayout &Layout) override {
//XXX: Implement if necessary.
@@ -131,7 +131,7 @@ class ELFAMDGPUAsmBackend : public AMDGPUAsmBackend {
public:
ELFAMDGPUAsmBackend(const Target &T) : AMDGPUAsmBackend(T) { }
- MCObjectWriter *createObjectWriter(raw_ostream &OS) const override {
+ MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override {
return createAMDGPUELFObjectWriter(OS);
}
};
diff --git a/lib/Target/R600/MCTargetDesc/AMDGPUELFObjectWriter.cpp b/lib/Target/R600/MCTargetDesc/AMDGPUELFObjectWriter.cpp
index 5fb94d5..59f45ff 100644
--- a/lib/Target/R600/MCTargetDesc/AMDGPUELFObjectWriter.cpp
+++ b/lib/Target/R600/MCTargetDesc/AMDGPUELFObjectWriter.cpp
@@ -33,7 +33,7 @@ protected:
AMDGPUELFObjectWriter::AMDGPUELFObjectWriter()
: MCELFObjectTargetWriter(false, 0, 0, false) { }
-MCObjectWriter *llvm::createAMDGPUELFObjectWriter(raw_ostream &OS) {
+MCObjectWriter *llvm::createAMDGPUELFObjectWriter(raw_pwrite_stream &OS) {
MCELFObjectTargetWriter *MOTW = new AMDGPUELFObjectWriter();
return createELFObjectWriter(MOTW, OS, true);
}
diff --git a/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.cpp b/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.cpp
index fb2deef..7b280a4 100644
--- a/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.cpp
+++ b/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.cpp
@@ -64,12 +64,11 @@ static MCCodeGenInfo *createAMDGPUMCCodeGenInfo(StringRef TT, Reloc::Model RM,
return X;
}
-static MCInstPrinter *createAMDGPUMCInstPrinter(const Target &T,
+static MCInstPrinter *createAMDGPUMCInstPrinter(const Triple &T,
unsigned SyntaxVariant,
const MCAsmInfo &MAI,
const MCInstrInfo &MII,
- const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI) {
+ const MCRegisterInfo &MRI) {
return new AMDGPUInstPrinter(MAI, MII, MRI);
}
diff --git a/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.h b/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.h
index 23f0196..9a7548e 100644
--- a/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.h
+++ b/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.h
@@ -28,6 +28,7 @@ class MCObjectWriter;
class MCRegisterInfo;
class MCSubtargetInfo;
class Target;
+class raw_pwrite_stream;
class raw_ostream;
extern Target TheAMDGPUTarget;
@@ -44,7 +45,7 @@ MCCodeEmitter *createSIMCCodeEmitter(const MCInstrInfo &MCII,
MCAsmBackend *createAMDGPUAsmBackend(const Target &T, const MCRegisterInfo &MRI,
StringRef TT, StringRef CPU);
-MCObjectWriter *createAMDGPUELFObjectWriter(raw_ostream &OS);
+MCObjectWriter *createAMDGPUELFObjectWriter(raw_pwrite_stream &OS);
} // End llvm namespace
#define GET_REGINFO_ENUM
diff --git a/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp b/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp
index 760aa37..24f2b6d 100644
--- a/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp
+++ b/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp
@@ -49,7 +49,7 @@ public:
MCContext &ctx)
: MCII(mcii), MRI(mri), Ctx(ctx) { }
- ~SIMCCodeEmitter() { }
+ ~SIMCCodeEmitter() override {}
/// \brief Encode the instruction and write it to the OS.
void EncodeInstruction(const MCInst &MI, raw_ostream &OS,
diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp
index a34e2dc..b6b7067 100644
--- a/lib/Target/R600/R600ISelLowering.cpp
+++ b/lib/Target/R600/R600ISelLowering.cpp
@@ -1811,7 +1811,7 @@ SDValue Swz[4], SelectionDAG &DAG) const {
BuildVector = CompactSwizzlableVector(DAG, BuildVector, SwizzleRemap);
for (unsigned i = 0; i < 4; i++) {
- unsigned Idx = dyn_cast<ConstantSDNode>(Swz[i])->getZExtValue();
+ unsigned Idx = cast<ConstantSDNode>(Swz[i])->getZExtValue();
if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
Swz[i] = DAG.getConstant(SwizzleRemap[Idx], MVT::i32);
}
@@ -1819,7 +1819,7 @@ SDValue Swz[4], SelectionDAG &DAG) const {
SwizzleRemap.clear();
BuildVector = ReorganizeVector(DAG, BuildVector, SwizzleRemap);
for (unsigned i = 0; i < 4; i++) {
- unsigned Idx = dyn_cast<ConstantSDNode>(Swz[i])->getZExtValue();
+ unsigned Idx = cast<ConstantSDNode>(Swz[i])->getZExtValue();
if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
Swz[i] = DAG.getConstant(SwizzleRemap[Idx], MVT::i32);
}
diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td
index 291fb04..7126c82 100644
--- a/lib/Target/R600/R600Instructions.td
+++ b/lib/Target/R600/R600Instructions.td
@@ -683,6 +683,11 @@ def MUL_IEEE : R600_2OP_Helper <0x2, "MUL_IEEE", fmul>;
// TODO: Do these actually match the regular fmin/fmax behavior?
def MAX : R600_2OP_Helper <0x3, "MAX", AMDGPUfmax_legacy>;
def MIN : R600_2OP_Helper <0x4, "MIN", AMDGPUfmin_legacy>;
+// According to https://msdn.microsoft.com/en-us/library/windows/desktop/cc308050%28v=vs.85%29.aspx
+// DX10 min/max returns the other operand if one is NaN,
+// this matches http://llvm.org/docs/LangRef.html#llvm-minnum-intrinsic
+def MAX_DX10 : R600_2OP_Helper <0x5, "MAX_DX10", fmaxnum>;
+def MIN_DX10 : R600_2OP_Helper <0x6, "MIN_DX10", fminnum>;
// For the SET* instructions there is a naming conflict in TargetSelectionDAG.td,
// so some of the instruction names don't match the asm string.
diff --git a/lib/Target/R600/R600TextureIntrinsicsReplacer.cpp b/lib/Target/R600/R600TextureIntrinsicsReplacer.cpp
index 419ec8b..2fc7b02 100644
--- a/lib/Target/R600/R600TextureIntrinsicsReplacer.cpp
+++ b/lib/Target/R600/R600TextureIntrinsicsReplacer.cpp
@@ -162,7 +162,7 @@ class R600TextureIntrinsicsReplacer :
Value *SamplerId = I.getArgOperand(2);
unsigned TextureType =
- dyn_cast<ConstantInt>(I.getArgOperand(3))->getZExtValue();
+ cast<ConstantInt>(I.getArgOperand(3))->getZExtValue();
unsigned SrcSelect[4] = { 0, 1, 2, 3 };
unsigned CT[4] = {1, 1, 1, 1};
@@ -186,7 +186,7 @@ class R600TextureIntrinsicsReplacer :
Value *SamplerId = I.getArgOperand(5);
unsigned TextureType =
- dyn_cast<ConstantInt>(I.getArgOperand(6))->getZExtValue();
+ cast<ConstantInt>(I.getArgOperand(6))->getZExtValue();
unsigned SrcSelect[4] = { 0, 1, 2, 3 };
unsigned CT[4] = {1, 1, 1, 1};
diff --git a/lib/Target/R600/SIAnnotateControlFlow.cpp b/lib/Target/R600/SIAnnotateControlFlow.cpp
index 79f6532..d39ab3f 100644
--- a/lib/Target/R600/SIAnnotateControlFlow.cpp
+++ b/lib/Target/R600/SIAnnotateControlFlow.cpp
@@ -83,7 +83,7 @@ class SIAnnotateControlFlow : public FunctionPass {
void insertElse(BranchInst *Term);
- Value *handleLoopCondition(Value *Cond, PHINode *Broken);
+ Value *handleLoopCondition(Value *Cond, PHINode *Broken, llvm::Loop *L);
void handleLoop(BranchInst *Term);
@@ -207,7 +207,8 @@ void SIAnnotateControlFlow::insertElse(BranchInst *Term) {
}
/// \brief Recursively handle the condition leading to a loop
-Value *SIAnnotateControlFlow::handleLoopCondition(Value *Cond, PHINode *Broken) {
+Value *SIAnnotateControlFlow::handleLoopCondition(Value *Cond, PHINode *Broken,
+ llvm::Loop *L) {
if (PHINode *Phi = dyn_cast<PHINode>(Cond)) {
BasicBlock *Parent = Phi->getParent();
PHINode *NewPhi = PHINode::Create(Int64, 0, "", &Parent->front());
@@ -223,7 +224,7 @@ Value *SIAnnotateControlFlow::handleLoopCondition(Value *Cond, PHINode *Broken)
}
Phi->setIncomingValue(i, BoolFalse);
- Value *PhiArg = handleLoopCondition(Incoming, Broken);
+ Value *PhiArg = handleLoopCondition(Incoming, Broken, L);
NewPhi->addIncoming(PhiArg, From);
}
@@ -253,7 +254,12 @@ Value *SIAnnotateControlFlow::handleLoopCondition(Value *Cond, PHINode *Broken)
} else if (Instruction *Inst = dyn_cast<Instruction>(Cond)) {
BasicBlock *Parent = Inst->getParent();
- TerminatorInst *Insert = Parent->getTerminator();
+ Instruction *Insert;
+ if (L->contains(Inst)) {
+ Insert = Parent->getTerminator();
+ } else {
+ Insert = L->getHeader()->getFirstNonPHIOrDbgOrLifetime();
+ }
Value *Args[] = { Cond, Broken };
return CallInst::Create(IfBreak, Args, "", Insert);
@@ -265,14 +271,15 @@ Value *SIAnnotateControlFlow::handleLoopCondition(Value *Cond, PHINode *Broken)
/// \brief Handle a back edge (loop)
void SIAnnotateControlFlow::handleLoop(BranchInst *Term) {
+ BasicBlock *BB = Term->getParent();
+ llvm::Loop *L = LI->getLoopFor(BB);
BasicBlock *Target = Term->getSuccessor(1);
PHINode *Broken = PHINode::Create(Int64, 0, "", &Target->front());
Value *Cond = Term->getCondition();
Term->setCondition(BoolTrue);
- Value *Arg = handleLoopCondition(Cond, Broken);
+ Value *Arg = handleLoopCondition(Cond, Broken, L);
- BasicBlock *BB = Term->getParent();
for (pred_iterator PI = pred_begin(Target), PE = pred_end(Target);
PI != PE; ++PI) {
diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp
index bd0c3c2..43507d8 100644
--- a/lib/Target/R600/SIISelLowering.cpp
+++ b/lib/Target/R600/SIISelLowering.cpp
@@ -76,8 +76,6 @@ SITargetLowering::SITargetLowering(TargetMachine &TM,
setOperationAction(ISD::FSIN, MVT::f32, Custom);
setOperationAction(ISD::FCOS, MVT::f32, Custom);
- setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
- setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
@@ -2089,3 +2087,38 @@ SDValue SITargetLowering::CreateLiveInRegister(SelectionDAG &DAG,
return DAG.getCopyFromReg(DAG.getEntryNode(), SDLoc(DAG.getEntryNode()),
cast<RegisterSDNode>(VReg)->getReg(), VT);
}
+
+//===----------------------------------------------------------------------===//
+// SI Inline Assembly Support
+//===----------------------------------------------------------------------===//
+
+/// Resolve an inline-asm register constraint to a register and/or class.
+///
+///  - "r" selects an SGPR class from the value type (i64 -> SGPR_64,
+///    i32 -> SGPR_32) without naming a specific register.
+///  - A constraint whose second character is 'v' or 's', immediately followed
+///    by a decimal index, selects that register from VGPR_32 / SGPR_32
+///    (presumably the "{v7}" / "{s3}" spelling -- TODO confirm).
+///  - Everything else is forwarded to TargetLowering.
+std::pair<unsigned, const TargetRegisterClass *>
+SITargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
+                                               const std::string &Constraint,
+                                               MVT VT) const {
+  if (Constraint == "r") {
+    switch(VT.SimpleTy) {
+    case MVT::i64:
+      return std::make_pair(0U, &AMDGPU::SGPR_64RegClass);
+    case MVT::i32:
+      return std::make_pair(0U, &AMDGPU::SGPR_32RegClass);
+    default:
+      // The value type comes from user-written inline asm, so an unexpected
+      // type is not an internal invariant violation. Defer to the generic
+      // implementation below instead of hitting llvm_unreachable.
+      break;
+    }
+  }
+
+  if (Constraint.size() > 1) {
+    const TargetRegisterClass *RC = nullptr;
+    if (Constraint[1] == 'v') {
+      RC = &AMDGPU::VGPR_32RegClass;
+    } else if (Constraint[1] == 's') {
+      RC = &AMDGPU::SGPR_32RegClass;
+    }
+
+    if (RC) {
+      // Parse the decimal index by hand and require at least one digit.
+      // std::atoi returns 0 for non-numeric text, which would silently map
+      // names such as "{vcc}" or "{scc}" onto register 0 of the class.
+      const unsigned NumRegs = RC->getNumRegs();
+      unsigned Idx = 0;
+      std::string::size_type Pos = 2;
+      for (; Pos < Constraint.size(); ++Pos) {
+        const char C = Constraint[Pos];
+        if (C < '0' || C > '9')
+          break;
+        Idx = Idx * 10 + static_cast<unsigned>(C - '0');
+        if (Idx >= NumRegs)
+          break; // out of range; stop before the accumulator can overflow
+      }
+
+      if (Pos > 2 && Idx < NumRegs)
+        return std::make_pair(RC->getRegister(Idx), RC);
+    }
+  }
+  return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
+}
diff --git a/lib/Target/R600/SIISelLowering.h b/lib/Target/R600/SIISelLowering.h
index 92f5847..a6bc7c6 100644
--- a/lib/Target/R600/SIISelLowering.h
+++ b/lib/Target/R600/SIISelLowering.h
@@ -113,6 +113,10 @@ public:
MachineSDNode *buildScratchRSRC(SelectionDAG &DAG,
SDLoc DL,
SDValue Ptr) const;
+
+ std::pair<unsigned, const TargetRegisterClass *> getRegForInlineAsmConstraint(
+ const TargetRegisterInfo *TRI,
+ const std::string &Constraint, MVT VT) const override;
};
} // End namespace llvm
diff --git a/lib/Target/R600/SIInstrFormats.td b/lib/Target/R600/SIInstrFormats.td
index 4167590..bc693c3 100644
--- a/lib/Target/R600/SIInstrFormats.td
+++ b/lib/Target/R600/SIInstrFormats.td
@@ -130,6 +130,11 @@ class VOP3Common <dag outs, dag ins, string asm, list<dag> pattern> :
let AddedComplexity = -1000;
let VOP3 = 1;
+ let VALU = 1;
+
+ let AsmMatchConverter = "cvtVOP3";
+ let isCodeGenOnly = 0;
+
int Size = 8;
}
@@ -181,6 +186,19 @@ class SOPKe <bits<5> op> : Enc32 {
let Inst{31-28} = 0xb; //encoding
}
+class SOPK64e <bits<5> op> : Enc64 { // 64-bit SOPK encoding: SOPK fields in bits 31-0 plus a 32-bit immediate in bits 63-32.
+ bits <7> sdst = 0; // defaults to 0 when a record does not set it
+ bits <16> simm16;
+ bits <32> imm;
+
+ let Inst{15-0} = simm16;
+ let Inst{22-16} = sdst;
+ let Inst{27-23} = op;
+ let Inst{31-28} = 0xb; // encoding
+
+ let Inst{63-32} = imm; // second dword carries the 32-bit immediate
+}
+
class SOPPe <bits<7> op> : Enc32 {
bits <16> simm16;
@@ -208,6 +226,7 @@ class SOP1 <dag outs, dag ins, string asm, list<dag> pattern> :
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
+ let isCodeGenOnly = 0;
let SALU = 1;
let SOP1 = 1;
}
@@ -218,6 +237,7 @@ class SOP2 <dag outs, dag ins, string asm, list<dag> pattern> :
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
+ let isCodeGenOnly = 0;
let SALU = 1;
let SOP2 = 1;
@@ -233,6 +253,7 @@ class SOPC <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
let hasSideEffects = 0;
let SALU = 1;
let SOPC = 1;
+ let isCodeGenOnly = 0;
let UseNamedOperandTable = 1;
}
@@ -550,10 +571,14 @@ let Uses = [EXEC] in {
class VOP1 <bits<8> op, dag outs, dag ins, string asm, list<dag> pattern> :
VOP1Common <outs, ins, asm, pattern>,
- VOP1e<op>;
+ VOP1e<op> {
+ let isCodeGenOnly = 0;
+}
class VOP2 <bits<6> op, dag outs, dag ins, string asm, list<dag> pattern> :
- VOP2Common <outs, ins, asm, pattern>, VOP2e<op>;
+ VOP2Common <outs, ins, asm, pattern>, VOP2e<op> {
+ let isCodeGenOnly = 0;
+}
class VOPC <bits<8> op, dag ins, string asm, list<dag> pattern> :
VOPCCommon <ins, asm, pattern>, VOPCe <op>;
@@ -586,6 +611,7 @@ class DS <dag outs, dag ins, string asm, list<dag> pattern> :
let mayStore = 1;
let hasSideEffects = 0;
+ let AsmMatchConverter = "cvtDS";
let SchedRW = [WriteLDS];
}
@@ -598,6 +624,7 @@ class MUBUF <dag outs, dag ins, string asm, list<dag> pattern> :
let hasSideEffects = 0;
let UseNamedOperandTable = 1;
+ let AsmMatchConverter = "cvtMubuf";
let SchedRW = [WriteVMEM];
}
diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td
index d603ecb..076a0ce 100644
--- a/lib/Target/R600/SIInstrInfo.td
+++ b/lib/Target/R600/SIInstrInfo.td
@@ -6,6 +6,15 @@
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
+def isSICI : Predicate< // Codegen: generation is SOUTHERN_ISLANDS or SEA_ISLANDS; assembler: FeatureGCN1Encoding.
+ "Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS ||"
+ "Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS"
+>, AssemblerPredicate<"FeatureGCN1Encoding">;
+def isCI : Predicate<"Subtarget->getGeneration() " // Codegen only: generation >= SEA_ISLANDS; no AssemblerPredicate is attached.
+ ">= AMDGPUSubtarget::SEA_ISLANDS">;
+def isVI : Predicate < // Codegen: generation >= VOLCANIC_ISLANDS; assembler: FeatureGCN3Encoding.
+ "Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS">,
+ AssemblerPredicate<"FeatureGCN3Encoding">;
class vop {
field bits<9> SI3;
@@ -233,14 +242,88 @@ def FRAMEri32 : Operand<iPTR> {
let MIOperandInfo = (ops i32:$ptr, i32imm:$index);
}
+def SoppBrTarget : AsmOperandClass { // Asm operand class for SOPP branch targets.
+ let Name = "SoppBrTarget";
+ let ParserMethod = "parseSOppBrTarget"; // custom parser; accepts an integer or an identifier
+}
+
def sopp_brtarget : Operand<OtherVT> {
let EncoderMethod = "getSOPPBrEncoding";
let OperandType = "OPERAND_PCREL";
+ let ParserMatchClass = SoppBrTarget;
}
include "SIInstrFormats.td"
include "VIInstrFormats.td"
+def MubufOffsetMatchClass : AsmOperandClass { // Asm operand class for the MUBUF offset operand.
+ let Name = "MubufOffset"; // NOTE(review): no PredicateMethod is set; presumably the default resolves to isMubufOffset -- confirm
+ let ParserMethod = "parseMubufOptionalOps";
+ let RenderMethod = "addImmOperands";
+}
+
+class DSOffsetBaseMatchClass <string parser> : AsmOperandClass { // Template for DS offset operand classes that differ only in their parser.
+ let Name = "DSOffset"#parser; // the parser name is appended so each instantiation gets a distinct class name
+ let ParserMethod = parser;
+ let RenderMethod = "addImmOperands";
+ let PredicateMethod = "isDSOffset"; // shared predicate: immediate that fits 16 unsigned bits
+}
+
+def DSOffsetMatchClass : DSOffsetBaseMatchClass <"parseDSOptionalOps">;
+def DSOffsetGDSMatchClass : DSOffsetBaseMatchClass <"parseDSOffsetOptional">;
+
+def DSOffset01MatchClass : AsmOperandClass { // Asm operand class shared by ds_offset0 and ds_offset1.
+ let Name = "DSOffset1"; // NOTE(review): name differs from the def name (DSOffset01) -- confirm this is intended
+ let ParserMethod = "parseDSOff01OptionalOps";
+ let RenderMethod = "addImmOperands";
+ let PredicateMethod = "isDSOffset01"; // immediate that fits 8 unsigned bits
+}
+
+class GDSBaseMatchClass <string parser> : AsmOperandClass { // Template for gds operand classes that differ only in their parser.
+ let Name = "GDS"#parser; // the parser name is appended so each instantiation gets a distinct class name
+ let PredicateMethod = "isImm";
+ let ParserMethod = parser;
+ let RenderMethod = "addImmOperands";
+}
+
+def GDSMatchClass : GDSBaseMatchClass <"parseDSOptionalOps">;
+def GDS01MatchClass : GDSBaseMatchClass <"parseDSOff01OptionalOps">;
+
+def GLCMatchClass : AsmOperandClass { // glc flag: any immediate, parsed by the shared MUBUF optional-operand parser.
+ let Name = "GLC";
+ let PredicateMethod = "isImm";
+ let ParserMethod = "parseMubufOptionalOps";
+ let RenderMethod = "addImmOperands";
+}
+
+def SLCMatchClass : AsmOperandClass { // slc flag: same settings as GLCMatchClass apart from the name.
+ let Name = "SLC";
+ let PredicateMethod = "isImm";
+ let ParserMethod = "parseMubufOptionalOps";
+ let RenderMethod = "addImmOperands";
+}
+
+def TFEMatchClass : AsmOperandClass { // tfe flag: same settings as GLCMatchClass apart from the name.
+ let Name = "TFE";
+ let PredicateMethod = "isImm";
+ let ParserMethod = "parseMubufOptionalOps";
+ let RenderMethod = "addImmOperands";
+}
+
+def OModMatchClass : AsmOperandClass { // omod operand: any immediate, parsed by the shared VOP3 optional-operand parser.
+ let Name = "OMod";
+ let PredicateMethod = "isImm";
+ let ParserMethod = "parseVOP3OptionalOps";
+ let RenderMethod = "addImmOperands";
+}
+
+def ClampMatchClass : AsmOperandClass { // clamp flag: same settings as OModMatchClass apart from the name.
+ let Name = "Clamp";
+ let PredicateMethod = "isImm";
+ let ParserMethod = "parseVOP3OptionalOps";
+ let RenderMethod = "addImmOperands";
+}
+
let OperandType = "OPERAND_IMMEDIATE" in {
def offen : Operand<i1> {
@@ -254,35 +337,52 @@ def addr64 : Operand<i1> {
}
def mbuf_offset : Operand<i16> {
let PrintMethod = "printMBUFOffset";
+ let ParserMatchClass = MubufOffsetMatchClass;
}
-def ds_offset : Operand<i16> {
+class ds_offset_base <AsmOperandClass mc> : Operand<i16> {
let PrintMethod = "printDSOffset";
+ let ParserMatchClass = mc;
}
+def ds_offset : ds_offset_base <DSOffsetMatchClass>;
+def ds_offset_gds : ds_offset_base <DSOffsetGDSMatchClass>;
+
def ds_offset0 : Operand<i8> {
let PrintMethod = "printDSOffset0";
+ let ParserMatchClass = DSOffset01MatchClass;
}
def ds_offset1 : Operand<i8> {
let PrintMethod = "printDSOffset1";
+ let ParserMatchClass = DSOffset01MatchClass;
}
-def gds : Operand <i1> {
+class gds_base <AsmOperandClass mc> : Operand <i1> {
let PrintMethod = "printGDS";
+ let ParserMatchClass = mc;
}
+def gds : gds_base <GDSMatchClass>;
+
+def gds01 : gds_base <GDS01MatchClass>;
+
def glc : Operand <i1> {
let PrintMethod = "printGLC";
+ let ParserMatchClass = GLCMatchClass;
}
def slc : Operand <i1> {
let PrintMethod = "printSLC";
+ let ParserMatchClass = SLCMatchClass;
}
def tfe : Operand <i1> {
let PrintMethod = "printTFE";
+ let ParserMatchClass = TFEMatchClass;
}
def omod : Operand <i32> {
let PrintMethod = "printOModSI";
+ let ParserMatchClass = OModMatchClass;
}
def ClampMod : Operand <i1> {
let PrintMethod = "printClampSI";
+ let ParserMatchClass = ClampMatchClass;
}
} // End OperandType = "OPERAND_IMMEDIATE"
@@ -392,12 +492,18 @@ class SOP1_Pseudo <string opName, dag outs, dag ins, list<dag> pattern> :
class SOP1_Real_si <sop1 op, string opName, dag outs, dag ins, string asm> :
SOP1 <outs, ins, asm, []>,
SOP1e <op.SI>,
- SIMCInstr<opName, SISubtarget.SI>;
+ SIMCInstr<opName, SISubtarget.SI> {
+ let isCodeGenOnly = 0;
+ let AssemblerPredicates = [isSICI];
+}
class SOP1_Real_vi <sop1 op, string opName, dag outs, dag ins, string asm> :
SOP1 <outs, ins, asm, []>,
SOP1e <op.VI>,
- SIMCInstr<opName, SISubtarget.VI>;
+ SIMCInstr<opName, SISubtarget.VI> {
+ let isCodeGenOnly = 0;
+ let AssemblerPredicates = [isVI];
+}
multiclass SOP1_m <sop1 op, string opName, dag outs, dag ins, string asm,
list<dag> pattern> {
@@ -473,12 +579,16 @@ class SOP2_Pseudo<string opName, dag outs, dag ins, list<dag> pattern> :
class SOP2_Real_si<sop2 op, string opName, dag outs, dag ins, string asm> :
SOP2<outs, ins, asm, []>,
SOP2e<op.SI>,
- SIMCInstr<opName, SISubtarget.SI>;
+ SIMCInstr<opName, SISubtarget.SI> {
+ let AssemblerPredicates = [isSICI];
+}
class SOP2_Real_vi<sop2 op, string opName, dag outs, dag ins, string asm> :
SOP2<outs, ins, asm, []>,
SOP2e<op.VI>,
- SIMCInstr<opName, SISubtarget.VI>;
+ SIMCInstr<opName, SISubtarget.VI> {
+ let AssemblerPredicates = [isVI];
+}
multiclass SOP2_SELECT_32 <sop2 op, string opName, list<dag> pattern> {
def "" : SOP2_Pseudo <opName, (outs SReg_32:$dst),
@@ -540,12 +650,28 @@ class SOPK_Pseudo <string opName, dag outs, dag ins, list<dag> pattern> :
class SOPK_Real_si <sopk op, string opName, dag outs, dag ins, string asm> :
SOPK <outs, ins, asm, []>,
SOPKe <op.SI>,
- SIMCInstr<opName, SISubtarget.SI>;
+ SIMCInstr<opName, SISubtarget.SI> {
+ let AssemblerPredicates = [isSICI];
+ let isCodeGenOnly = 0;
+}
class SOPK_Real_vi <sopk op, string opName, dag outs, dag ins, string asm> :
SOPK <outs, ins, asm, []>,
SOPKe <op.VI>,
- SIMCInstr<opName, SISubtarget.VI>;
+ SIMCInstr<opName, SISubtarget.VI> {
+ let AssemblerPredicates = [isVI];
+ let isCodeGenOnly = 0;
+}
+
+multiclass SOPK_m <sopk op, string opName, dag outs, dag ins, string opAsm, // Emits the pseudo plus the _si and _vi real SOPK instructions.
+ string asm = opName#opAsm> { // asm defaults to the opcode name followed by the operand string
+ def "" : SOPK_Pseudo <opName, outs, ins, []>; // the pseudo gets an empty pattern list
+
+ def _si : SOPK_Real_si <op, opName, outs, ins, asm>;
+
+ def _vi : SOPK_Real_vi <op, opName, outs, ins, asm>;
+
+}
multiclass SOPK_32 <sopk op, string opName, list<dag> pattern> {
def "" : SOPK_Pseudo <opName, (outs SReg_32:$dst), (ins u16imm:$src0),
@@ -562,13 +688,39 @@ multiclass SOPK_SCC <sopk op, string opName, list<dag> pattern> {
def "" : SOPK_Pseudo <opName, (outs SCCReg:$dst),
(ins SReg_32:$src0, u16imm:$src1), pattern>;
- def _si : SOPK_Real_si <op, opName, (outs SCCReg:$dst),
- (ins SReg_32:$src0, u16imm:$src1), opName#" $dst, $src0">;
+ let DisableEncoding = "$dst" in {
+ def _si : SOPK_Real_si <op, opName, (outs SCCReg:$dst),
+ (ins SReg_32:$sdst, u16imm:$simm16), opName#" $sdst, $simm16">;
- def _vi : SOPK_Real_vi <op, opName, (outs SCCReg:$dst),
- (ins SReg_32:$src0, u16imm:$src1), opName#" $dst, $src0">;
+ def _vi : SOPK_Real_vi <op, opName, (outs SCCReg:$dst),
+ (ins SReg_32:$sdst, u16imm:$simm16), opName#" $sdst, $simm16">;
+ }
}
+multiclass SOPK_32TIE <sopk op, string opName, list<dag> pattern> : SOPK_m <
+ op, opName, (outs SReg_32:$sdst), (ins SReg_32:$src0, u16imm:$simm16),
+ " $sdst, $simm16"
+>;
+
+multiclass SOPK_IMM32 <sopk op, string opName, dag outs, dag ins,
+ string argAsm, string asm = opName#argAsm> {
+
+ def "" : SOPK_Pseudo <opName, outs, ins, []>;
+
+ def _si : SOPK <outs, ins, asm, []>,
+ SOPK64e <op.SI>,
+ SIMCInstr<opName, SISubtarget.SI> {
+ let AssemblerPredicates = [isSICI];
+ let isCodeGenOnly = 0;
+ }
+
+ def _vi : SOPK <outs, ins, asm, []>,
+ SOPK64e <op.VI>,
+ SIMCInstr<opName, SISubtarget.VI> {
+ let AssemblerPredicates = [isVI];
+ let isCodeGenOnly = 0;
+ }
+}
//===----------------------------------------------------------------------===//
// SMRD classes
//===----------------------------------------------------------------------===//
@@ -584,13 +736,17 @@ class SMRD_Real_si <bits<5> op, string opName, bit imm, dag outs, dag ins,
string asm> :
SMRD <outs, ins, asm, []>,
SMRDe <op, imm>,
- SIMCInstr<opName, SISubtarget.SI>;
+ SIMCInstr<opName, SISubtarget.SI> {
+ let AssemblerPredicates = [isSICI];
+}
class SMRD_Real_vi <bits<8> op, string opName, bit imm, dag outs, dag ins,
string asm> :
SMRD <outs, ins, asm, []>,
SMEMe_vi <op, imm>,
- SIMCInstr<opName, SISubtarget.VI>;
+ SIMCInstr<opName, SISubtarget.VI> {
+ let AssemblerPredicates = [isVI];
+}
multiclass SMRD_m <bits<5> op, string opName, bit imm, dag outs, dag ins,
string asm, list<dag> pattern> {
@@ -629,8 +785,14 @@ multiclass SMRD_Helper <bits<5> op, string opName, RegisterClass baseClass,
def InputMods : OperandWithDefaultOps <i32, (ops (i32 0))> {
let PrintMethod = "printOperandAndMods";
}
+
+def InputModsMatchClass : AsmOperandClass {
+ let Name = "RegWithInputMods";
+}
+
def InputModsNoDefault : Operand <i32> {
let PrintMethod = "printOperandAndMods";
+ let ParserMatchClass = InputModsMatchClass;
}
class getNumSrcArgs<ValueType Src1, ValueType Src2> {
@@ -838,7 +1000,8 @@ class AtomicNoRet <string noRetOp, bit isRet> {
class VOP1_Pseudo <dag outs, dag ins, list<dag> pattern, string opName> :
VOP1Common <outs, ins, "", pattern>,
VOP <opName>,
- SIMCInstr <opName#"_e32", SISubtarget.NONE> {
+ SIMCInstr <opName#"_e32", SISubtarget.NONE>,
+ MnemonicAlias<opName#"_e32", opName> {
let isPseudo = 1;
let isCodeGenOnly = 1;
@@ -873,18 +1036,23 @@ multiclass VOP1SI_m <vop1 op, dag outs, dag ins, string asm, list<dag> pattern,
class VOP2_Pseudo <dag outs, dag ins, list<dag> pattern, string opName> :
VOP2Common <outs, ins, "", pattern>,
VOP <opName>,
- SIMCInstr<opName#"_e32", SISubtarget.NONE> {
+ SIMCInstr<opName#"_e32", SISubtarget.NONE>,
+ MnemonicAlias<opName#"_e32", opName> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
class VOP2_Real_si <string opName, vop2 op, dag outs, dag ins, string asm> :
VOP2 <op.SI, outs, ins, opName#asm, []>,
- SIMCInstr <opName#"_e32", SISubtarget.SI>;
+ SIMCInstr <opName#"_e32", SISubtarget.SI> {
+ let AssemblerPredicates = [isSICI];
+}
class VOP2_Real_vi <string opName, vop2 op, dag outs, dag ins, string asm> :
- VOP2 <op.SI, outs, ins, opName#asm, []>,
- SIMCInstr <opName#"_e32", SISubtarget.VI>;
+ VOP2 <op.VI, outs, ins, opName#asm, []>,
+ SIMCInstr <opName#"_e32", SISubtarget.VI> {
+ let AssemblerPredicates = [isVI];
+}
multiclass VOP2SI_m <vop2 op, dag outs, dag ins, string asm, list<dag> pattern,
string opName, string revOp> {
@@ -930,7 +1098,8 @@ class VOP3DisableModFields <bit HasSrc0Mods,
class VOP3_Pseudo <dag outs, dag ins, list<dag> pattern, string opName> :
VOP3Common <outs, ins, "", pattern>,
VOP <opName>,
- SIMCInstr<opName#"_e64", SISubtarget.NONE> {
+ SIMCInstr<opName#"_e64", SISubtarget.NONE>,
+ MnemonicAlias<opName#"_e64", opName> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -938,22 +1107,30 @@ class VOP3_Pseudo <dag outs, dag ins, list<dag> pattern, string opName> :
class VOP3_Real_si <bits<9> op, dag outs, dag ins, string asm, string opName> :
VOP3Common <outs, ins, asm, []>,
VOP3e <op>,
- SIMCInstr<opName#"_e64", SISubtarget.SI>;
+ SIMCInstr<opName#"_e64", SISubtarget.SI> {
+ let AssemblerPredicates = [isSICI];
+}
class VOP3_Real_vi <bits<10> op, dag outs, dag ins, string asm, string opName> :
VOP3Common <outs, ins, asm, []>,
VOP3e_vi <op>,
- SIMCInstr <opName#"_e64", SISubtarget.VI>;
+ SIMCInstr <opName#"_e64", SISubtarget.VI> {
+ let AssemblerPredicates = [isVI];
+}
class VOP3b_Real_si <bits<9> op, dag outs, dag ins, string asm, string opName> :
VOP3Common <outs, ins, asm, []>,
VOP3be <op>,
- SIMCInstr<opName#"_e64", SISubtarget.SI>;
+ SIMCInstr<opName#"_e64", SISubtarget.SI> {
+ let AssemblerPredicates = [isSICI];
+}
class VOP3b_Real_vi <bits<10> op, dag outs, dag ins, string asm, string opName> :
VOP3Common <outs, ins, asm, []>,
VOP3be_vi <op>,
- SIMCInstr <opName#"_e64", SISubtarget.VI>;
+ SIMCInstr <opName#"_e64", SISubtarget.VI> {
+ let AssemblerPredicates = [isVI];
+}
multiclass VOP3_m <vop op, dag outs, dag ins, string asm, list<dag> pattern,
string opName, int NumSrcArgs, bit HasMods = 1> {
@@ -1095,12 +1272,16 @@ multiclass VOP2SI_3VI_m <vop3 op, string opName, dag outs, dag ins,
}
def _si : VOP2 <op.SI3{5-0}, outs, ins, asm, []>,
- SIMCInstr <opName, SISubtarget.SI>;
+ SIMCInstr <opName, SISubtarget.SI> {
+ let AssemblerPredicates = [isSICI];
+ }
def _vi : VOP3Common <outs, ins, asm, []>,
VOP3e_vi <op.VI3>,
VOP3DisableFields <1, 0, 0>,
- SIMCInstr <opName, SISubtarget.VI>;
+ SIMCInstr <opName, SISubtarget.VI> {
+ let AssemblerPredicates = [isVI];
+ }
}
multiclass VOP1_Helper <vop1 op, string opName, dag outs,
@@ -1253,7 +1434,8 @@ let isCodeGenOnly = 0 in {
class VOPC_Pseudo <dag outs, dag ins, list<dag> pattern, string opName> :
VOPCCommon <ins, "", pattern>,
VOP <opName>,
- SIMCInstr<opName#"_e32", SISubtarget.NONE> {
+ SIMCInstr<opName#"_e32", SISubtarget.NONE>,
+ MnemonicAlias<opName#"_e32", opName> {
let isPseudo = 1;
let isCodeGenOnly = 1;
}
@@ -1504,7 +1686,9 @@ class DS_Pseudo <string opName, dag outs, dag ins, list<dag> pattern> :
class DS_Real_si <bits<8> op, string opName, dag outs, dag ins, string asm> :
DS <outs, ins, asm, []>,
DSe <op>,
- SIMCInstr <opName, SISubtarget.SI>;
+ SIMCInstr <opName, SISubtarget.SI> {
+ let isCodeGenOnly = 0;
+}
class DS_Real_vi <bits<8> op, string opName, dag outs, dag ins, string asm> :
DS <outs, ins, asm, []>,
@@ -1518,6 +1702,7 @@ class DS_Off16_Real_si <bits<8> op, string opName, dag outs, dag ins, string asm
bits<16> offset;
let offset0 = offset{7-0};
let offset1 = offset{15-8};
+ let isCodeGenOnly = 0;
}
class DS_Off16_Real_vi <bits<8> op, string opName, dag outs, dag ins, string asm> :
@@ -1545,12 +1730,12 @@ multiclass DS_1A_RET <bits<8> op, string opName, RegisterClass rc,
multiclass DS_1A_Off8_RET <bits<8> op, string opName, RegisterClass rc,
dag outs = (outs rc:$vdst),
dag ins = (ins VGPR_32:$addr, ds_offset0:$offset0, ds_offset1:$offset1,
- gds:$gds, M0Reg:$m0),
+ gds01:$gds, M0Reg:$m0),
string asm = opName#" $vdst, $addr"#"$offset0"#"$offset1$gds"> {
def "" : DS_Pseudo <opName, outs, ins, []>;
- let data0 = 0, data1 = 0 in {
+ let data0 = 0, data1 = 0, AsmMatchConverter = "cvtDSOffset01" in {
def _si : DS_Real_si <op, opName, outs, ins, asm>;
def _vi : DS_Real_vi <op, opName, outs, ins, asm>;
}
@@ -1574,12 +1759,12 @@ multiclass DS_1A1D_NORET <bits<8> op, string opName, RegisterClass rc,
multiclass DS_1A1D_Off8_NORET <bits<8> op, string opName, RegisterClass rc,
dag outs = (outs),
dag ins = (ins VGPR_32:$addr, rc:$data0, rc:$data1,
- ds_offset0:$offset0, ds_offset1:$offset1, gds:$gds, M0Reg:$m0),
+ ds_offset0:$offset0, ds_offset1:$offset1, gds01:$gds, M0Reg:$m0),
string asm = opName#" $addr, $data0, $data1"#"$offset0"#"$offset1"#"$gds"> {
def "" : DS_Pseudo <opName, outs, ins, []>;
- let vdst = 0 in {
+ let vdst = 0, AsmMatchConverter = "cvtDSOffset01" in {
def _si : DS_Real_si <op, opName, outs, ins, asm>;
def _vi : DS_Real_vi <op, opName, outs, ins, asm>;
}
@@ -1653,7 +1838,7 @@ multiclass DS_0A_RET <bits<8> op, string opName,
multiclass DS_1A_RET_GDS <bits<8> op, string opName,
dag outs = (outs VGPR_32:$vdst),
- dag ins = (ins VGPR_32:$addr, ds_offset:$offset, M0Reg:$m0),
+ dag ins = (ins VGPR_32:$addr, ds_offset_gds:$offset, M0Reg:$m0),
string asm = opName#" $vdst, $addr"#"$offset gds"> {
def "" : DS_Pseudo <opName, outs, ins, []>;
@@ -1762,6 +1947,20 @@ class mubuf <bits<7> si, bits<7> vi = si> {
field bits<7> VI = vi;
}
+let isCodeGenOnly = 0 in {
+
+class MUBUF_si <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
+ MUBUF <outs, ins, asm, pattern>, MUBUFe <op> {
+ let lds = 0;
+}
+
+} // End let isCodeGenOnly = 0
+
+class MUBUF_vi <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
+ MUBUF <outs, ins, asm, pattern>, MUBUFe_vi <op> {
+ let lds = 0;
+}
+
class MUBUFAddr64Table <bit is_addr64, string suffix = ""> {
bit IsAddr64 = is_addr64;
string OpName = NAME # suffix;
@@ -1805,7 +2004,7 @@ multiclass MUBUF_m <mubuf op, string opName, dag outs, dag ins, string asm,
def "" : MUBUF_Pseudo <opName, outs, ins, pattern>,
MUBUFAddr64Table <0>;
- let addr64 = 0 in {
+ let addr64 = 0, isCodeGenOnly = 0 in {
def _si : MUBUF_Real_si <op, opName, outs, ins, asm>;
}
@@ -1818,7 +2017,7 @@ multiclass MUBUFAddr64_m <mubuf op, string opName, dag outs,
def "" : MUBUF_Pseudo <opName, outs, ins, pattern>,
MUBUFAddr64Table <1>;
- let addr64 = 1 in {
+ let addr64 = 1, isCodeGenOnly = 0 in {
def _si : MUBUF_Real_si <op, opName, outs, ins, asm>;
}
@@ -1826,11 +2025,6 @@ multiclass MUBUFAddr64_m <mubuf op, string opName, dag outs,
// for VI appropriately.
}
-class MUBUF_si <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
- MUBUF <outs, ins, asm, pattern>, MUBUFe <op> {
- let lds = 0;
-}
-
multiclass MUBUFAtomicOffset_m <mubuf op, string opName, dag outs, dag ins,
string asm, list<dag> pattern, bit is_return> {
diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td
index 95b2470..91e8c8c 100644
--- a/lib/Target/R600/SIInstructions.td
+++ b/lib/Target/R600/SIInstructions.td
@@ -27,18 +27,10 @@ def SendMsgImm : Operand<i32> {
}
def isGCN : Predicate<"Subtarget->getGeneration() "
- ">= AMDGPUSubtarget::SOUTHERN_ISLANDS">;
+ ">= AMDGPUSubtarget::SOUTHERN_ISLANDS">,
+ AssemblerPredicate<"FeatureGCN">;
def isSI : Predicate<"Subtarget->getGeneration() "
"== AMDGPUSubtarget::SOUTHERN_ISLANDS">;
-def isSICI : Predicate<
- "Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS ||"
- "Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS"
->;
-def isCI : Predicate<"Subtarget->getGeneration() "
- ">= AMDGPUSubtarget::SEA_ISLANDS">;
-def isVI : Predicate <
- "Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS"
->;
def HasFlatAddressSpace : Predicate<"Subtarget.hasFlatAddressSpace()">;
@@ -242,9 +234,9 @@ defm S_MAX_U32 : SOP2_32 <sop2<0x09>, "s_max_u32",
>;
} // End Defs = [SCC]
-defm S_CSELECT_B32 : SOP2_SELECT_32 <sop2<0x0a>, "s_cselect_b32", []>;
let Uses = [SCC] in {
+ defm S_CSELECT_B32 : SOP2_32 <sop2<0x0a>, "s_cselect_b32", []>;
defm S_CSELECT_B64 : SOP2_64 <sop2<0x0b>, "s_cselect_b64", []>;
} // End Uses = [SCC]
@@ -387,6 +379,7 @@ defm S_CMPK_EQ_I32 : SOPK_SCC <sopk<0x03, 0x02>, "s_cmpk_eq_i32",
>;
*/
+defm S_CMPK_EQ_I32 : SOPK_SCC <sopk<0x03, 0x02>, "s_cmpk_eq_i32", []>;
defm S_CMPK_LG_I32 : SOPK_SCC <sopk<0x04, 0x03>, "s_cmpk_lg_i32", []>;
defm S_CMPK_GT_I32 : SOPK_SCC <sopk<0x05, 0x04>, "s_cmpk_gt_i32", []>;
defm S_CMPK_GE_I32 : SOPK_SCC <sopk<0x06, 0x05>, "s_cmpk_ge_i32", []>;
@@ -400,18 +393,27 @@ defm S_CMPK_LT_U32 : SOPK_SCC <sopk<0x0d, 0x0c>, "s_cmpk_lt_u32", []>;
defm S_CMPK_LE_U32 : SOPK_SCC <sopk<0x0e, 0x0d>, "s_cmpk_le_u32", []>;
} // End isCompare = 1
-let isCommutable = 1 in {
- let Defs = [SCC], isCommutable = 1 in {
- defm S_ADDK_I32 : SOPK_32 <sopk<0x0f, 0x0e>, "s_addk_i32", []>;
- }
- defm S_MULK_I32 : SOPK_32 <sopk<0x10, 0x0f>, "s_mulk_i32", []>;
+let Defs = [SCC], isCommutable = 1, DisableEncoding = "$src0",
+ Constraints = "$sdst = $src0" in {
+ defm S_ADDK_I32 : SOPK_32TIE <sopk<0x0f, 0x0e>, "s_addk_i32", []>;
+ defm S_MULK_I32 : SOPK_32TIE <sopk<0x10, 0x0f>, "s_mulk_i32", []>;
}
-//defm S_CBRANCH_I_FORK : SOPK_ <sopk<0x11, 0x10>, "s_cbranch_i_fork", []>;
+defm S_CBRANCH_I_FORK : SOPK_m <
+ sopk<0x11, 0x10>, "s_cbranch_i_fork", (outs),
+ (ins SReg_64:$sdst, u16imm:$simm16), " $sdst, $simm16"
+>;
defm S_GETREG_B32 : SOPK_32 <sopk<0x12, 0x11>, "s_getreg_b32", []>;
-defm S_SETREG_B32 : SOPK_32 <sopk<0x13, 0x12>, "s_setreg_b32", []>;
-defm S_GETREG_REGRD_B32 : SOPK_32 <sopk<0x14, 0x13>, "s_getreg_regrd_b32", []>;
-//defm S_SETREG_IMM32_B32 : SOPK_32 <sopk<0x15, 0x14>, "s_setreg_imm32_b32", []>;
+defm S_SETREG_B32 : SOPK_m <
+ sopk<0x13, 0x12>, "s_setreg_b32", (outs),
+ (ins SReg_32:$sdst, u16imm:$simm16), " $sdst, $simm16"
+>;
+// FIXME: Not on SI?
+//defm S_GETREG_REGRD_B32 : SOPK_32 <sopk<0x14, 0x13>, "s_getreg_regrd_b32", []>;
+defm S_SETREG_IMM32_B32 : SOPK_IMM32 <
+ sopk<0x15, 0x14>, "s_setreg_imm32_b32", (outs),
+ (ins i32imm:$imm, u16imm:$simm16), " $imm, $simm16"
+>;
//===----------------------------------------------------------------------===//
// SOPP Instructions
@@ -1630,7 +1632,6 @@ defm V_LDEXP_F32 : VOP2_VI3_Inst <vop23<0x2b, 0x288>, "v_ldexp_f32",
VOP_F32_F32_I32, AMDGPUldexp
>;
-
defm V_CVT_PKACCUM_U8_F32 : VOP2_VI3_Inst <vop23<0x2c, 0x1f0>, "v_cvt_pkaccum_u8_f32",
VOP_I32_F32_I32>; // TODO: set "Uses = dst"
diff --git a/lib/Target/R600/SIRegisterInfo.td b/lib/Target/R600/SIRegisterInfo.td
index 7bb5dc2..f289014 100644
--- a/lib/Target/R600/SIRegisterInfo.td
+++ b/lib/Target/R600/SIRegisterInfo.td
@@ -66,7 +66,7 @@ foreach Index = 0-255 in {
//===----------------------------------------------------------------------===//
// SGPR 32-bit registers
-def SGPR_32 : RegisterClass<"AMDGPU", [f32, i32], 32,
+def SGPR_32 : RegisterClass<"AMDGPU", [i32, f32], 32,
(add (sequence "SGPR%u", 0, 101))>;
// SGPR 64-bit registers
@@ -113,7 +113,7 @@ def SGPR_512 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7,
(add (decimate (shl SGPR_32, 15), 4))]>;
// VGPR 32-bit registers
-def VGPR_32 : RegisterClass<"AMDGPU", [f32, i32], 32,
+def VGPR_32 : RegisterClass<"AMDGPU", [i32, f32], 32,
(add (sequence "VGPR%u", 0, 255))>;
// VGPR 64-bit registers
@@ -169,6 +169,11 @@ def VGPR_512 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7,
// Register classes used as source and destination
//===----------------------------------------------------------------------===//
+class RegImmMatcher<string name> : AsmOperandClass {
+ let Name = name;
+ let RenderMethod = "addRegOrImmOperands";
+}
+
// Special register classes for predicates and the M0 register
def SCCReg : RegisterClass<"AMDGPU", [i32, i1], 32, (add SCC)> {
let CopyCost = -1; // Theoretically it is possible to read from SCC,
@@ -180,7 +185,7 @@ def EXECReg : RegisterClass<"AMDGPU", [i64, i1], 64, (add EXEC)>;
def M0Reg : RegisterClass<"AMDGPU", [i32], 32, (add M0)>;
// Register class for all scalar registers (SGPRs + Special Registers)
-def SReg_32 : RegisterClass<"AMDGPU", [f32, i32], 32,
+def SReg_32 : RegisterClass<"AMDGPU", [i32, f32], 32,
(add SGPR_32, M0Reg, VCC_LO, VCC_HI, EXEC_LO, EXEC_HI, FLAT_SCR_LO, FLAT_SCR_HI)
>;
@@ -227,15 +232,21 @@ class RegInlineOperand <RegisterClass rc> : RegisterOperand<rc> {
// SSrc_* Operands with an SGPR or a 32-bit immediate
//===----------------------------------------------------------------------===//
-def SSrc_32 : RegImmOperand<SReg_32>;
+def SSrc_32 : RegImmOperand<SReg_32> {
+ let ParserMatchClass = RegImmMatcher<"SSrc32">;
+}
-def SSrc_64 : RegImmOperand<SReg_64>;
+def SSrc_64 : RegImmOperand<SReg_64> {
+ let ParserMatchClass = RegImmMatcher<"SSrc64">;
+}
//===----------------------------------------------------------------------===//
// SCSrc_* Operands with an SGPR or a inline constant
//===----------------------------------------------------------------------===//
-def SCSrc_32 : RegInlineOperand<SReg_32>;
+def SCSrc_32 : RegInlineOperand<SReg_32> {
+ let ParserMatchClass = RegImmMatcher<"SCSrc32">;
+}
//===----------------------------------------------------------------------===//
// VSrc_* Operands with an SGPR, VGPR or a 32-bit immediate
@@ -245,14 +256,30 @@ def VS_32 : RegisterClass<"AMDGPU", [i32, f32], 32, (add VGPR_32, SReg_32)>;
def VS_64 : RegisterClass<"AMDGPU", [i64, f64], 64, (add VReg_64, SReg_64)>;
-def VSrc_32 : RegImmOperand<VS_32>;
+def VSrc_32 : RegisterOperand<VS_32> {
+ let OperandNamespace = "AMDGPU";
+ let OperandType = "OPERAND_REG_IMM32";
+ let ParserMatchClass = RegImmMatcher<"VSrc32">;
+}
-def VSrc_64 : RegImmOperand<VS_64>;
+def VSrc_64 : RegisterOperand<VS_64> {
+ let OperandNamespace = "AMDGPU";
+ let OperandType = "OPERAND_REG_IMM32";
+ let ParserMatchClass = RegImmMatcher<"VSrc64">;
+}
//===----------------------------------------------------------------------===//
// VCSrc_* Operands with an SGPR, VGPR or an inline constant
//===----------------------------------------------------------------------===//
-def VCSrc_32 : RegInlineOperand<VS_32>;
+def VCSrc_32 : RegisterOperand<VS_32> {
+ let OperandNamespace = "AMDGPU";
+ let OperandType = "OPERAND_REG_INLINE_C";
+ let ParserMatchClass = RegImmMatcher<"VCSrc32">;
+}
-def VCSrc_64 : RegInlineOperand<VS_64>;
+def VCSrc_64 : RegisterOperand<VS_64> {
+ let OperandNamespace = "AMDGPU";
+ let OperandType = "OPERAND_REG_INLINE_C";
+ let ParserMatchClass = RegImmMatcher<"VCSrc64">;
+}
diff --git a/lib/Target/R600/SITypeRewriter.cpp b/lib/Target/R600/SITypeRewriter.cpp
index 27bbf4f..591ce85 100644
--- a/lib/Target/R600/SITypeRewriter.cpp
+++ b/lib/Target/R600/SITypeRewriter.cpp
@@ -104,7 +104,7 @@ void SITypeRewriter::visitCallInst(CallInst &I) {
SmallVector <Type*, 8> Types;
bool NeedToReplace = false;
Function *F = I.getCalledFunction();
- std::string Name = F->getName().str();
+ std::string Name = F->getName();
for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) {
Value *Arg = I.getArgOperand(i);
if (Arg->getType() == v16i8) {
diff --git a/lib/Target/Sparc/InstPrinter/SparcInstPrinter.cpp b/lib/Target/Sparc/InstPrinter/SparcInstPrinter.cpp
index 5975a51..b6eebb0 100644
--- a/lib/Target/Sparc/InstPrinter/SparcInstPrinter.cpp
+++ b/lib/Target/Sparc/InstPrinter/SparcInstPrinter.cpp
@@ -34,7 +34,7 @@ namespace Sparc {
#define PRINT_ALIAS_INSTR
#include "SparcGenAsmWriter.inc"
-bool SparcInstPrinter::isV9() const {
+bool SparcInstPrinter::isV9(const MCSubtargetInfo &STI) const {
return (STI.getFeatureBits() & Sparc::FeatureV9) != 0;
}
@@ -44,15 +44,15 @@ void SparcInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const
}
void SparcInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
- StringRef Annot)
-{
- if (!printAliasInstr(MI, O) && !printSparcAliasInstr(MI, O))
- printInstruction(MI, O);
+ StringRef Annot, const MCSubtargetInfo &STI) {
+ if (!printAliasInstr(MI, STI, O) && !printSparcAliasInstr(MI, STI, O))
+ printInstruction(MI, STI, O);
printAnnotation(O, Annot);
}
-bool SparcInstPrinter::printSparcAliasInstr(const MCInst *MI, raw_ostream &O)
-{
+bool SparcInstPrinter::printSparcAliasInstr(const MCInst *MI,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
switch (MI->getOpcode()) {
default: return false;
case SP::JMPLrr:
@@ -72,16 +72,16 @@ bool SparcInstPrinter::printSparcAliasInstr(const MCInst *MI, raw_ostream &O)
case SP::O7: O << "\tretl"; return true;
}
}
- O << "\tjmp "; printMemOperand(MI, 1, O);
+ O << "\tjmp "; printMemOperand(MI, 1, STI, O);
return true;
case SP::O7: // call $addr
- O << "\tcall "; printMemOperand(MI, 1, O);
+ O << "\tcall "; printMemOperand(MI, 1, STI, O);
return true;
}
}
case SP::V9FCMPS: case SP::V9FCMPD: case SP::V9FCMPQ:
case SP::V9FCMPES: case SP::V9FCMPED: case SP::V9FCMPEQ: {
- if (isV9()
+ if (isV9(STI)
|| (MI->getNumOperands() != 3)
|| (!MI->getOperand(0).isReg())
|| (MI->getOperand(0).getReg() != SP::FCC0))
@@ -96,17 +96,17 @@ bool SparcInstPrinter::printSparcAliasInstr(const MCInst *MI, raw_ostream &O)
case SP::V9FCMPED: O << "\tfcmped "; break;
case SP::V9FCMPEQ: O << "\tfcmpeq "; break;
}
- printOperand(MI, 1, O);
+ printOperand(MI, 1, STI, O);
O << ", ";
- printOperand(MI, 2, O);
+ printOperand(MI, 2, STI, O);
return true;
}
}
}
void SparcInstPrinter::printOperand(const MCInst *MI, int opNum,
- raw_ostream &O)
-{
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
const MCOperand &MO = MI->getOperand (opNum);
if (MO.isReg()) {
@@ -124,14 +124,14 @@ void SparcInstPrinter::printOperand(const MCInst *MI, int opNum,
}
void SparcInstPrinter::printMemOperand(const MCInst *MI, int opNum,
- raw_ostream &O, const char *Modifier)
-{
- printOperand(MI, opNum, O);
+ const MCSubtargetInfo &STI,
+ raw_ostream &O, const char *Modifier) {
+ printOperand(MI, opNum, STI, O);
// If this is an ADD operand, emit it like normal operands.
if (Modifier && !strcmp(Modifier, "arith")) {
O << ", ";
- printOperand(MI, opNum+1, O);
+ printOperand(MI, opNum+1, STI, O);
return;
}
const MCOperand &MO = MI->getOperand(opNum+1);
@@ -143,12 +143,12 @@ void SparcInstPrinter::printMemOperand(const MCInst *MI, int opNum,
O << "+";
- printOperand(MI, opNum+1, O);
+ printOperand(MI, opNum+1, STI, O);
}
void SparcInstPrinter::printCCOperand(const MCInst *MI, int opNum,
- raw_ostream &O)
-{
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
int CC = (int)MI->getOperand(opNum).getImm();
switch (MI->getOpcode()) {
default: break;
@@ -171,8 +171,8 @@ void SparcInstPrinter::printCCOperand(const MCInst *MI, int opNum,
}
bool SparcInstPrinter::printGetPCX(const MCInst *MI, unsigned opNum,
- raw_ostream &O)
-{
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
llvm_unreachable("FIXME: Implement SparcInstPrinter::printGetPCX.");
return true;
}
diff --git a/lib/Target/Sparc/InstPrinter/SparcInstPrinter.h b/lib/Target/Sparc/InstPrinter/SparcInstPrinter.h
index c96d5ad..0b01b88 100644
--- a/lib/Target/Sparc/InstPrinter/SparcInstPrinter.h
+++ b/lib/Target/Sparc/InstPrinter/SparcInstPrinter.h
@@ -22,32 +22,36 @@ namespace llvm {
class MCOperand;
class SparcInstPrinter : public MCInstPrinter {
- const MCSubtargetInfo &STI;
public:
- SparcInstPrinter(const MCAsmInfo &MAI,
- const MCInstrInfo &MII,
- const MCRegisterInfo &MRI,
- const MCSubtargetInfo &sti)
- : MCInstPrinter(MAI, MII, MRI), STI(sti) {}
+ SparcInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
+ const MCRegisterInfo &MRI)
+ : MCInstPrinter(MAI, MII, MRI) {}
void printRegName(raw_ostream &OS, unsigned RegNo) const override;
- void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot) override;
- bool printSparcAliasInstr(const MCInst *MI, raw_ostream &OS);
- bool isV9() const;
+ void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot,
+ const MCSubtargetInfo &STI) override;
+ bool printSparcAliasInstr(const MCInst *MI, const MCSubtargetInfo &STI,
+ raw_ostream &OS);
+ bool isV9(const MCSubtargetInfo &STI) const;
// Autogenerated by tblgen.
- void printInstruction(const MCInst *MI, raw_ostream &O);
- bool printAliasInstr(const MCInst *MI, raw_ostream &O);
+ void printInstruction(const MCInst *MI, const MCSubtargetInfo &STI,
+ raw_ostream &O);
+ bool printAliasInstr(const MCInst *MI, const MCSubtargetInfo &STI,
+ raw_ostream &O);
void printCustomAliasOperand(const MCInst *MI, unsigned OpIdx,
- unsigned PrintMethodIdx, raw_ostream &O);
+ unsigned PrintMethodIdx,
+ const MCSubtargetInfo &STI, raw_ostream &O);
static const char *getRegisterName(unsigned RegNo);
- void printOperand(const MCInst *MI, int opNum, raw_ostream &OS);
- void printMemOperand(const MCInst *MI, int opNum, raw_ostream &OS,
- const char *Modifier = nullptr);
- void printCCOperand(const MCInst *MI, int opNum, raw_ostream &OS);
- bool printGetPCX(const MCInst *MI, unsigned OpNo, raw_ostream &OS);
-
+ void printOperand(const MCInst *MI, int opNum, const MCSubtargetInfo &STI,
+ raw_ostream &OS);
+ void printMemOperand(const MCInst *MI, int opNum, const MCSubtargetInfo &STI,
+ raw_ostream &OS, const char *Modifier = nullptr);
+ void printCCOperand(const MCInst *MI, int opNum, const MCSubtargetInfo &STI,
+ raw_ostream &OS);
+ bool printGetPCX(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
+ raw_ostream &OS);
};
} // end namespace llvm
diff --git a/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp b/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp
index dcd81e3..4abb6b8 100644
--- a/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp
+++ b/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp
@@ -244,7 +244,7 @@ namespace {
}
- MCObjectWriter *createObjectWriter(raw_ostream &OS) const override {
+ MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override {
uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(OSType);
return createSparcELFObjectWriter(OS, is64Bit(), OSABI);
}
diff --git a/lib/Target/Sparc/MCTargetDesc/SparcELFObjectWriter.cpp b/lib/Target/Sparc/MCTargetDesc/SparcELFObjectWriter.cpp
index 5ba82f1..98ba7e6 100644
--- a/lib/Target/Sparc/MCTargetDesc/SparcELFObjectWriter.cpp
+++ b/lib/Target/Sparc/MCTargetDesc/SparcELFObjectWriter.cpp
@@ -26,7 +26,8 @@ namespace {
Is64Bit ? ELF::EM_SPARCV9 : ELF::EM_SPARC,
/*HasRelocationAddend*/ true) {}
- virtual ~SparcELFObjectWriter() {}
+ ~SparcELFObjectWriter() override {}
+
protected:
unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup,
bool IsPCRel) const override;
@@ -104,9 +105,8 @@ unsigned SparcELFObjectWriter::GetRelocType(const MCValue &Target,
return ELF::R_SPARC_NONE;
}
-MCObjectWriter *llvm::createSparcELFObjectWriter(raw_ostream &OS,
- bool Is64Bit,
- uint8_t OSABI) {
+MCObjectWriter *llvm::createSparcELFObjectWriter(raw_pwrite_stream &OS,
+ bool Is64Bit, uint8_t OSABI) {
MCELFObjectTargetWriter *MOTW = new SparcELFObjectWriter(Is64Bit, OSABI);
return createELFObjectWriter(MOTW, OS, /*IsLittleEndian=*/false);
}
diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCCodeEmitter.cpp b/lib/Target/Sparc/MCTargetDesc/SparcMCCodeEmitter.cpp
index 598856f..b447ab3 100644
--- a/lib/Target/Sparc/MCTargetDesc/SparcMCCodeEmitter.cpp
+++ b/lib/Target/Sparc/MCTargetDesc/SparcMCCodeEmitter.cpp
@@ -38,7 +38,7 @@ class SparcMCCodeEmitter : public MCCodeEmitter {
public:
SparcMCCodeEmitter(MCContext &ctx): Ctx(ctx) {}
- ~SparcMCCodeEmitter() {}
+ ~SparcMCCodeEmitter() override {}
void EncodeInstruction(const MCInst &MI, raw_ostream &OS,
SmallVectorImpl<MCFixup> &Fixups,
diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp b/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp
index 630ed1b..7895404 100644
--- a/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp
+++ b/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp
@@ -134,13 +134,12 @@ static MCTargetStreamer *createTargetAsmStreamer(MCStreamer &S,
return new SparcTargetAsmStreamer(S, OS);
}
-static MCInstPrinter *createSparcMCInstPrinter(const Target &T,
- unsigned SyntaxVariant,
- const MCAsmInfo &MAI,
- const MCInstrInfo &MII,
- const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI) {
- return new SparcInstPrinter(MAI, MII, MRI, STI);
+static MCInstPrinter *createSparcMCInstPrinter(const Triple &T,
+ unsigned SyntaxVariant,
+ const MCAsmInfo &MAI,
+ const MCInstrInfo &MII,
+ const MCRegisterInfo &MRI) {
+ return new SparcInstPrinter(MAI, MII, MRI);
}
extern "C" void LLVMInitializeSparcTargetMC() {
diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h b/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h
index d2ec991..5f38b12 100644
--- a/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h
+++ b/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h
@@ -26,6 +26,7 @@ class MCRegisterInfo;
class MCSubtargetInfo;
class Target;
class StringRef;
+class raw_pwrite_stream;
class raw_ostream;
extern Target TheSparcTarget;
@@ -38,8 +39,7 @@ MCAsmBackend *createSparcAsmBackend(const Target &T,
const MCRegisterInfo &MRI,
StringRef TT,
StringRef CPU);
-MCObjectWriter *createSparcELFObjectWriter(raw_ostream &OS,
- bool Is64Bit,
+MCObjectWriter *createSparcELFObjectWriter(raw_pwrite_stream &OS, bool Is64Bit,
uint8_t OSABI);
} // End llvm namespace
diff --git a/lib/Target/Sparc/Sparc.td b/lib/Target/Sparc/Sparc.td
index 3159a46..c34122e 100644
--- a/lib/Target/Sparc/Sparc.td
+++ b/lib/Target/Sparc/Sparc.td
@@ -92,8 +92,15 @@ def : Proc<"niagara4", [FeatureV9, FeatureV8Deprecated, UsePopc,
// Declare the target which we are implementing
//===----------------------------------------------------------------------===//
+def SparcAsmWriter : AsmWriter {
+ string AsmWriterClassName = "InstPrinter";
+ int PassSubtarget = 1;
+ int Variant = 0;
+}
+
def Sparc : Target {
// Pull in Instruction Info:
let InstructionSet = SparcInstrInfo;
let AssemblyParsers = [SparcAsmParser];
+ let AssemblyWriters = [SparcAsmWriter];
}
diff --git a/lib/Target/Sparc/SparcAsmPrinter.cpp b/lib/Target/Sparc/SparcAsmPrinter.cpp
index 0439f9d..56290e2 100644
--- a/lib/Target/Sparc/SparcAsmPrinter.cpp
+++ b/lib/Target/Sparc/SparcAsmPrinter.cpp
@@ -58,7 +58,6 @@ namespace {
void EmitFunctionBodyStart() override;
void EmitInstruction(const MachineInstr *MI) override;
- void EmitEndOfAsmFile(Module &M) override;
static const char *getRegisterName(unsigned RegNo) {
return SparcInstPrinter::getRegisterName(RegNo);
@@ -442,23 +441,6 @@ bool SparcAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
return false;
}
-void SparcAsmPrinter::EmitEndOfAsmFile(Module &M) {
- const TargetLoweringObjectFileELF &TLOFELF =
- static_cast<const TargetLoweringObjectFileELF &>(getObjFileLowering());
- MachineModuleInfoELF &MMIELF = MMI->getObjFileInfo<MachineModuleInfoELF>();
-
- // Generate stubs for global variables.
- MachineModuleInfoELF::SymbolListTy Stubs = MMIELF.GetGVStubList();
- if (!Stubs.empty()) {
- OutStreamer.SwitchSection(TLOFELF.getDataSection());
- unsigned PtrSize = TM.getDataLayout()->getPointerSize(0);
- for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
- OutStreamer.EmitLabel(Stubs[i].first);
- OutStreamer.EmitSymbolValue(Stubs[i].second.getPointer(), PtrSize);
- }
- }
-}
-
// Force static initialization.
extern "C" void LLVMInitializeSparcAsmPrinter() {
RegisterAsmPrinter<SparcAsmPrinter> X(TheSparcTarget);
diff --git a/lib/Target/Sparc/SparcISelLowering.cpp b/lib/Target/Sparc/SparcISelLowering.cpp
index c8b0570..5b964af 100644
--- a/lib/Target/Sparc/SparcISelLowering.cpp
+++ b/lib/Target/Sparc/SparcISelLowering.cpp
@@ -727,7 +727,8 @@ SparcTargetLowering::LowerCall_32(TargetLowering::CallLoweringInfo &CLI,
Chain = DAG.getMemcpy(Chain, dl, FIPtr, Arg, SizeNode, Align,
false, // isVolatile,
- (Size <= 32), // AlwaysInline if size <= 32
+ (Size <= 32), // AlwaysInline if size <= 32,
+ false, // isTailCall
MachinePointerInfo(), MachinePointerInfo());
ByValArgs.push_back(FIPtr);
}
diff --git a/lib/Target/Sparc/SparcSelectionDAGInfo.h b/lib/Target/Sparc/SparcSelectionDAGInfo.h
index a3a21d6..6818291 100644
--- a/lib/Target/Sparc/SparcSelectionDAGInfo.h
+++ b/lib/Target/Sparc/SparcSelectionDAGInfo.h
@@ -23,7 +23,7 @@ class SparcTargetMachine;
class SparcSelectionDAGInfo : public TargetSelectionDAGInfo {
public:
explicit SparcSelectionDAGInfo(const DataLayout &DL);
- ~SparcSelectionDAGInfo();
+ ~SparcSelectionDAGInfo() override;
};
}
diff --git a/lib/Target/SystemZ/CMakeLists.txt b/lib/Target/SystemZ/CMakeLists.txt
index 60a3912..336f037 100644
--- a/lib/Target/SystemZ/CMakeLists.txt
+++ b/lib/Target/SystemZ/CMakeLists.txt
@@ -29,6 +29,7 @@ add_llvm_target(SystemZCodeGen
SystemZShortenInst.cpp
SystemZSubtarget.cpp
SystemZTargetMachine.cpp
+ SystemZTargetTransformInfo.cpp
)
add_subdirectory(AsmParser)
diff --git a/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp b/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp
index 23173bf..84400f8 100644
--- a/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp
+++ b/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp
@@ -25,7 +25,7 @@ class SystemZDisassembler : public MCDisassembler {
public:
SystemZDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx)
: MCDisassembler(STI, Ctx) {}
- virtual ~SystemZDisassembler() {}
+ ~SystemZDisassembler() override {}
DecodeStatus getInstruction(MCInst &instr, uint64_t &Size,
ArrayRef<uint8_t> Bytes, uint64_t Address,
diff --git a/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp b/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp
index 996a492..cf1ee54 100644
--- a/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp
+++ b/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp
@@ -43,7 +43,8 @@ void SystemZInstPrinter::printOperand(const MCOperand &MO, raw_ostream &O) {
}
void SystemZInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
- StringRef Annot) {
+ StringRef Annot,
+ const MCSubtargetInfo &STI) {
printInstruction(MI, O);
printAnnotation(O, Annot);
}
diff --git a/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h b/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h
index 732e5fa..6f56c7b 100644
--- a/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h
+++ b/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h
@@ -39,7 +39,8 @@ public:
// Override MCInstPrinter.
void printRegName(raw_ostream &O, unsigned RegNo) const override;
- void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot) override;
+ void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot,
+ const MCSubtargetInfo &STI) override;
private:
// Print various types of operand.
diff --git a/lib/Target/SystemZ/LLVMBuild.txt b/lib/Target/SystemZ/LLVMBuild.txt
index 542aaee..6f8431d 100644
--- a/lib/Target/SystemZ/LLVMBuild.txt
+++ b/lib/Target/SystemZ/LLVMBuild.txt
@@ -31,5 +31,5 @@ has_jit = 1
type = Library
name = SystemZCodeGen
parent = SystemZ
-required_libraries = AsmPrinter CodeGen Core MC SelectionDAG Support SystemZAsmPrinter SystemZDesc SystemZInfo Target
+required_libraries = Analysis AsmPrinter CodeGen Core MC SelectionDAG Support SystemZAsmPrinter SystemZDesc SystemZInfo Target
add_to_library_groups = SystemZ
diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp
index b79b1d8..1c3887a 100644
--- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp
+++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp
@@ -62,7 +62,7 @@ public:
llvm_unreachable("SystemZ does do not have assembler relaxation");
}
bool writeNopData(uint64_t Count, MCObjectWriter *OW) const override;
- MCObjectWriter *createObjectWriter(raw_ostream &OS) const override {
+ MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override {
return createSystemZObjectWriter(OS, OSABI);
}
};
diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp
index 40dc48e..8dd70b9 100644
--- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp
+++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp
@@ -32,7 +32,7 @@ public:
: MCII(mcii), Ctx(ctx) {
}
- ~SystemZMCCodeEmitter() {}
+ ~SystemZMCCodeEmitter() override {}
// OVerride MCCodeEmitter.
void EncodeInstruction(const MCInst &MI, raw_ostream &OS,
diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp
index 2632518..ee1af02 100644
--- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp
+++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp
@@ -20,7 +20,7 @@ class SystemZObjectWriter : public MCELFObjectTargetWriter {
public:
SystemZObjectWriter(uint8_t OSABI);
- virtual ~SystemZObjectWriter();
+ ~SystemZObjectWriter() override;
protected:
// Override MCELFObjectTargetWriter.
@@ -152,7 +152,7 @@ unsigned SystemZObjectWriter::GetRelocType(const MCValue &Target,
}
}
-MCObjectWriter *llvm::createSystemZObjectWriter(raw_ostream &OS,
+MCObjectWriter *llvm::createSystemZObjectWriter(raw_pwrite_stream &OS,
uint8_t OSABI) {
MCELFObjectTargetWriter *MOTW = new SystemZObjectWriter(OSABI);
return createELFObjectWriter(MOTW, OS, /*IsLittleEndian=*/false);
diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp
index ffd05a9..ea56fb1 100644
--- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp
+++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp
@@ -172,12 +172,11 @@ static MCCodeGenInfo *createSystemZMCCodeGenInfo(StringRef TT, Reloc::Model RM,
return X;
}
-static MCInstPrinter *createSystemZMCInstPrinter(const Target &T,
+static MCInstPrinter *createSystemZMCInstPrinter(const Triple &T,
unsigned SyntaxVariant,
const MCAsmInfo &MAI,
const MCInstrInfo &MII,
- const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI) {
+ const MCRegisterInfo &MRI) {
return new SystemZInstPrinter(MAI, MII, MRI);
}
diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h b/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h
index 962c950..2b2647b 100644
--- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h
+++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h
@@ -23,6 +23,7 @@ class MCRegisterInfo;
class MCSubtargetInfo;
class StringRef;
class Target;
+class raw_pwrite_stream;
class raw_ostream;
extern Target TheSystemZTarget;
@@ -77,7 +78,7 @@ MCAsmBackend *createSystemZMCAsmBackend(const Target &T,
const MCRegisterInfo &MRI,
StringRef TT, StringRef CPU);
-MCObjectWriter *createSystemZObjectWriter(raw_ostream &OS, uint8_t OSABI);
+MCObjectWriter *createSystemZObjectWriter(raw_pwrite_stream &OS, uint8_t OSABI);
} // end namespace llvm
// Defines symbolic names for SystemZ registers.
diff --git a/lib/Target/SystemZ/SystemZ.h b/lib/Target/SystemZ/SystemZ.h
index 5f17edb..b3a7310 100644
--- a/lib/Target/SystemZ/SystemZ.h
+++ b/lib/Target/SystemZ/SystemZ.h
@@ -68,6 +68,18 @@ const unsigned CCMASK_TM_MSB_0 = CCMASK_0 | CCMASK_1;
const unsigned CCMASK_TM_MSB_1 = CCMASK_2 | CCMASK_3;
const unsigned CCMASK_TM = CCMASK_ANY;
+// Condition-code mask assignments for TRANSACTION_BEGIN.
+const unsigned CCMASK_TBEGIN_STARTED = CCMASK_0;
+const unsigned CCMASK_TBEGIN_INDETERMINATE = CCMASK_1;
+const unsigned CCMASK_TBEGIN_TRANSIENT = CCMASK_2;
+const unsigned CCMASK_TBEGIN_PERSISTENT = CCMASK_3;
+const unsigned CCMASK_TBEGIN = CCMASK_ANY;
+
+// Condition-code mask assignments for TRANSACTION_END.
+const unsigned CCMASK_TEND_TX = CCMASK_0;
+const unsigned CCMASK_TEND_NOTX = CCMASK_2;
+const unsigned CCMASK_TEND = CCMASK_TEND_TX | CCMASK_TEND_NOTX;
+
// The position of the low CC bit in an IPM result.
const unsigned IPM_CC = 28;
diff --git a/lib/Target/SystemZ/SystemZAsmPrinter.cpp b/lib/Target/SystemZ/SystemZAsmPrinter.cpp
index 18e37e3..2524733 100644
--- a/lib/Target/SystemZ/SystemZAsmPrinter.cpp
+++ b/lib/Target/SystemZ/SystemZAsmPrinter.cpp
@@ -255,29 +255,6 @@ bool SystemZAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
return false;
}
-void SystemZAsmPrinter::EmitEndOfAsmFile(Module &M) {
- if (Triple(TM.getTargetTriple()).isOSBinFormatELF()) {
- auto &TLOFELF =
- static_cast<const TargetLoweringObjectFileELF &>(getObjFileLowering());
-
- MachineModuleInfoELF &MMIELF = MMI->getObjFileInfo<MachineModuleInfoELF>();
-
- // Output stubs for external and common global variables.
- MachineModuleInfoELF::SymbolListTy Stubs = MMIELF.GetGVStubList();
- if (!Stubs.empty()) {
- OutStreamer.SwitchSection(TLOFELF.getDataRelSection());
- const DataLayout *TD = TM.getDataLayout();
-
- for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
- OutStreamer.EmitLabel(Stubs[i].first);
- OutStreamer.EmitSymbolValue(Stubs[i].second.getPointer(),
- TD->getPointerSize(0));
- }
- Stubs.clear();
- }
- }
-}
-
// Force static initialization.
extern "C" void LLVMInitializeSystemZAsmPrinter() {
RegisterAsmPrinter<SystemZAsmPrinter> X(TheSystemZTarget);
diff --git a/lib/Target/SystemZ/SystemZAsmPrinter.h b/lib/Target/SystemZ/SystemZAsmPrinter.h
index a4d5b78..7f6e823 100644
--- a/lib/Target/SystemZ/SystemZAsmPrinter.h
+++ b/lib/Target/SystemZ/SystemZAsmPrinter.h
@@ -38,7 +38,6 @@ public:
bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
unsigned AsmVariant, const char *ExtraCode,
raw_ostream &OS) override;
- void EmitEndOfAsmFile(Module &M) override;
};
} // end namespace llvm
diff --git a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
index a52aa25..1a58b53 100644
--- a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
+++ b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
@@ -898,6 +898,9 @@ SDNode *SystemZDAGToDAGISel::tryRISBGZero(SDNode *N) {
}
unsigned Opcode = SystemZ::RISBG;
+ // Prefer RISBGN if available, since it does not clobber CC.
+ if (Subtarget->hasMiscellaneousExtensions())
+ Opcode = SystemZ::RISBGN;
EVT OpcodeVT = MVT::i64;
if (VT == MVT::i32 && Subtarget->hasHighWord()) {
Opcode = SystemZ::RISBMux;
@@ -945,9 +948,13 @@ SDNode *SystemZDAGToDAGISel::tryRxSBG(SDNode *N, unsigned Opcode) {
// See whether we can avoid an AND in the first operand by converting
// ROSBG to RISBG.
- if (Opcode == SystemZ::ROSBG && detectOrAndInsertion(Op0, RxSBG[I].Mask))
+ if (Opcode == SystemZ::ROSBG && detectOrAndInsertion(Op0, RxSBG[I].Mask)) {
Opcode = SystemZ::RISBG;
-
+ // Prefer RISBGN if available, since it does not clobber CC.
+ if (Subtarget->hasMiscellaneousExtensions())
+ Opcode = SystemZ::RISBGN;
+ }
+
EVT VT = N->getValueType(0);
SDValue Ops[5] = {
convertTo(SDLoc(N), MVT::i64, Op0),
diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp
index 0ca8bcd..21882cb 100644
--- a/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -20,6 +20,7 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/IR/Intrinsics.h"
#include <cctype>
using namespace llvm;
@@ -163,8 +164,13 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &tm,
// available, or if the operand is constant.
setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Custom);
+ // Use POPCNT on z196 and above.
+ if (Subtarget.hasPopulationCount())
+ setOperationAction(ISD::CTPOP, VT, Custom);
+ else
+ setOperationAction(ISD::CTPOP, VT, Expand);
+
// No special instructions for these.
- setOperationAction(ISD::CTPOP, VT, Expand);
setOperationAction(ISD::CTTZ, VT, Expand);
setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand);
setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand);
@@ -299,6 +305,9 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &tm,
// Codes for which we want to perform some z-specific combinations.
setTargetDAGCombine(ISD::SIGN_EXTEND);
+ // Handle intrinsics.
+ setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
+
// We want to use MVC in preference to even a single load/store pair.
MaxStoresPerMemcpy = 0;
MaxStoresPerMemcpyOptSize = 0;
@@ -342,6 +351,16 @@ bool SystemZTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
return Imm.isZero() || Imm.isNegZero();
}
+bool SystemZTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
+ // Legal when Imm fits a signed or an unsigned 32-bit field: we can use CGFI or CLGFI.
+ return isInt<32>(Imm) || isUInt<32>(Imm);
+}
+
+bool SystemZTargetLowering::isLegalAddImmediate(int64_t Imm) const {
+ // We can use ALGFI or SLGFI: Imm is legal if either Imm itself or its
+ // negation is an unsigned 32-bit value. The negation is done in uint64_t
+ // so that Imm == INT64_MIN wraps instead of overflowing a signed type
+ // (undefined behaviour); all other inputs give the same answer as -Imm.
+ return isUInt<32>(Imm) || isUInt<32>(0 - static_cast<uint64_t>(Imm));
+}
+
bool SystemZTargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
unsigned,
unsigned,
@@ -1016,6 +1035,53 @@ prepareVolatileOrAtomicLoad(SDValue Chain, SDLoc DL, SelectionDAG &DAG) const {
return DAG.getNode(SystemZISD::SERIALIZE, DL, MVT::Other, Chain);
}
+// Return true if Op is an intrinsic node with chain that returns the CC value
+// as its only result besides the chain. Provide the associated SystemZISD opcode
+// (in Opcode) and the mask of valid CC values (in CCValid) if so.
+static bool isIntrinsicWithCCAndChain(SDValue Op, unsigned &Opcode,
+ unsigned &CCValid) {
+ unsigned Id = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); // Operand 1 holds the intrinsic ID.
+ switch (Id) {
+ case Intrinsic::s390_tbegin:
+ Opcode = SystemZISD::TBEGIN;
+ CCValid = SystemZ::CCMASK_TBEGIN;
+ return true;
+
+ case Intrinsic::s390_tbegin_nofloat:
+ Opcode = SystemZISD::TBEGIN_NOFLOAT;
+ CCValid = SystemZ::CCMASK_TBEGIN;
+ return true;
+
+ case Intrinsic::s390_tend:
+ Opcode = SystemZISD::TEND;
+ CCValid = SystemZ::CCMASK_TEND;
+ return true;
+
+ default: // Any other intrinsic: Opcode and CCValid are left untouched.
+ return false;
+ }
+}
+
+// Re-emit chained intrinsic Op as a node with opcode Opcode that produces (chain, glue) instead of (CC, chain); returns the new node.
+static SDValue emitIntrinsicWithChainAndGlue(SelectionDAG &DAG, SDValue Op,
+ unsigned Opcode) {
+ // Copy all operands except the intrinsic ID (operand 1): operand 0 first, then operands 2 and up.
+ unsigned NumOps = Op.getNumOperands();
+ SmallVector<SDValue, 6> Ops;
+ Ops.reserve(NumOps - 1);
+ Ops.push_back(Op.getOperand(0));
+ for (unsigned I = 2; I < NumOps; ++I)
+ Ops.push_back(Op.getOperand(I));
+
+ assert(Op->getNumValues() == 2 && "Expected only CC result and chain");
+ SDVTList RawVTs = DAG.getVTList(MVT::Other, MVT::Glue); // New node's results: chain, then glue.
+ SDValue Intr = DAG.getNode(Opcode, SDLoc(Op), RawVTs, Ops);
+ SDValue OldChain = SDValue(Op.getNode(), 1); // Chain is value 1 of the original node...
+ SDValue NewChain = SDValue(Intr.getNode(), 0); // ...and value 0 of the replacement.
+ DAG.ReplaceAllUsesOfValueWith(OldChain, NewChain); // The CC result (value 0 of Op) is not replaced here.
+ return Intr;
+}
+
// CC is a comparison that will be implemented using an integer or
// floating-point comparison. Return the condition code mask for
// a branch on true. In the integer case, CCMASK_CMP_UO is set for
@@ -1530,6 +1596,8 @@ static void adjustForTestUnderMask(SelectionDAG &DAG, Comparison &C) {
MaskVal = -(CmpVal & -CmpVal);
NewC.ICmpType = SystemZICMP::UnsignedOnly;
}
+ if (!MaskVal)
+ return;
// Check whether the combination of mask, comparison value and comparison
// type are suitable.
@@ -1571,9 +1639,53 @@ static void adjustForTestUnderMask(SelectionDAG &DAG, Comparison &C) {
C.CCMask = NewCCMask;
}
+// Return a Comparison that tests the condition-code result of intrinsic
+// node Call against constant integer CC using comparison code Cond.
+// Opcode is the opcode of the SystemZISD operation for the intrinsic
+// and CCValid is the set of possible condition-code results.
+//
+// In the resulting mask, bit 3 stands for CC==0 and bit 0 for CC==3. The
+// range masks are built by shifting the unsigned constant ~0U, because
+// left-shifting a negative signed value such as -1 is undefined behaviour.
+// The final mask is restricted to the bits in CCValid.
+static Comparison getIntrinsicCmp(SelectionDAG &DAG, unsigned Opcode,
+ SDValue Call, unsigned CCValid, uint64_t CC,
+ ISD::CondCode Cond) {
+ // Op1 is left empty: emitCmp recognises such comparisons as intrinsic-based.
+ Comparison C(Call, SDValue());
+ C.Opcode = Opcode;
+ C.CCValid = CCValid;
+ if (Cond == ISD::SETEQ)
+ // bit 3 for CC==0, bit 0 for CC==3, always false for CC>3.
+ C.CCMask = CC < 4 ? 1 << (3 - CC) : 0;
+ else if (Cond == ISD::SETNE)
+ // ...and the inverse of that.
+ C.CCMask = CC < 4 ? ~(1 << (3 - CC)) : -1;
+ else if (Cond == ISD::SETLT || Cond == ISD::SETULT)
+ // bits above bit 3 for CC==0 (always false), bits above bit 0 for CC==3,
+ // always true for CC>3.
+ C.CCMask = CC < 4 ? ~0U << (4 - CC) : -1;
+ else if (Cond == ISD::SETGE || Cond == ISD::SETUGE)
+ // ...and the inverse of that.
+ C.CCMask = CC < 4 ? ~(~0U << (4 - CC)) : 0;
+ else if (Cond == ISD::SETLE || Cond == ISD::SETULE)
+ // bit 3 and above for CC==0, bit 0 and above for CC==3 (always true),
+ // always true for CC>3.
+ C.CCMask = CC < 4 ? ~0U << (3 - CC) : -1;
+ else if (Cond == ISD::SETGT || Cond == ISD::SETUGT)
+ // ...and the inverse of that.
+ C.CCMask = CC < 4 ? ~(~0U << (3 - CC)) : 0;
+ else
+ llvm_unreachable("Unexpected integer comparison type");
+ // Drop any mask bits for CC values the intrinsic can never produce.
+ C.CCMask &= CCValid;
+ return C;
+}
+
// Decide how to implement a comparison of type Cond between CmpOp0 with CmpOp1.
static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1,
ISD::CondCode Cond) {
+ if (CmpOp1.getOpcode() == ISD::Constant) {
+ uint64_t Constant = cast<ConstantSDNode>(CmpOp1)->getZExtValue();
+ unsigned Opcode, CCValid;
+ if (CmpOp0.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
+ CmpOp0.getResNo() == 0 && CmpOp0->hasNUsesOfValue(1, 0) &&
+ isIntrinsicWithCCAndChain(CmpOp0, Opcode, CCValid))
+ return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid, Constant, Cond);
+ }
Comparison C(CmpOp0, CmpOp1);
C.CCMask = CCMaskForCondCode(Cond);
if (C.Op0.getValueType().isFloatingPoint()) {
@@ -1615,6 +1727,17 @@ static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1,
// Emit the comparison instruction described by C.
static SDValue emitCmp(SelectionDAG &DAG, SDLoc DL, Comparison &C) {
+ if (!C.Op1.getNode()) {
+ SDValue Op;
+ switch (C.Op0.getOpcode()) {
+ case ISD::INTRINSIC_W_CHAIN:
+ Op = emitIntrinsicWithChainAndGlue(DAG, C.Op0, C.Opcode);
+ break;
+ default:
+ llvm_unreachable("Invalid comparison operands");
+ }
+ return SDValue(Op.getNode(), Op->getNumValues() - 1);
+ }
if (C.Opcode == SystemZISD::ICMP)
return DAG.getNode(SystemZISD::ICMP, DL, MVT::Glue, C.Op0, C.Op1,
DAG.getConstant(C.ICmpType, MVT::i32));
@@ -1696,7 +1819,6 @@ SDValue SystemZTargetLowering::lowerSETCC(SDValue Op,
}
SDValue SystemZTargetLowering::lowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
- SDValue Chain = Op.getOperand(0);
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
SDValue CmpOp0 = Op.getOperand(2);
SDValue CmpOp1 = Op.getOperand(3);
@@ -1706,7 +1828,7 @@ SDValue SystemZTargetLowering::lowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC));
SDValue Glue = emitCmp(DAG, DL, C);
return DAG.getNode(SystemZISD::BR_CCMASK, DL, Op.getValueType(),
- Chain, DAG.getConstant(C.CCValid, MVT::i32),
+ Op.getOperand(0), DAG.getConstant(C.CCValid, MVT::i32),
DAG.getConstant(C.CCMask, MVT::i32), Dest, Glue);
}
@@ -2100,6 +2222,7 @@ SDValue SystemZTargetLowering::lowerVACOPY(SDValue Op,
return DAG.getMemcpy(Chain, DL, DstPtr, SrcPtr, DAG.getIntPtrConstant(32),
/*Align*/8, /*isVolatile*/false, /*AlwaysInline*/false,
+ /*isTailCall*/false,
MachinePointerInfo(DstSV), MachinePointerInfo(SrcSV));
}
@@ -2292,6 +2415,46 @@ SDValue SystemZTargetLowering::lowerOR(SDValue Op, SelectionDAG &DAG) const {
MVT::i64, HighOp, Low32);
}
+SDValue SystemZTargetLowering::lowerCTPOP(SDValue Op, // Lower ISD::CTPOP via the per-byte POPCNT node plus a shift/add reduction.
+ SelectionDAG &DAG) const {
+ EVT VT = Op.getValueType();
+ int64_t OrigBitSize = VT.getSizeInBits();
+ SDLoc DL(Op);
+
+ // Get the known-zero mask for the operand.
+ Op = Op.getOperand(0);
+ APInt KnownZero, KnownOne;
+ DAG.computeKnownBits(Op, KnownZero, KnownOne);
+ unsigned NumSignificantBits = (~KnownZero).getActiveBits();
+ if (NumSignificantBits == 0) // Every bit is known zero, so the count is the constant 0.
+ return DAG.getConstant(0, VT);
+
+ // Skip known-zero high parts of the operand.
+ int64_t BitSize = (int64_t)1 << Log2_32_Ceil(NumSignificantBits); // Round the significant width up to a power of two...
+ BitSize = std::min(BitSize, OrigBitSize); // ...but never beyond the width of the original type.
+
+ // The POPCNT instruction counts the number of bits in each byte.
+ Op = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op);
+ Op = DAG.getNode(SystemZISD::POPCNT, DL, MVT::i64, Op);
+ Op = DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
+
+ // Add up per-byte counts in a binary tree. All bits of Op at
+ // position larger than BitSize remain zero throughout.
+ for (int64_t I = BitSize / 2; I >= 8; I = I / 2) { // Shift amounts BitSize/2, BitSize/4, ..., 8; no iterations when BitSize <= 8.
+ SDValue Tmp = DAG.getNode(ISD::SHL, DL, VT, Op, DAG.getConstant(I, VT));
+ if (BitSize != OrigBitSize) // Narrowed case: mask the shifted value back to the low BitSize bits.
+ Tmp = DAG.getNode(ISD::AND, DL, VT, Tmp,
+ DAG.getConstant(((uint64_t)1 << BitSize) - 1, VT));
+ Op = DAG.getNode(ISD::ADD, DL, VT, Op, Tmp);
+ }
+
+ // Extract overall result from high byte.
+ if (BitSize > 8) // With BitSize <= 8 the single byte count is already the result.
+ Op = DAG.getNode(ISD::SRL, DL, VT, Op, DAG.getConstant(BitSize - 8, VT));
+
+ return Op;
+}
+
// Op is an atomic load. Lower it into a normal volatile load.
SDValue SystemZTargetLowering::lowerATOMIC_LOAD(SDValue Op,
SelectionDAG &DAG) const {
@@ -2505,6 +2668,30 @@ SDValue SystemZTargetLowering::lowerPREFETCH(SDValue Op,
Node->getMemoryVT(), Node->getMemOperand());
}
+// Return an i32 that contains the value of CC immediately after After,
+// whose final result value must be MVT::Glue (it is passed to the IPM node below).
+static SDValue getCCResult(SelectionDAG &DAG, SDNode *After) {
+ SDValue Glue = SDValue(After, After->getNumValues() - 1); // Last value produced by After.
+ SDValue IPM = DAG.getNode(SystemZISD::IPM, SDLoc(After), MVT::i32, Glue);
+ return DAG.getNode(ISD::SRL, SDLoc(After), MVT::i32, IPM, // Shift the IPM result right by IPM_CC to bring CC to the low bits.
+ DAG.getConstant(SystemZ::IPM_CC, MVT::i32));
+}
+
+SDValue
+SystemZTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
+ SelectionDAG &DAG) const { // Custom lowering for INTRINSIC_W_CHAIN nodes that return CC.
+ unsigned Opcode, CCValid;
+ if (isIntrinsicWithCCAndChain(Op, Opcode, CCValid)) {
+ assert(Op->getNumValues() == 2 && "Expected only CC result and chain");
+ SDValue Glued = emitIntrinsicWithChainAndGlue(DAG, Op, Opcode); // Also redirects uses of Op's chain result.
+ SDValue CC = getCCResult(DAG, Glued.getNode()); // Materialise CC as an i32 from the glued node.
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Op.getNode(), 0), CC); // Value 0 of Op is its CC result.
+ return SDValue(); // Both results were replaced in place above, so an empty SDValue is returned.
+ }
+
+ return SDValue(); // Not one of the CC-returning intrinsics handled here.
+}
+
SDValue SystemZTargetLowering::LowerOperation(SDValue Op,
SelectionDAG &DAG) const {
switch (Op.getOpcode()) {
@@ -2542,6 +2729,8 @@ SDValue SystemZTargetLowering::LowerOperation(SDValue Op,
return lowerUDIVREM(Op, DAG);
case ISD::OR:
return lowerOR(Op, DAG);
+ case ISD::CTPOP:
+ return lowerCTPOP(Op, DAG);
case ISD::ATOMIC_SWAP:
return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_SWAPW);
case ISD::ATOMIC_STORE:
@@ -2576,6 +2765,8 @@ SDValue SystemZTargetLowering::LowerOperation(SDValue Op,
return lowerSTACKRESTORE(Op, DAG);
case ISD::PREFETCH:
return lowerPREFETCH(Op, DAG);
+ case ISD::INTRINSIC_W_CHAIN:
+ return lowerINTRINSIC_W_CHAIN(Op, DAG);
default:
llvm_unreachable("Unexpected node to lower");
}
@@ -2616,6 +2807,9 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const {
OPCODE(SEARCH_STRING);
OPCODE(IPM);
OPCODE(SERIALIZE);
+ OPCODE(TBEGIN);
+ OPCODE(TBEGIN_NOFLOAT);
+ OPCODE(TEND);
OPCODE(ATOMIC_SWAPW);
OPCODE(ATOMIC_LOADW_ADD);
OPCODE(ATOMIC_LOADW_SUB);
@@ -3443,6 +3637,50 @@ SystemZTargetLowering::emitStringWrapper(MachineInstr *MI,
return DoneMBB;
}
+// Update TBEGIN instruction MI with final opcode Opcode and register clobbers derived from its control field; returns MBB unchanged.
+MachineBasicBlock *
+SystemZTargetLowering::emitTransactionBegin(MachineInstr *MI,
+ MachineBasicBlock *MBB,
+ unsigned Opcode,
+ bool NoFloat) const {
+ MachineFunction &MF = *MBB->getParent();
+ const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
+ const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
+
+ // Update opcode.
+ MI->setDesc(TII->get(Opcode));
+
+ // We cannot handle a TBEGIN that clobbers the stack or frame pointer.
+ // Make sure to add the corresponding GRSM bits if they are missing.
+ uint64_t Control = MI->getOperand(2).getImm(); // Operand 2 is read as the immediate control value.
+ static const unsigned GPRControlBit[16] = { // Indexed by GPR number; each even/odd register pair shares one bit.
+ 0x8000, 0x8000, 0x4000, 0x4000, 0x2000, 0x2000, 0x1000, 0x1000,
+ 0x0800, 0x0800, 0x0400, 0x0400, 0x0200, 0x0200, 0x0100, 0x0100
+ };
+ Control |= GPRControlBit[15]; // Always set the bit covering GPR 15.
+ if (TFI->hasFP(MF))
+ Control |= GPRControlBit[11]; // With a frame pointer, also set the bit covering GPR 11.
+ MI->getOperand(2).setImm(Control);
+
+ // Add GPR clobbers.
+ for (int I = 0; I < 16; I++) {
+ if ((Control & GPRControlBit[I]) == 0) { // Only GPRs whose control bit is clear are marked as clobbered.
+ unsigned Reg = SystemZMC::GR64Regs[I];
+ MI->addOperand(MachineOperand::CreateReg(Reg, true, true));
+ }
+ }
+
+ // Add FPR clobbers.
+ if (!NoFloat && (Control & 4) != 0) { // Skipped when NoFloat is set or control bit 0x4 is clear.
+ for (int I = 0; I < 16; I++) {
+ unsigned Reg = SystemZMC::FP64Regs[I];
+ MI->addOperand(MachineOperand::CreateReg(Reg, true, true));
+ }
+ }
+
+ return MBB;
+}
+
MachineBasicBlock *SystemZTargetLowering::
EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const {
switch (MI->getOpcode()) {
@@ -3684,6 +3922,12 @@ EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const {
return emitStringWrapper(MI, MBB, SystemZ::MVST);
case SystemZ::SRSTLoop:
return emitStringWrapper(MI, MBB, SystemZ::SRST);
+ case SystemZ::TBEGIN:
+ return emitTransactionBegin(MI, MBB, SystemZ::TBEGIN, false);
+ case SystemZ::TBEGIN_nofloat:
+ return emitTransactionBegin(MI, MBB, SystemZ::TBEGIN, true);
+ case SystemZ::TBEGINC:
+ return emitTransactionBegin(MI, MBB, SystemZ::TBEGINC, true);
default:
llvm_unreachable("Unexpected instr type to insert");
}
diff --git a/lib/Target/SystemZ/SystemZISelLowering.h b/lib/Target/SystemZ/SystemZISelLowering.h
index 23c62c9..56d7ef4 100644
--- a/lib/Target/SystemZ/SystemZISelLowering.h
+++ b/lib/Target/SystemZ/SystemZISelLowering.h
@@ -87,6 +87,9 @@ enum {
// the number of the register.
EXTRACT_ACCESS,
+ // Count number of bits set in operand 0 per byte.
+ POPCNT,
+
// Wrappers around the ISD opcodes of the same name. The output and
// first input operands are GR128s. The trailing numbers are the
// widths of the second operand in bits.
@@ -143,6 +146,15 @@ enum {
// Perform a serialization operation. (BCR 15,0 or BCR 14,0.)
SERIALIZE,
+ // Transaction begin. The first operand is the chain, the second
+ // the TDB pointer, and the third the immediate control field.
+ // Returns chain and glue.
+ TBEGIN,
+ TBEGIN_NOFLOAT,
+
+ // Transaction end. Just the chain operand. Returns chain and glue.
+ TEND,
+
// Wrappers around the inner loop of an 8- or 16-bit ATOMIC_SWAP or
// ATOMIC_LOAD_<op>.
//
@@ -213,6 +225,8 @@ public:
EVT getSetCCResultType(LLVMContext &, EVT) const override;
bool isFMAFasterThanFMulAndFAdd(EVT VT) const override;
bool isFPImmLegal(const APFloat &Imm, EVT VT) const override;
+ bool isLegalICmpImmediate(int64_t Imm) const override;
+ bool isLegalAddImmediate(int64_t Imm) const override;
bool isLegalAddressingMode(const AddrMode &AM, Type *Ty) const override;
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS,
unsigned Align,
@@ -302,6 +316,7 @@ private:
SDValue lowerUDIVREM(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerBITCAST(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerOR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerCTPOP(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerATOMIC_LOAD(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerATOMIC_STORE(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerATOMIC_LOAD_OP(SDValue Op, SelectionDAG &DAG,
@@ -312,6 +327,7 @@ private:
SDValue lowerSTACKSAVE(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerPREFETCH(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const;
// If the last instruction before MBBI in MBB was some form of COMPARE,
// try to replace it with a COMPARE AND BRANCH just before MBBI.
@@ -349,6 +365,10 @@ private:
MachineBasicBlock *emitStringWrapper(MachineInstr *MI,
MachineBasicBlock *BB,
unsigned Opcode) const;
+ MachineBasicBlock *emitTransactionBegin(MachineInstr *MI,
+ MachineBasicBlock *MBB,
+ unsigned Opcode,
+ bool NoFloat) const;
};
} // end namespace llvm
diff --git a/lib/Target/SystemZ/SystemZInstrFormats.td b/lib/Target/SystemZ/SystemZInstrFormats.td
index 9f59a1c..2d3c9e2 100644
--- a/lib/Target/SystemZ/SystemZInstrFormats.td
+++ b/lib/Target/SystemZ/SystemZInstrFormats.td
@@ -473,6 +473,17 @@ class InstSS<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern>
let Inst{15-0} = BD2;
}
+class InstS<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern> // Format with a 16-bit opcode and one 16-bit BD2 field.
+ : InstSystemZ<4, outs, ins, asmstr, pattern> {
+ field bits<32> Inst;
+ field bits<32> SoftFail = 0;
+
+ bits<16> BD2;
+
+ let Inst{31-16} = op; // Opcode occupies the high 16 bits of the 32-bit encoding.
+ let Inst{15-0} = BD2; // BD2 occupies the low 16 bits.
+}
+
//===----------------------------------------------------------------------===//
// Instruction definitions with semantics
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/SystemZ/SystemZInstrInfo.cpp b/lib/Target/SystemZ/SystemZInstrInfo.cpp
index 5128993..3a02859 100644
--- a/lib/Target/SystemZ/SystemZInstrInfo.cpp
+++ b/lib/Target/SystemZ/SystemZInstrInfo.cpp
@@ -723,9 +723,12 @@ SystemZInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
unsigned Start, End;
if (isRxSBGMask(Imm, And.RegSize, Start, End)) {
unsigned NewOpcode;
- if (And.RegSize == 64)
+ if (And.RegSize == 64) {
NewOpcode = SystemZ::RISBG;
- else {
+ // Prefer RISBGN if available, since it does not clobber CC.
+ if (STI.hasMiscellaneousExtensions())
+ NewOpcode = SystemZ::RISBGN;
+ } else {
NewOpcode = SystemZ::RISBMux;
Start &= 31;
End &= 31;
@@ -1146,17 +1149,22 @@ unsigned SystemZInstrInfo::getOpcodeForOffset(unsigned Opcode,
unsigned SystemZInstrInfo::getLoadAndTest(unsigned Opcode) const {
switch (Opcode) {
- case SystemZ::L: return SystemZ::LT;
- case SystemZ::LY: return SystemZ::LT;
- case SystemZ::LG: return SystemZ::LTG;
- case SystemZ::LGF: return SystemZ::LTGF;
- case SystemZ::LR: return SystemZ::LTR;
- case SystemZ::LGFR: return SystemZ::LTGFR;
- case SystemZ::LGR: return SystemZ::LTGR;
- case SystemZ::LER: return SystemZ::LTEBR;
- case SystemZ::LDR: return SystemZ::LTDBR;
- case SystemZ::LXR: return SystemZ::LTXBR;
- default: return 0;
+ case SystemZ::L: return SystemZ::LT;
+ case SystemZ::LY: return SystemZ::LT;
+ case SystemZ::LG: return SystemZ::LTG;
+ case SystemZ::LGF: return SystemZ::LTGF;
+ case SystemZ::LR: return SystemZ::LTR;
+ case SystemZ::LGFR: return SystemZ::LTGFR;
+ case SystemZ::LGR: return SystemZ::LTGR;
+ case SystemZ::LER: return SystemZ::LTEBR;
+ case SystemZ::LDR: return SystemZ::LTDBR;
+ case SystemZ::LXR: return SystemZ::LTXBR;
+ // On zEC12 we prefer to use RISBGN. But if there is a chance to
+ // actually use the condition code, we may turn it back into RISBG.
+ // Note that RISBG is not really a "load-and-test" instruction,
+ // but sets the same condition code values, so is OK to use here.
+ case SystemZ::RISBGN: return SystemZ::RISBG;
+ default: return 0;
}
}
diff --git a/lib/Target/SystemZ/SystemZInstrInfo.td b/lib/Target/SystemZ/SystemZInstrInfo.td
index a7f7747..820f30b 100644
--- a/lib/Target/SystemZ/SystemZInstrInfo.td
+++ b/lib/Target/SystemZ/SystemZInstrInfo.td
@@ -1061,6 +1061,10 @@ let Defs = [CC] in {
def RISBG : RotateSelectRIEf<"risbg", 0xEC55, GR64, GR64>;
}
+// On zEC12 we have a variant of RISBG that does not set CC.
+let Predicates = [FeatureMiscellaneousExtensions] in
+ def RISBGN : RotateSelectRIEf<"risbgn", 0xEC59, GR64, GR64>;
+
// Forms of RISBG that only affect one word of the destination register.
// They do not set CC.
let Predicates = [FeatureHighWord] in {
@@ -1358,6 +1362,60 @@ let Defs = [CC] in {
}
//===----------------------------------------------------------------------===//
+// Transactional execution
+//===----------------------------------------------------------------------===//
+
+let Predicates = [FeatureTransactionalExecution] in {
+ // Transaction Begin
+ let hasSideEffects = 1, mayStore = 1,
+ usesCustomInserter = 1, Defs = [CC] in {
+ def TBEGIN : InstSIL<0xE560,
+ (outs), (ins bdaddr12only:$BD1, imm32zx16:$I2),
+ "tbegin\t$BD1, $I2",
+ [(z_tbegin bdaddr12only:$BD1, imm32zx16:$I2)]>;
+ def TBEGIN_nofloat : Pseudo<(outs), (ins bdaddr12only:$BD1, imm32zx16:$I2),
+ [(z_tbegin_nofloat bdaddr12only:$BD1,
+ imm32zx16:$I2)]>;
+ def TBEGINC : InstSIL<0xE561,
+ (outs), (ins bdaddr12only:$BD1, imm32zx16:$I2),
+ "tbeginc\t$BD1, $I2",
+ [(int_s390_tbeginc bdaddr12only:$BD1,
+ imm32zx16:$I2)]>;
+ }
+
+ // Transaction End
+ let hasSideEffects = 1, Defs = [CC], BD2 = 0 in
+ def TEND : InstS<0xB2F8, (outs), (ins), "tend", [(z_tend)]>;
+
+ // Transaction Abort
+ let hasSideEffects = 1, isTerminator = 1, isBarrier = 1 in
+ def TABORT : InstS<0xB2FC, (outs), (ins bdaddr12only:$BD2),
+ "tabort\t$BD2",
+ [(int_s390_tabort bdaddr12only:$BD2)]>;
+
+ // Nontransactional Store
+ let hasSideEffects = 1 in
+ def NTSTG : StoreRXY<"ntstg", 0xE325, int_s390_ntstg, GR64, 8>;
+
+ // Extract Transaction Nesting Depth
+ let hasSideEffects = 1 in
+ def ETND : InherentRRE<"etnd", 0xB2EC, GR32, (int_s390_etnd)>;
+}
+
+//===----------------------------------------------------------------------===//
+// Processor assist
+//===----------------------------------------------------------------------===//
+
+let Predicates = [FeatureProcessorAssist] in {
+ let hasSideEffects = 1, R4 = 0 in
+ def PPA : InstRRF<0xB2E8, (outs), (ins GR64:$R1, GR64:$R2, imm32zx4:$R3),
+ "ppa\t$R1, $R2, $R3", []>;
+ def : Pat<(int_s390_ppa_txassist GR32:$src),
+ (PPA (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$src, subreg_l32),
+ 0, 1)>;
+}
+
+//===----------------------------------------------------------------------===//
// Miscellaneous Instructions.
//===----------------------------------------------------------------------===//
@@ -1382,6 +1440,13 @@ let Defs = [CC] in {
def : Pat<(ctlz GR64:$src),
(EXTRACT_SUBREG (FLOGR GR64:$src), subreg_h64)>;
+// Population count. Counts bits set per byte.
+let Predicates = [FeaturePopulationCount], Defs = [CC] in {
+ def POPCNT : InstRRE<0xB9E1, (outs GR64:$R1), (ins GR64:$R2),
+ "popcnt\t$R1, $R2",
+ [(set GR64:$R1, (z_popcnt GR64:$R2))]>;
+}
+
// Use subregs to populate the "don't care" bits in a 32-bit to 64-bit anyext.
def : Pat<(i64 (anyext GR32:$src)),
(INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$src, subreg_l32)>;
diff --git a/lib/Target/SystemZ/SystemZOperators.td b/lib/Target/SystemZ/SystemZOperators.td
index 51ac5da..3151052 100644
--- a/lib/Target/SystemZ/SystemZOperators.td
+++ b/lib/Target/SystemZ/SystemZOperators.td
@@ -79,6 +79,9 @@ def SDT_ZI32Intrinsic : SDTypeProfile<1, 0, [SDTCisVT<0, i32>]>;
def SDT_ZPrefetch : SDTypeProfile<0, 2,
[SDTCisVT<0, i32>,
SDTCisPtrTy<1>]>;
+def SDT_ZTBegin : SDTypeProfile<0, 2,
+ [SDTCisPtrTy<0>,
+ SDTCisVT<1, i32>]>;
//===----------------------------------------------------------------------===//
// Node definitions
@@ -121,6 +124,7 @@ def z_select_ccmask : SDNode<"SystemZISD::SELECT_CCMASK", SDT_ZSelectCCMask,
def z_adjdynalloc : SDNode<"SystemZISD::ADJDYNALLOC", SDT_ZAdjDynAlloc>;
def z_extract_access : SDNode<"SystemZISD::EXTRACT_ACCESS",
SDT_ZExtractAccess>;
+def z_popcnt : SDNode<"SystemZISD::POPCNT", SDTIntUnaryOp>;
def z_umul_lohi64 : SDNode<"SystemZISD::UMUL_LOHI64", SDT_ZGR128Binary64>;
def z_sdivrem32 : SDNode<"SystemZISD::SDIVREM32", SDT_ZGR128Binary32>;
def z_sdivrem64 : SDNode<"SystemZISD::SDIVREM64", SDT_ZGR128Binary64>;
@@ -179,6 +183,15 @@ def z_prefetch : SDNode<"SystemZISD::PREFETCH", SDT_ZPrefetch,
[SDNPHasChain, SDNPMayLoad, SDNPMayStore,
SDNPMemOperand]>;
+def z_tbegin : SDNode<"SystemZISD::TBEGIN", SDT_ZTBegin,
+ [SDNPHasChain, SDNPOutGlue, SDNPMayStore,
+ SDNPSideEffect]>;
+def z_tbegin_nofloat : SDNode<"SystemZISD::TBEGIN_NOFLOAT", SDT_ZTBegin,
+ [SDNPHasChain, SDNPOutGlue, SDNPMayStore,
+ SDNPSideEffect]>;
+def z_tend : SDNode<"SystemZISD::TEND", SDTNone,
+ [SDNPHasChain, SDNPOutGlue, SDNPSideEffect]>;
+
//===----------------------------------------------------------------------===//
// Pattern fragments
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/SystemZ/SystemZProcessors.td b/lib/Target/SystemZ/SystemZProcessors.td
index 1594854..15614c9 100644
--- a/lib/Target/SystemZ/SystemZProcessors.td
+++ b/lib/Target/SystemZ/SystemZProcessors.td
@@ -39,6 +39,11 @@ def FeatureFPExtension : SystemZFeature<
"Assume that the floating-point extension facility is installed"
>;
+def FeaturePopulationCount : SystemZFeature<
+ "population-count", "PopulationCount",
+ "Assume that the population-count facility is installed"
+>;
+
def FeatureFastSerialization : SystemZFeature<
"fast-serialization", "FastSerialization",
"Assume that the fast-serialization facility is installed"
@@ -50,13 +55,30 @@ def FeatureInterlockedAccess1 : SystemZFeature<
>;
def FeatureNoInterlockedAccess1 : SystemZMissingFeature<"InterlockedAccess1">;
+def FeatureMiscellaneousExtensions : SystemZFeature<
+ "miscellaneous-extensions", "MiscellaneousExtensions",
+ "Assume that the miscellaneous-extensions facility is installed"
+>;
+
+def FeatureTransactionalExecution : SystemZFeature<
+ "transactional-execution", "TransactionalExecution",
+ "Assume that the transactional-execution facility is installed"
+>;
+
+def FeatureProcessorAssist : SystemZFeature<
+ "processor-assist", "ProcessorAssist",
+ "Assume that the processor-assist facility is installed"
+>;
+
def : Processor<"generic", NoItineraries, []>;
def : Processor<"z10", NoItineraries, []>;
def : Processor<"z196", NoItineraries,
[FeatureDistinctOps, FeatureLoadStoreOnCond, FeatureHighWord,
- FeatureFPExtension, FeatureFastSerialization,
- FeatureInterlockedAccess1]>;
+ FeatureFPExtension, FeaturePopulationCount,
+ FeatureFastSerialization, FeatureInterlockedAccess1]>;
def : Processor<"zEC12", NoItineraries,
[FeatureDistinctOps, FeatureLoadStoreOnCond, FeatureHighWord,
- FeatureFPExtension, FeatureFastSerialization,
- FeatureInterlockedAccess1]>;
+ FeatureFPExtension, FeaturePopulationCount,
+ FeatureFastSerialization, FeatureInterlockedAccess1,
+ FeatureMiscellaneousExtensions,
+ FeatureTransactionalExecution, FeatureProcessorAssist]>;
diff --git a/lib/Target/SystemZ/SystemZSubtarget.cpp b/lib/Target/SystemZ/SystemZSubtarget.cpp
index 31a2bff..de725ae 100644
--- a/lib/Target/SystemZ/SystemZSubtarget.cpp
+++ b/lib/Target/SystemZ/SystemZSubtarget.cpp
@@ -10,7 +10,6 @@
#include "SystemZSubtarget.h"
#include "MCTargetDesc/SystemZMCTargetDesc.h"
#include "llvm/IR/GlobalValue.h"
-#include "llvm/Support/Host.h"
using namespace llvm;
@@ -28,10 +27,6 @@ SystemZSubtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS) {
std::string CPUName = CPU;
if (CPUName.empty())
CPUName = "generic";
-#if defined(__linux__) && defined(__s390x__)
- if (CPUName == "generic")
- CPUName = sys::getHostCPUName();
-#endif
// Parse features string.
ParseSubtargetFeatures(CPUName, FS);
return *this;
@@ -43,7 +38,9 @@ SystemZSubtarget::SystemZSubtarget(const std::string &TT,
const TargetMachine &TM)
: SystemZGenSubtargetInfo(TT, CPU, FS), HasDistinctOps(false),
HasLoadStoreOnCond(false), HasHighWord(false), HasFPExtension(false),
- HasFastSerialization(false), HasInterlockedAccess1(false),
+ HasPopulationCount(false), HasFastSerialization(false),
+ HasInterlockedAccess1(false), HasMiscellaneousExtensions(false),
+ HasTransactionalExecution(false), HasProcessorAssist(false),
TargetTriple(TT), InstrInfo(initializeSubtargetDependencies(CPU, FS)),
TLInfo(TM, *this), TSInfo(*TM.getDataLayout()), FrameLowering() {}
diff --git a/lib/Target/SystemZ/SystemZSubtarget.h b/lib/Target/SystemZ/SystemZSubtarget.h
index 99cb1ad..c99e552 100644
--- a/lib/Target/SystemZ/SystemZSubtarget.h
+++ b/lib/Target/SystemZ/SystemZSubtarget.h
@@ -38,8 +38,12 @@ protected:
bool HasLoadStoreOnCond;
bool HasHighWord;
bool HasFPExtension;
+ bool HasPopulationCount;
bool HasFastSerialization;
bool HasInterlockedAccess1;
+ bool HasMiscellaneousExtensions;
+ bool HasTransactionalExecution;
+ bool HasProcessorAssist;
private:
Triple TargetTriple;
@@ -86,12 +90,26 @@ public:
// Return true if the target has the floating-point extension facility.
bool hasFPExtension() const { return HasFPExtension; }
+ // Return true if the target has the population-count facility.
+ bool hasPopulationCount() const { return HasPopulationCount; }
+
// Return true if the target has the fast-serialization facility.
bool hasFastSerialization() const { return HasFastSerialization; }
// Return true if the target has interlocked-access facility 1.
bool hasInterlockedAccess1() const { return HasInterlockedAccess1; }
+ // Return true if the target has the miscellaneous-extensions facility.
+ bool hasMiscellaneousExtensions() const {
+ return HasMiscellaneousExtensions;
+ }
+
+ // Return true if the target has the transactional-execution facility.
+ bool hasTransactionalExecution() const { return HasTransactionalExecution; }
+
+ // Return true if the target has the processor-assist facility.
+ bool hasProcessorAssist() const { return HasProcessorAssist; }
+
// Return true if GV can be accessed using LARL for reloc model RM
// and code model CM.
bool isPC32DBLSymbol(const GlobalValue *GV, Reloc::Model RM,
diff --git a/lib/Target/SystemZ/SystemZTargetMachine.cpp b/lib/Target/SystemZ/SystemZTargetMachine.cpp
index 86baccb..b2f8175 100644
--- a/lib/Target/SystemZ/SystemZTargetMachine.cpp
+++ b/lib/Target/SystemZ/SystemZTargetMachine.cpp
@@ -8,6 +8,7 @@
//===----------------------------------------------------------------------===//
#include "SystemZTargetMachine.h"
+#include "SystemZTargetTransformInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Transforms/Scalar.h"
@@ -108,3 +109,9 @@ void SystemZPassConfig::addPreEmitPass() {
TargetPassConfig *SystemZTargetMachine::createPassConfig(PassManagerBase &PM) {
return new SystemZPassConfig(this, PM);
}
+
+// Return a TargetIRAnalysis whose callback wraps a SystemZTTIImpl, built from
+// this target machine and the queried function, in a TargetTransformInfo.
+// NOTE(review): the callback captures `this`, so presumably it must not be
+// invoked after this target machine has been destroyed -- confirm.
+TargetIRAnalysis SystemZTargetMachine::getTargetIRAnalysis() {
+ return TargetIRAnalysis([this](Function &F) {
+ return TargetTransformInfo(SystemZTTIImpl(this, F));
+ });
+}
diff --git a/lib/Target/SystemZ/SystemZTargetMachine.h b/lib/Target/SystemZ/SystemZTargetMachine.h
index 181b926..5ded07c 100644
--- a/lib/Target/SystemZ/SystemZTargetMachine.h
+++ b/lib/Target/SystemZ/SystemZTargetMachine.h
@@ -39,6 +39,7 @@ public:
}
// Override LLVMTargetMachine
TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
+ TargetIRAnalysis getTargetIRAnalysis() override;
TargetLoweringObjectFile *getObjFileLowering() const override {
return TLOF.get();
}
diff --git a/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp b/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
new file mode 100644
index 0000000..3337f63
--- /dev/null
+++ b/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
@@ -0,0 +1,240 @@
+//===-- SystemZTargetTransformInfo.cpp - SystemZ-specific TTI -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a TargetTransformInfo analysis pass specific to the
+// SystemZ target machine. It uses the target's detailed information to provide
+// more precise answers to certain TTI queries, while letting the target
+// independent and default TTI implementations handle the rest.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SystemZTargetTransformInfo.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/CodeGen/BasicTTIImpl.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/CostTable.h"
+#include "llvm/Target/TargetLowering.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "systemztti"
+
+//===----------------------------------------------------------------------===//
+//
+// SystemZ cost model.
+//
+//===----------------------------------------------------------------------===//
+
+// Cost of the integer immediate Imm of integer type Ty on its own (no using
+// instruction considered). Returns TCC_Free for zero-width types, types wider
+// than 64 bits and the value zero; TCC_Basic when one of the single-instruction
+// forms named below applies; 2 * TCC_Basic for any other immediate of at most
+// 64 bits; and 4 * TCC_Basic when Imm itself is wider than 64 bits.
+unsigned SystemZTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
+ assert(Ty->isIntegerTy());
+
+ unsigned BitSize = Ty->getPrimitiveSizeInBits();
+ // There is no cost model for constants with a bit size of 0. Return TCC_Free
+ // here, so that constant hoisting will ignore this constant.
+ if (BitSize == 0)
+ return TTI::TCC_Free;
+ // No cost model for operations on integers larger than 64 bit implemented yet.
+ if (BitSize > 64)
+ return TTI::TCC_Free;
+
+ if (Imm == 0)
+ return TTI::TCC_Free;
+
+ if (Imm.getBitWidth() <= 64) {
+ // Constants loaded via lgfi.
+ if (isInt<32>(Imm.getSExtValue()))
+ return TTI::TCC_Basic;
+ // Constants loaded via llilf.
+ if (isUInt<32>(Imm.getZExtValue()))
+ return TTI::TCC_Basic;
+ // Constants loaded via llihf (low 32 bits are all zero).
+ if ((Imm.getZExtValue() & 0xffffffff) == 0)
+ return TTI::TCC_Basic;
+
+ return 2 * TTI::TCC_Basic;
+ }
+
+ return 4 * TTI::TCC_Basic;
+}
+
+// Cost of the integer immediate Imm of type Ty when it appears as operand
+// number Idx of an instruction with the given Opcode. Returns TCC_Free when
+// the immediate can be folded into the instruction (per the cases below), for
+// zero-width or wider-than-64-bit types, and for opcodes not listed in the
+// switch. The base address of a GetElementPtr (Idx == 0) is costed at
+// 2 * TCC_Basic. Everything else falls back to the opcode-independent
+// getIntImmCost(Imm, Ty).
+unsigned SystemZTTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx,
+ const APInt &Imm, Type *Ty) {
+ assert(Ty->isIntegerTy());
+
+ unsigned BitSize = Ty->getPrimitiveSizeInBits();
+ // There is no cost model for constants with a bit size of 0. Return TCC_Free
+ // here, so that constant hoisting will ignore this constant.
+ if (BitSize == 0)
+ return TTI::TCC_Free;
+ // No cost model for operations on integers larger than 64 bit implemented yet.
+ if (BitSize > 64)
+ return TTI::TCC_Free;
+
+ switch (Opcode) {
+ default:
+ return TTI::TCC_Free;
+ case Instruction::GetElementPtr:
+ // Always hoist the base address of a GetElementPtr. This prevents the
+ // creation of new constants for every base constant that gets constant
+ // folded with the offset.
+ if (Idx == 0)
+ return 2 * TTI::TCC_Basic;
+ return TTI::TCC_Free;
+ case Instruction::Store:
+ if (Idx == 0 && Imm.getBitWidth() <= 64) {
+ // Any 8-bit immediate store can be implemented via mvi.
+ if (BitSize == 8)
+ return TTI::TCC_Free;
+ // 16-bit immediate values can be stored via mvhhi/mvhi/mvghi.
+ if (isInt<16>(Imm.getSExtValue()))
+ return TTI::TCC_Free;
+ }
+ break;
+ case Instruction::ICmp:
+ if (Idx == 1 && Imm.getBitWidth() <= 64) {
+ // Comparisons against signed 32-bit immediates implemented via cgfi.
+ if (isInt<32>(Imm.getSExtValue()))
+ return TTI::TCC_Free;
+ // Comparisons against unsigned 32-bit immediates implemented via clgfi.
+ if (isUInt<32>(Imm.getZExtValue()))
+ return TTI::TCC_Free;
+ }
+ break;
+ case Instruction::Add:
+ case Instruction::Sub:
+ if (Idx == 1 && Imm.getBitWidth() <= 64) {
+ // We use algfi/slgfi to add/subtract 32-bit unsigned immediates.
+ if (isUInt<32>(Imm.getZExtValue()))
+ return TTI::TCC_Free;
+ // Or their negation, by swapping addition vs. subtraction.
+ // Negate in uint64_t: negating the int64_t directly would be signed
+ // overflow (undefined behavior) when the immediate is INT64_MIN.
+ if (isUInt<32>(0 - static_cast<uint64_t>(Imm.getSExtValue())))
+ return TTI::TCC_Free;
+ }
+ break;
+ case Instruction::Mul:
+ if (Idx == 1 && Imm.getBitWidth() <= 64) {
+ // We use msgfi to multiply by 32-bit signed immediates.
+ if (isInt<32>(Imm.getSExtValue()))
+ return TTI::TCC_Free;
+ }
+ break;
+ case Instruction::Or:
+ case Instruction::Xor:
+ if (Idx == 1 && Imm.getBitWidth() <= 64) {
+ // Masks supported by oilf/xilf.
+ if (isUInt<32>(Imm.getZExtValue()))
+ return TTI::TCC_Free;
+ // Masks supported by oihf/xihf.
+ if ((Imm.getZExtValue() & 0xffffffff) == 0)
+ return TTI::TCC_Free;
+ }
+ break;
+ case Instruction::And:
+ if (Idx == 1 && Imm.getBitWidth() <= 64) {
+ // Any 32-bit AND operation can be implemented via nilf.
+ if (BitSize <= 32)
+ return TTI::TCC_Free;
+ // 64-bit masks supported by nilf.
+ if (isUInt<32>(~Imm.getZExtValue()))
+ return TTI::TCC_Free;
+ // 64-bit masks supported by nilh.
+ if ((Imm.getZExtValue() & 0xffffffff) == 0xffffffff)
+ return TTI::TCC_Free;
+ // Some 64-bit AND operations can be implemented via risbg.
+ const SystemZInstrInfo *TII = ST->getInstrInfo();
+ unsigned Start, End;
+ if (TII->isRxSBGMask(Imm.getZExtValue(), BitSize, Start, End))
+ return TTI::TCC_Free;
+ }
+ break;
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ // Always return TCC_Free for the shift value of a shift instruction.
+ if (Idx == 1)
+ return TTI::TCC_Free;
+ break;
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ case Instruction::URem:
+ case Instruction::SRem:
+ case Instruction::Trunc:
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ case Instruction::IntToPtr:
+ case Instruction::PtrToInt:
+ case Instruction::BitCast:
+ case Instruction::PHI:
+ case Instruction::Call:
+ case Instruction::Select:
+ case Instruction::Ret:
+ case Instruction::Load:
+ break;
+ }
+
+ return SystemZTTIImpl::getIntImmCost(Imm, Ty);
+}
+
+// Cost of the integer immediate Imm of type Ty when it appears as argument
+// number Idx of a call to intrinsic IID. Returns TCC_Free when the immediate
+// can be folded (per the cases below), for zero-width or wider-than-64-bit
+// types, and for intrinsics not listed in the switch. Everything else falls
+// back to the opcode-independent getIntImmCost(Imm, Ty).
+unsigned SystemZTTIImpl::getIntImmCost(Intrinsic::ID IID, unsigned Idx,
+ const APInt &Imm, Type *Ty) {
+ assert(Ty->isIntegerTy());
+
+ unsigned BitSize = Ty->getPrimitiveSizeInBits();
+ // There is no cost model for constants with a bit size of 0. Return TCC_Free
+ // here, so that constant hoisting will ignore this constant.
+ if (BitSize == 0)
+ return TTI::TCC_Free;
+ // No cost model for operations on integers larger than 64 bit implemented yet.
+ if (BitSize > 64)
+ return TTI::TCC_Free;
+
+ switch (IID) {
+ default:
+ return TTI::TCC_Free;
+ case Intrinsic::sadd_with_overflow:
+ case Intrinsic::uadd_with_overflow:
+ case Intrinsic::ssub_with_overflow:
+ case Intrinsic::usub_with_overflow:
+ // These get expanded to include a normal addition/subtraction.
+ if (Idx == 1 && Imm.getBitWidth() <= 64) {
+ if (isUInt<32>(Imm.getZExtValue()))
+ return TTI::TCC_Free;
+ // Negate in uint64_t: negating the int64_t directly would be signed
+ // overflow (undefined behavior) when the immediate is INT64_MIN.
+ if (isUInt<32>(0 - static_cast<uint64_t>(Imm.getSExtValue())))
+ return TTI::TCC_Free;
+ }
+ break;
+ case Intrinsic::smul_with_overflow:
+ case Intrinsic::umul_with_overflow:
+ // These get expanded to include a normal multiplication.
+ if (Idx == 1 && Imm.getBitWidth() <= 64) {
+ if (isInt<32>(Imm.getSExtValue()))
+ return TTI::TCC_Free;
+ }
+ break;
+ case Intrinsic::experimental_stackmap:
+ // The first two arguments are always free; later ones are free when they
+ // fit in 64 bits.
+ if ((Idx < 2) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
+ return TTI::TCC_Free;
+ break;
+ case Intrinsic::experimental_patchpoint_void:
+ case Intrinsic::experimental_patchpoint_i64:
+ // The first four arguments are always free; later ones are free when they
+ // fit in 64 bits.
+ if ((Idx < 4) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
+ return TTI::TCC_Free;
+ break;
+ }
+ return SystemZTTIImpl::getIntImmCost(Imm, Ty);
+}
+
+// Report population-count support for an integer of TyWidth bits (asserted to
+// be a power of two): PSK_FastHardware when the subtarget has the
+// population-count facility and TyWidth is at most 64, PSK_Software otherwise.
+TargetTransformInfo::PopcntSupportKind
+SystemZTTIImpl::getPopcntSupport(unsigned TyWidth) {
+ assert(isPowerOf2_32(TyWidth) && "Type width must be power of 2");
+ if (ST->hasPopulationCount() && TyWidth <= 64)
+ return TTI::PSK_FastHardware;
+ return TTI::PSK_Software;
+}
+
diff --git a/lib/Target/SystemZ/SystemZTargetTransformInfo.h b/lib/Target/SystemZ/SystemZTargetTransformInfo.h
new file mode 100644
index 0000000..d498913
--- /dev/null
+++ b/lib/Target/SystemZ/SystemZTargetTransformInfo.h
@@ -0,0 +1,70 @@
+//===-- SystemZTargetTransformInfo.h - SystemZ-specific TTI ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZTARGETTRANSFORMINFO_H
+#define LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZTARGETTRANSFORMINFO_H
+
+#include "SystemZTargetMachine.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/CodeGen/BasicTTIImpl.h"
+
+namespace llvm {
+
+// SystemZ implementation of the TTI interface, layered on BasicTTIImplBase
+// (CRTP). It holds non-owning pointers to the subtarget selected for one
+// function and to that subtarget's target lowering.
+class SystemZTTIImpl : public BasicTTIImplBase<SystemZTTIImpl> {
+ typedef BasicTTIImplBase<SystemZTTIImpl> BaseT;
+ typedef TargetTransformInfo TTI;
+ friend BaseT;
+
+ // Subtarget obtained from the target machine for the function passed to the
+ // constructor, and the lowering object it provides.
+ const SystemZSubtarget *ST;
+ const SystemZTargetLowering *TLI;
+
+ // Private accessors; BaseT is a friend and can reach them.
+ const SystemZSubtarget *getST() const { return ST; }
+ const SystemZTargetLowering *getTLI() const { return TLI; }
+
+public:
+ // Bind this TTI implementation to the subtarget that TM selects for F.
+ explicit SystemZTTIImpl(const SystemZTargetMachine *TM, Function &F)
+ : BaseT(TM), ST(TM->getSubtargetImpl(F)), TLI(ST->getTargetLowering()) {}
+
+ // Provide value semantics. MSVC requires that we spell all of these out.
+ // Each operation forwards to BaseT and then copies the two raw pointers.
+ SystemZTTIImpl(const SystemZTTIImpl &Arg)
+ : BaseT(static_cast<const BaseT &>(Arg)), ST(Arg.ST), TLI(Arg.TLI) {}
+ SystemZTTIImpl(SystemZTTIImpl &&Arg)
+ : BaseT(std::move(static_cast<BaseT &>(Arg))), ST(std::move(Arg.ST)),
+ TLI(std::move(Arg.TLI)) {}
+ SystemZTTIImpl &operator=(const SystemZTTIImpl &RHS) {
+ BaseT::operator=(static_cast<const BaseT &>(RHS));
+ ST = RHS.ST;
+ TLI = RHS.TLI;
+ return *this;
+ }
+ SystemZTTIImpl &operator=(SystemZTTIImpl &&RHS) {
+ BaseT::operator=(std::move(static_cast<BaseT &>(RHS)));
+ ST = std::move(RHS.ST);
+ TLI = std::move(RHS.TLI);
+ return *this;
+ }
+
+ /// \name Scalar TTI Implementations
+ /// @{
+
+ // Cost of an integer immediate on its own.
+ unsigned getIntImmCost(const APInt &Imm, Type *Ty);
+
+ // Cost of an integer immediate used as operand Idx of an instruction with
+ // the given opcode, or as argument Idx of a call to the given intrinsic.
+ unsigned getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
+ Type *Ty);
+ unsigned getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
+ Type *Ty);
+
+ // Kind of population-count support available for a TyWidth-bit integer.
+ TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth);
+
+ /// @}
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/TargetLoweringObjectFile.cpp b/lib/Target/TargetLoweringObjectFile.cpp
index 75100fb..db543f3 100644
--- a/lib/Target/TargetLoweringObjectFile.cpp
+++ b/lib/Target/TargetLoweringObjectFile.cpp
@@ -110,7 +110,7 @@ MCSymbol *TargetLoweringObjectFile::getSymbolWithGlobalValueBase(
NameStr += DL->getPrivateGlobalPrefix();
TM.getNameWithPrefix(NameStr, GV, Mang);
NameStr.append(Suffix.begin(), Suffix.end());
- return Ctx->GetOrCreateSymbol(NameStr.str());
+ return Ctx->GetOrCreateSymbol(NameStr);
}
MCSymbol *TargetLoweringObjectFile::getCFIPersonalitySymbol(
diff --git a/lib/Target/TargetMachine.cpp b/lib/Target/TargetMachine.cpp
index dd07f81..5807cf7 100644
--- a/lib/Target/TargetMachine.cpp
+++ b/lib/Target/TargetMachine.cpp
@@ -187,5 +187,5 @@ MCSymbol *TargetMachine::getSymbol(const GlobalValue *GV, Mangler &Mang) const {
SmallString<60> NameStr;
getNameWithPrefix(NameStr, GV, Mang);
const TargetLoweringObjectFile *TLOF = getObjFileLowering();
- return TLOF->getContext().GetOrCreateSymbol(NameStr.str());
+ return TLOF->getContext().GetOrCreateSymbol(NameStr);
}
diff --git a/lib/Target/TargetMachineC.cpp b/lib/Target/TargetMachineC.cpp
index 236cb1b..1a5bf16 100644
--- a/lib/Target/TargetMachineC.cpp
+++ b/lib/Target/TargetMachineC.cpp
@@ -183,7 +183,9 @@ void LLVMSetTargetMachineAsmVerbosity(LLVMTargetMachineRef T,
}
static LLVMBool LLVMTargetMachineEmit(LLVMTargetMachineRef T, LLVMModuleRef M,
- formatted_raw_ostream &OS, LLVMCodeGenFileType codegen, char **ErrorMessage) {
+ raw_pwrite_stream &OS,
+ LLVMCodeGenFileType codegen,
+ char **ErrorMessage) {
TargetMachine* TM = unwrap(T);
Module* Mod = unwrap(M);
@@ -229,8 +231,7 @@ LLVMBool LLVMTargetMachineEmitToFile(LLVMTargetMachineRef T, LLVMModuleRef M,
*ErrorMessage = strdup(EC.message().c_str());
return true;
}
- formatted_raw_ostream destf(dest);
- bool Result = LLVMTargetMachineEmit(T, M, destf, codegen, ErrorMessage);
+ bool Result = LLVMTargetMachineEmit(T, M, dest, codegen, ErrorMessage);
dest.flush();
return Result;
}
@@ -238,15 +239,14 @@ LLVMBool LLVMTargetMachineEmitToFile(LLVMTargetMachineRef T, LLVMModuleRef M,
LLVMBool LLVMTargetMachineEmitToMemoryBuffer(LLVMTargetMachineRef T,
LLVMModuleRef M, LLVMCodeGenFileType codegen, char** ErrorMessage,
LLVMMemoryBufferRef *OutMemBuf) {
- std::string CodeString;
- raw_string_ostream OStream(CodeString);
- formatted_raw_ostream Out(OStream);
- bool Result = LLVMTargetMachineEmit(T, M, Out, codegen, ErrorMessage);
+ SmallString<0> CodeString;
+ raw_svector_ostream OStream(CodeString);
+ bool Result = LLVMTargetMachineEmit(T, M, OStream, codegen, ErrorMessage);
OStream.flush();
- std::string &Data = OStream.str();
- *OutMemBuf = LLVMCreateMemoryBufferWithMemoryRangeCopy(Data.c_str(),
- Data.length(), "");
+ StringRef Data = OStream.str();
+ *OutMemBuf =
+ LLVMCreateMemoryBufferWithMemoryRangeCopy(Data.data(), Data.size(), "");
return Result;
}
diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp
index c24805a..93c6ea0 100644
--- a/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -2571,7 +2571,7 @@ bool X86AsmParser::MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode,
SmallString<16> Tmp;
Tmp += Base;
Tmp += ' ';
- Op.setTokenValue(Tmp.str());
+ Op.setTokenValue(Tmp);
// If this instruction starts with an 'f', then it is a floating point stack
// instruction. These come in up to three forms for 32-bit, 64-bit, and
diff --git a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp
index 65461af..f265f1d 100644
--- a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp
+++ b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp
@@ -33,15 +33,12 @@ using namespace llvm;
#define PRINT_ALIAS_INSTR
#include "X86GenAsmWriter.inc"
-void X86ATTInstPrinter::printRegName(raw_ostream &OS,
- unsigned RegNo) const {
- OS << markup("<reg:")
- << '%' << getRegisterName(RegNo)
- << markup(">");
+void X86ATTInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
+ OS << markup("<reg:") << '%' << getRegisterName(RegNo) << markup(">");
}
void X86ATTInstPrinter::printInst(const MCInst *MI, raw_ostream &OS,
- StringRef Annot) {
+ StringRef Annot, const MCSubtargetInfo &STI) {
const MCInstrDesc &Desc = MII.get(MI->getOpcode());
uint64_t TSFlags = Desc.TSFlags;
@@ -60,7 +57,7 @@ void X86ATTInstPrinter::printInst(const MCInst *MI, raw_ostream &OS,
// InstrInfo.td as soon as Requires clause is supported properly
// for InstAlias.
if (MI->getOpcode() == X86::CALLpcrel32 &&
- (getAvailableFeatures() & X86::Mode64Bit) != 0) {
+ (STI.getFeatureBits() & X86::Mode64Bit) != 0) {
OS << "\tcallq\t";
printPCRelImm(MI, 0, OS);
}
@@ -169,8 +166,7 @@ void X86ATTInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
printRegName(O, Op.getReg());
} else if (Op.isImm()) {
// Print X86 immediates as signed values.
- O << markup("<imm:")
- << '$' << formatImm((int64_t)Op.getImm())
+ O << markup("<imm:") << '$' << formatImm((int64_t)Op.getImm())
<< markup(">");
// If there are no instruction-specific comments, add a comment clarifying
@@ -182,24 +178,22 @@ void X86ATTInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
} else {
assert(Op.isExpr() && "unknown operand kind in printOperand");
- O << markup("<imm:")
- << '$' << *Op.getExpr()
- << markup(">");
+ O << markup("<imm:") << '$' << *Op.getExpr() << markup(">");
}
}
void X86ATTInstPrinter::printMemReference(const MCInst *MI, unsigned Op,
raw_ostream &O) {
- const MCOperand &BaseReg = MI->getOperand(Op+X86::AddrBaseReg);
- const MCOperand &IndexReg = MI->getOperand(Op+X86::AddrIndexReg);
- const MCOperand &DispSpec = MI->getOperand(Op+X86::AddrDisp);
- const MCOperand &SegReg = MI->getOperand(Op+X86::AddrSegmentReg);
+ const MCOperand &BaseReg = MI->getOperand(Op + X86::AddrBaseReg);
+ const MCOperand &IndexReg = MI->getOperand(Op + X86::AddrIndexReg);
+ const MCOperand &DispSpec = MI->getOperand(Op + X86::AddrDisp);
+ const MCOperand &SegReg = MI->getOperand(Op + X86::AddrSegmentReg);
O << markup("<mem:");
// If this has a segment register, print it.
if (SegReg.getReg()) {
- printOperand(MI, Op+X86::AddrSegmentReg, O);
+ printOperand(MI, Op + X86::AddrSegmentReg, O);
O << ':';
}
@@ -215,16 +209,14 @@ void X86ATTInstPrinter::printMemReference(const MCInst *MI, unsigned Op,
if (IndexReg.getReg() || BaseReg.getReg()) {
O << '(';
if (BaseReg.getReg())
- printOperand(MI, Op+X86::AddrBaseReg, O);
+ printOperand(MI, Op + X86::AddrBaseReg, O);
if (IndexReg.getReg()) {
O << ',';
- printOperand(MI, Op+X86::AddrIndexReg, O);
- unsigned ScaleVal = MI->getOperand(Op+X86::AddrScaleAmt).getImm();
+ printOperand(MI, Op + X86::AddrIndexReg, O);
+ unsigned ScaleVal = MI->getOperand(Op + X86::AddrScaleAmt).getImm();
if (ScaleVal != 1) {
- O << ','
- << markup("<imm:")
- << ScaleVal // never printed in hex.
+ O << ',' << markup("<imm:") << ScaleVal // never printed in hex.
<< markup(">");
}
}
@@ -236,13 +228,13 @@ void X86ATTInstPrinter::printMemReference(const MCInst *MI, unsigned Op,
void X86ATTInstPrinter::printSrcIdx(const MCInst *MI, unsigned Op,
raw_ostream &O) {
- const MCOperand &SegReg = MI->getOperand(Op+1);
+ const MCOperand &SegReg = MI->getOperand(Op + 1);
O << markup("<mem:");
// If this has a segment register, print it.
if (SegReg.getReg()) {
- printOperand(MI, Op+1, O);
+ printOperand(MI, Op + 1, O);
O << ':';
}
@@ -267,13 +259,13 @@ void X86ATTInstPrinter::printDstIdx(const MCInst *MI, unsigned Op,
void X86ATTInstPrinter::printMemOffset(const MCInst *MI, unsigned Op,
raw_ostream &O) {
const MCOperand &DispSpec = MI->getOperand(Op);
- const MCOperand &SegReg = MI->getOperand(Op+1);
+ const MCOperand &SegReg = MI->getOperand(Op + 1);
O << markup("<mem:");
// If this has a segment register, print it.
if (SegReg.getReg()) {
- printOperand(MI, Op+1, O);
+ printOperand(MI, Op + 1, O);
O << ':';
}
@@ -289,7 +281,6 @@ void X86ATTInstPrinter::printMemOffset(const MCInst *MI, unsigned Op,
void X86ATTInstPrinter::printU8Imm(const MCInst *MI, unsigned Op,
raw_ostream &O) {
- O << markup("<imm:")
- << '$' << formatImm(MI->getOperand(Op).getImm() & 0xff)
+ O << markup("<imm:") << '$' << formatImm(MI->getOperand(Op).getImm() & 0xff)
<< markup(">");
}
diff --git a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h
index f71cb81..62b6b73 100644
--- a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h
+++ b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h
@@ -24,14 +24,12 @@ class MCOperand;
class X86ATTInstPrinter final : public MCInstPrinter {
public:
X86ATTInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
- const MCRegisterInfo &MRI, const MCSubtargetInfo &STI)
- : MCInstPrinter(MAI, MII, MRI) {
- // Initialize the set of available features.
- setAvailableFeatures(STI.getFeatureBits());
- }
+ const MCRegisterInfo &MRI)
+ : MCInstPrinter(MAI, MII, MRI) {}
void printRegName(raw_ostream &OS, unsigned RegNo) const override;
- void printInst(const MCInst *MI, raw_ostream &OS, StringRef Annot) override;
+ void printInst(const MCInst *MI, raw_ostream &OS, StringRef Annot,
+ const MCSubtargetInfo &STI) override;
// Autogenerated by tblgen, returns true if we successfully printed an
// alias.
@@ -142,7 +140,6 @@ public:
private:
bool HasCustomInstComment;
};
-
}
#endif
diff --git a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp
index 91d1828..4d92daf 100644
--- a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp
+++ b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp
@@ -33,7 +33,8 @@ void X86IntelInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
}
void X86IntelInstPrinter::printInst(const MCInst *MI, raw_ostream &OS,
- StringRef Annot) {
+ StringRef Annot,
+ const MCSubtargetInfo &STI) {
const MCInstrDesc &Desc = MII.get(MI->getOpcode());
uint64_t TSFlags = Desc.TSFlags;
diff --git a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h
index 2150144..6e371da 100644
--- a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h
+++ b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h
@@ -28,7 +28,8 @@ public:
: MCInstPrinter(MAI, MII, MRI) {}
void printRegName(raw_ostream &OS, unsigned RegNo) const override;
- void printInst(const MCInst *MI, raw_ostream &OS, StringRef Annot) override;
+ void printInst(const MCInst *MI, raw_ostream &OS, StringRef Annot,
+ const MCSubtargetInfo &STI) override;
// Autogenerated by tblgen.
void printInstruction(const MCInst *MI, raw_ostream &O);
diff --git a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
index a400d46..b84c983 100644
--- a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
@@ -360,7 +360,7 @@ public:
ELFX86_32AsmBackend(const Target &T, uint8_t OSABI, StringRef CPU)
: ELFX86AsmBackend(T, OSABI, CPU) {}
- MCObjectWriter *createObjectWriter(raw_ostream &OS) const override {
+ MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override {
return createX86ELFObjectWriter(OS, /*IsELF64*/ false, OSABI, ELF::EM_386);
}
};
@@ -370,7 +370,7 @@ public:
ELFX86_X32AsmBackend(const Target &T, uint8_t OSABI, StringRef CPU)
: ELFX86AsmBackend(T, OSABI, CPU) {}
- MCObjectWriter *createObjectWriter(raw_ostream &OS) const override {
+ MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override {
return createX86ELFObjectWriter(OS, /*IsELF64*/ false, OSABI,
ELF::EM_X86_64);
}
@@ -381,7 +381,7 @@ public:
ELFX86_64AsmBackend(const Target &T, uint8_t OSABI, StringRef CPU)
: ELFX86AsmBackend(T, OSABI, CPU) {}
- MCObjectWriter *createObjectWriter(raw_ostream &OS) const override {
+ MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override {
return createX86ELFObjectWriter(OS, /*IsELF64*/ true, OSABI, ELF::EM_X86_64);
}
};
@@ -395,7 +395,7 @@ public:
, Is64Bit(is64Bit) {
}
- MCObjectWriter *createObjectWriter(raw_ostream &OS) const override {
+ MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override {
return createX86WinCOFFObjectWriter(OS, Is64Bit);
}
};
@@ -752,7 +752,7 @@ public:
StringRef CPU)
: DarwinX86AsmBackend(T, MRI, CPU, false) {}
- MCObjectWriter *createObjectWriter(raw_ostream &OS) const override {
+ MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override {
return createX86MachObjectWriter(OS, /*Is64Bit=*/false,
MachO::CPU_TYPE_I386,
MachO::CPU_SUBTYPE_I386_ALL);
@@ -772,7 +772,7 @@ public:
StringRef CPU, MachO::CPUSubTypeX86 st)
: DarwinX86AsmBackend(T, MRI, CPU, true), Subtype(st) {}
- MCObjectWriter *createObjectWriter(raw_ostream &OS) const override {
+ MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override {
return createX86MachObjectWriter(OS, /*Is64Bit=*/true,
MachO::CPU_TYPE_X86_64, Subtype);
}
diff --git a/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp b/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp
index 76a9d2b..4508883 100644
--- a/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp
@@ -22,7 +22,8 @@ namespace {
public:
X86ELFObjectWriter(bool IsELF64, uint8_t OSABI, uint16_t EMachine);
- virtual ~X86ELFObjectWriter();
+ ~X86ELFObjectWriter() override;
+
protected:
unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup,
bool IsPCRel) const override;
@@ -248,9 +249,8 @@ unsigned X86ELFObjectWriter::GetRelocType(const MCValue &Target,
return getRelocType32(Modifier, getType32(Type), IsPCRel);
}
-MCObjectWriter *llvm::createX86ELFObjectWriter(raw_ostream &OS,
- bool IsELF64,
- uint8_t OSABI,
+MCObjectWriter *llvm::createX86ELFObjectWriter(raw_pwrite_stream &OS,
+ bool IsELF64, uint8_t OSABI,
uint16_t EMachine) {
MCELFObjectTargetWriter *MOTW =
new X86ELFObjectWriter(IsELF64, OSABI, EMachine);
diff --git a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
index 9b98a3e..e27b7cb 100644
--- a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
@@ -39,7 +39,7 @@ public:
: MCII(mcii), Ctx(ctx) {
}
- ~X86MCCodeEmitter() {}
+ ~X86MCCodeEmitter() override {}
bool is64BitMode(const MCSubtargetInfo &STI) const {
return (STI.getFeatureBits() & X86::Mode64Bit) != 0;
diff --git a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
index 0946326..5bdd844 100644
--- a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
@@ -80,7 +80,7 @@ MCSubtargetInfo *X86_MC::createX86MCSubtargetInfo(StringRef TT, StringRef CPU,
std::string ArchFS = X86_MC::ParseX86Triple(TT);
if (!FS.empty()) {
if (!ArchFS.empty())
- ArchFS = ArchFS + "," + FS.str();
+ ArchFS = (Twine(ArchFS) + "," + FS).str();
else
ArchFS = FS;
}
@@ -207,14 +207,13 @@ static MCCodeGenInfo *createX86MCCodeGenInfo(StringRef TT, Reloc::Model RM,
return X;
}
-static MCInstPrinter *createX86MCInstPrinter(const Target &T,
+static MCInstPrinter *createX86MCInstPrinter(const Triple &T,
unsigned SyntaxVariant,
const MCAsmInfo &MAI,
const MCInstrInfo &MII,
- const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI) {
+ const MCRegisterInfo &MRI) {
if (SyntaxVariant == 0)
- return new X86ATTInstPrinter(MAI, MII, MRI, STI);
+ return new X86ATTInstPrinter(MAI, MII, MRI);
if (SyntaxVariant == 1)
return new X86IntelInstPrinter(MAI, MII, MRI);
return nullptr;
diff --git a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h
index 6f50f11..dcdae1d 100644
--- a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h
+++ b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h
@@ -31,10 +31,11 @@ class Target;
class Triple;
class StringRef;
class raw_ostream;
+class raw_pwrite_stream;
extern Target TheX86_32Target, TheX86_64Target;
-/// DWARFFlavour - Flavour of dwarf regnumbers
+/// Flavour of dwarf regnumbers
///
namespace DWARFFlavour {
enum {
@@ -42,7 +43,7 @@ namespace DWARFFlavour {
};
}
-/// N86 namespace - Native X86 register numbers
+/// Native X86 register numbers
///
namespace N86 {
enum {
@@ -57,9 +58,8 @@ namespace X86_MC {
void InitLLVM2SEHRegisterMapping(MCRegisterInfo *MRI);
- /// createX86MCSubtargetInfo - Create a X86 MCSubtargetInfo instance.
- /// This is exposed so Asm parser, etc. do not need to go through
- /// TargetRegistry.
+ /// Create a X86 MCSubtargetInfo instance. This is exposed so Asm parser, etc.
+ /// do not need to go through TargetRegistry.
MCSubtargetInfo *createX86MCSubtargetInfo(StringRef TT, StringRef CPU,
StringRef FS);
}
@@ -78,27 +78,25 @@ MCAsmBackend *createX86_64AsmBackend(const Target &T, const MCRegisterInfo &MRI,
///
/// Takes ownership of \p AB and \p CE.
MCStreamer *createX86WinCOFFStreamer(MCContext &C, MCAsmBackend &AB,
- raw_ostream &OS, MCCodeEmitter *CE,
+ raw_pwrite_stream &OS, MCCodeEmitter *CE,
bool RelaxAll);
-/// createX86MachObjectWriter - Construct an X86 Mach-O object writer.
-MCObjectWriter *createX86MachObjectWriter(raw_ostream &OS,
- bool Is64Bit,
+/// Construct an X86 Mach-O object writer.
+MCObjectWriter *createX86MachObjectWriter(raw_pwrite_stream &OS, bool Is64Bit,
uint32_t CPUType,
uint32_t CPUSubtype);
-/// createX86ELFObjectWriter - Construct an X86 ELF object writer.
-MCObjectWriter *createX86ELFObjectWriter(raw_ostream &OS,
- bool IsELF64,
- uint8_t OSABI,
- uint16_t EMachine);
-/// createX86WinCOFFObjectWriter - Construct an X86 Win COFF object writer.
-MCObjectWriter *createX86WinCOFFObjectWriter(raw_ostream &OS, bool Is64Bit);
+/// Construct an X86 ELF object writer.
+MCObjectWriter *createX86ELFObjectWriter(raw_pwrite_stream &OS, bool IsELF64,
+ uint8_t OSABI, uint16_t EMachine);
+/// Construct an X86 Win COFF object writer.
+MCObjectWriter *createX86WinCOFFObjectWriter(raw_pwrite_stream &OS,
+ bool Is64Bit);
-/// createX86_64MachORelocationInfo - Construct X86-64 Mach-O relocation info.
+/// Construct X86-64 Mach-O relocation info.
MCRelocationInfo *createX86_64MachORelocationInfo(MCContext &Ctx);
-/// createX86_64ELFORelocationInfo - Construct X86-64 ELF relocation info.
+/// Construct X86-64 ELF relocation info.
MCRelocationInfo *createX86_64ELFRelocationInfo(MCContext &Ctx);
} // End llvm namespace
diff --git a/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp b/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp
index 7a83f4c..38539cd 100644
--- a/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp
@@ -575,9 +575,8 @@ void X86MachObjectWriter::RecordX86Relocation(MachObjectWriter *Writer,
Writer->addRelocation(RelSymbol, Fragment->getParent(), MRE);
}
-MCObjectWriter *llvm::createX86MachObjectWriter(raw_ostream &OS,
- bool Is64Bit,
- uint32_t CPUType,
+MCObjectWriter *llvm::createX86MachObjectWriter(raw_pwrite_stream &OS,
+ bool Is64Bit, uint32_t CPUType,
uint32_t CPUSubtype) {
return createMachObjectWriter(new X86MachObjectWriter(Is64Bit,
CPUType,
diff --git a/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp b/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp
index e1df5c2..bd1bc99 100644
--- a/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp
@@ -25,7 +25,7 @@ namespace {
class X86WinCOFFObjectWriter : public MCWinCOFFObjectTargetWriter {
public:
X86WinCOFFObjectWriter(bool Is64Bit);
- virtual ~X86WinCOFFObjectWriter();
+ ~X86WinCOFFObjectWriter() override;
unsigned getRelocType(const MCValue &Target, const MCFixup &Fixup,
bool IsCrossSection,
@@ -90,7 +90,7 @@ unsigned X86WinCOFFObjectWriter::getRelocType(const MCValue &Target,
llvm_unreachable("Unsupported COFF machine type.");
}
-MCObjectWriter *llvm::createX86WinCOFFObjectWriter(raw_ostream &OS,
+MCObjectWriter *llvm::createX86WinCOFFObjectWriter(raw_pwrite_stream &OS,
bool Is64Bit) {
MCWinCOFFObjectTargetWriter *MOTW = new X86WinCOFFObjectWriter(Is64Bit);
return createWinCOFFObjectWriter(MOTW, OS);
diff --git a/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp b/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp
index 5690efe..92f42b6 100644
--- a/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp
@@ -18,8 +18,8 @@ class X86WinCOFFStreamer : public MCWinCOFFStreamer {
Win64EH::UnwindEmitter EHStreamer;
public:
X86WinCOFFStreamer(MCContext &C, MCAsmBackend &AB, MCCodeEmitter *CE,
- raw_ostream &OS)
- : MCWinCOFFStreamer(C, AB, *CE, OS) { }
+ raw_pwrite_stream &OS)
+ : MCWinCOFFStreamer(C, AB, *CE, OS) {}
void EmitWinEHHandlerData() override;
void EmitWindowsUnwindTables() override;
@@ -49,8 +49,8 @@ void X86WinCOFFStreamer::FinishImpl() {
}
MCStreamer *llvm::createX86WinCOFFStreamer(MCContext &C, MCAsmBackend &AB,
- raw_ostream &OS, MCCodeEmitter *CE,
- bool RelaxAll) {
+ raw_pwrite_stream &OS,
+ MCCodeEmitter *CE, bool RelaxAll) {
X86WinCOFFStreamer *S = new X86WinCOFFStreamer(C, AB, CE, OS);
S->getAssembler().setRelaxAll(RelaxAll);
return S;
diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td
index 4f9836d..d13f155 100644
--- a/lib/Target/X86/X86.td
+++ b/lib/Target/X86/X86.td
@@ -273,17 +273,15 @@ def : SilvermontProc<"silvermont">;
def : SilvermontProc<"slm">; // Legacy alias.
// "Arrandale" along with corei3 and corei5
-class NehalemProc<string Name, list<SubtargetFeature> AdditionalFeatures>
- : ProcessorModel<Name, SandyBridgeModel, !listconcat([
- FeatureSSE42,
- FeatureCMPXCHG16B,
- FeatureSlowBTMem,
- FeatureFastUAMem,
- FeaturePOPCNT
- ],
- AdditionalFeatures)>;
-def : NehalemProc<"nehalem", []>;
-def : NehalemProc<"corei7", [FeatureAES]>;
+class NehalemProc<string Name> : ProcessorModel<Name, SandyBridgeModel, [
+ FeatureSSE42,
+ FeatureCMPXCHG16B,
+ FeatureSlowBTMem,
+ FeatureFastUAMem,
+ FeaturePOPCNT
+ ]>;
+def : NehalemProc<"nehalem">;
+def : NehalemProc<"corei7">;
// Westmere is a similar machine to nehalem with some additional features.
// Westmere is the corei3/i5/i7 path from nehalem to sandybridge
diff --git a/lib/Target/X86/X86AsmPrinter.cpp b/lib/Target/X86/X86AsmPrinter.cpp
index f6033a7..2ed4975 100644
--- a/lib/Target/X86/X86AsmPrinter.cpp
+++ b/lib/Target/X86/X86AsmPrinter.cpp
@@ -523,7 +523,6 @@ void X86AsmPrinter::EmitStartOfAsmFile(Module &M) {
// must be registered in .sxdata. Use of any unregistered handlers will
// cause the process to terminate immediately. LLVM does not know how to
// register any SEH handlers, so its object files should be safe.
- S->setAbsolute();
OutStreamer.EmitSymbolAttribute(S, MCSA_Global);
OutStreamer.EmitAssignment(
S, MCConstantExpr::Create(int64_t(1), MMI->getContext()));
@@ -723,28 +722,8 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) {
}
}
- if (TT.isOSBinFormatELF()) {
- const TargetLoweringObjectFileELF &TLOFELF =
- static_cast<const TargetLoweringObjectFileELF &>(getObjFileLowering());
-
- MachineModuleInfoELF &MMIELF = MMI->getObjFileInfo<MachineModuleInfoELF>();
-
- // Output stubs for external and common global variables.
- MachineModuleInfoELF::SymbolListTy Stubs = MMIELF.GetGVStubList();
- if (!Stubs.empty()) {
- OutStreamer.SwitchSection(TLOFELF.getDataRelSection());
- const DataLayout *TD = TM.getDataLayout();
-
- for (const auto &Stub : Stubs) {
- OutStreamer.EmitLabel(Stub.first);
- OutStreamer.EmitSymbolValue(Stub.second.getPointer(),
- TD->getPointerSize());
- }
- Stubs.clear();
- }
-
+ if (TT.isOSBinFormatELF())
SM.serializeToStackMapSection();
- }
}
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp
index cba140f..cdf10a7 100644
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@@ -2417,6 +2417,8 @@ bool X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
const MCInstrDesc &II = TII.get(TargetOpcode::DBG_VALUE);
// FIXME may need to add RegState::Debug to any registers produced,
// although ESP/EBP should be the only ones at the moment.
+ assert(DI->getVariable()->isValidLocationForIntrinsic(DbgLoc) &&
+ "Expected inlined-at fields to agree");
addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II), AM)
.addImm(0)
.addMetadata(DI->getVariable())
diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp
index fb12ce5..5da7acf 100644
--- a/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -2187,7 +2187,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
if (Opcode != ISD::AND && (Val & RemovedBitsMask) != 0)
break;
- unsigned ShlOp, Op;
+ unsigned ShlOp, AddOp, Op;
MVT CstVT = NVT;
// Check the minimum bitwidth for the new constant.
@@ -2208,6 +2208,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
case MVT::i32:
assert(CstVT == MVT::i8);
ShlOp = X86::SHL32ri;
+ AddOp = X86::ADD32rr;
switch (Opcode) {
default: llvm_unreachable("Impossible opcode");
@@ -2219,6 +2220,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
case MVT::i64:
assert(CstVT == MVT::i8 || CstVT == MVT::i32);
ShlOp = X86::SHL64ri;
+ AddOp = X86::ADD64rr;
switch (Opcode) {
default: llvm_unreachable("Impossible opcode");
@@ -2232,6 +2234,9 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
// Emit the smaller op and the shift.
SDValue NewCst = CurDAG->getTargetConstant(Val >> ShlVal, CstVT);
SDNode *New = CurDAG->getMachineNode(Op, dl, NVT, N0->getOperand(0),NewCst);
+ if (ShlVal == 1)
+ return CurDAG->SelectNodeTo(Node, AddOp, NVT, SDValue(New, 0),
+ SDValue(New, 0));
return CurDAG->SelectNodeTo(Node, ShlOp, NVT, SDValue(New, 0),
getI8Imm(ShlVal));
}
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 8b92e70..c32412a 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -32,6 +32,7 @@
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/WinEHFuncInfo.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constants.h"
@@ -2142,6 +2143,7 @@ CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
/*isVolatile*/false, /*AlwaysInline=*/true,
+ /*isTailCall*/false,
MachinePointerInfo(), MachinePointerInfo());
}
@@ -2277,6 +2279,7 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
const {
MachineFunction &MF = DAG.getMachineFunction();
X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
+ const TargetFrameLowering &TFI = *Subtarget->getFrameLowering();
const Function* Fn = MF.getFunction();
if (Fn->hasExternalLinkage() &&
@@ -2416,6 +2419,13 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
MFI->CreateFixedObject(1, StackSize, true));
}
+ MachineModuleInfo &MMI = MF.getMMI();
+ const Function *WinEHParent = nullptr;
+ if (IsWin64 && MMI.hasWinEHFuncInfo(Fn))
+ WinEHParent = MMI.getWinEHParent(Fn);
+ bool IsWinEHOutlined = WinEHParent && WinEHParent != Fn;
+ bool IsWinEHParent = WinEHParent && WinEHParent == Fn;
+
// Figure out if XMM registers are in use.
assert(!(MF.getTarget().Options.UseSoftFloat &&
Fn->hasFnAttribute(Attribute::NoImplicitFloat)) &&
@@ -2452,7 +2462,6 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
}
if (IsWin64) {
- const TargetFrameLowering &TFI = *Subtarget->getFrameLowering();
// Get to the caller-allocated home save location. Add 8 to account
// for the return address.
int HomeOffset = TFI.getOffsetOfLocalArea() + 8;
@@ -2505,6 +2514,27 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
if (!MemOps.empty())
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
+ } else if (IsWinEHOutlined) {
+ // Get to the caller-allocated home save location. Add 8 to account
+ // for the return address.
+ int HomeOffset = TFI.getOffsetOfLocalArea() + 8;
+ FuncInfo->setRegSaveFrameIndex(MFI->CreateFixedObject(
+ /*Size=*/1, /*SPOffset=*/HomeOffset + 8, /*Immutable=*/false));
+
+ MMI.getWinEHFuncInfo(Fn)
+ .CatchHandlerParentFrameObjIdx[const_cast<Function *>(Fn)] =
+ FuncInfo->getRegSaveFrameIndex();
+
+ // Store the second integer parameter (rdx) into rsp+16 relative to the
+ // stack pointer at the entry of the function.
+ SDValue RSFIN =
+ DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(), getPointerTy());
+ unsigned GPR = MF.addLiveIn(X86::RDX, &X86::GR64RegClass);
+ SDValue Val = DAG.getCopyFromReg(Chain, dl, GPR, MVT::i64);
+ Chain = DAG.getStore(
+ Val.getValue(1), dl, Val, RSFIN,
+ MachinePointerInfo::getFixedStack(FuncInfo->getRegSaveFrameIndex()),
+ /*isVolatile=*/true, /*isNonTemporal=*/false, /*Alignment=*/0);
}
if (isVarArg && MFI->hasMustTailInVarArgFunc()) {
@@ -2571,6 +2601,17 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
FuncInfo->setArgumentStackSize(StackSize);
+ if (IsWinEHParent) {
+ int UnwindHelpFI = MFI->CreateStackObject(8, 8, /*isSS=*/false);
+ SDValue StackSlot = DAG.getFrameIndex(UnwindHelpFI, MVT::i64);
+ MMI.getWinEHFuncInfo(MF.getFunction()).UnwindHelpFrameIdx = UnwindHelpFI;
+ SDValue Neg2 = DAG.getConstant(-2, MVT::i64);
+ Chain = DAG.getStore(Chain, dl, Neg2, StackSlot,
+ MachinePointerInfo::getFixedStack(UnwindHelpFI),
+ /*isVolatile=*/true,
+ /*isNonTemporal=*/false, /*Alignment=*/0);
+ }
+
return Chain;
}
@@ -4420,6 +4461,29 @@ static SDValue LowerBuildVectorv16i8(SDValue Op, unsigned NonZeros,
SDLoc dl(Op);
SDValue V;
bool First = true;
+
+ // SSE4.1 - use PINSRB to insert each byte directly.
+ if (Subtarget->hasSSE41()) {
+ for (unsigned i = 0; i < 16; ++i) {
+ bool isNonZero = (NonZeros & (1 << i)) != 0;
+ if (isNonZero) {
+ if (First) {
+ if (NumZero)
+ V = getZeroVector(MVT::v16i8, Subtarget, DAG, dl);
+ else
+ V = DAG.getUNDEF(MVT::v16i8);
+ First = false;
+ }
+ V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl,
+ MVT::v16i8, V, Op.getOperand(i),
+ DAG.getIntPtrConstant(i));
+ }
+ }
+
+ return V;
+ }
+
+ // Pre-SSE4.1 - merge byte pairs and insert with PINSRW.
for (unsigned i = 0; i < 16; ++i) {
bool ThisIsNonZero = (NonZeros & (1 << i)) != 0;
if (ThisIsNonZero && First) {
@@ -5650,14 +5714,24 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
return getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget, DAG);
}
+ // We can't directly insert an i8 or i16 into a vector, so zero extend
+ // it to i32 first.
if (ExtVT == MVT::i16 || ExtVT == MVT::i8) {
Item = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Item);
- Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, Item);
if (VT.is256BitVector()) {
- SDValue ZeroVec = getZeroVector(MVT::v8i32, Subtarget, DAG, dl);
- Item = Insert128BitVector(ZeroVec, Item, 0, DAG, dl);
+ if (Subtarget->hasAVX()) {
+ Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v8i32, Item);
+ Item = getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget, DAG);
+ } else {
+ // Without AVX, we need to extend to a 128-bit vector and then
+ // insert into the 256-bit vector.
+ Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, Item);
+ SDValue ZeroVec = getZeroVector(MVT::v8i32, Subtarget, DAG, dl);
+ Item = Insert128BitVector(ZeroVec, Item, 0, DAG, dl);
+ }
} else {
assert(VT.is128BitVector() && "Expected an SSE value type!");
+ Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, Item);
Item = getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget, DAG);
}
return DAG.getNode(ISD::BITCAST, dl, VT, Item);
@@ -5877,7 +5951,7 @@ static SDValue LowerAVXCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) {
SDValue V1 = Op.getOperand(0);
SDValue V2 = Op.getOperand(1);
unsigned NumElems = ResVT.getVectorNumElements();
- if(ResVT.is256BitVector())
+ if (ResVT.is256BitVector())
return Concat128BitVectors(V1, V2, ResVT, NumElems, DAG, dl);
if (Op.getNumOperands() == 4) {
@@ -9281,15 +9355,6 @@ static SDValue lowerV4F64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
if (isShuffleEquivalent(V1, V2, Mask, {5, 1, 7, 3}))
return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v4f64, V2, V1);
- // If we have a single input to the zero element, insert that into V1 if we
- // can do so cheaply.
- int NumV2Elements =
- std::count_if(Mask.begin(), Mask.end(), [](int M) { return M >= 4; });
- if (NumV2Elements == 1 && Mask[0] >= 4)
- if (SDValue Insertion = lowerVectorShuffleAsElementInsertion(
- DL, MVT::v4f64, V1, V2, Mask, Subtarget, DAG))
- return Insertion;
-
if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v4f64, V1, V2, Mask,
Subtarget, DAG))
return Blend;
@@ -9432,15 +9497,6 @@ static SDValue lowerV8F32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
ArrayRef<int> Mask = SVOp->getMask();
assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!");
- // If we have a single input to the zero element, insert that into V1 if we
- // can do so cheaply.
- int NumV2Elements =
- std::count_if(Mask.begin(), Mask.end(), [](int M) { return M >= 8; });
- if (NumV2Elements == 1 && Mask[0] >= 8)
- if (SDValue Insertion = lowerVectorShuffleAsElementInsertion(
- DL, MVT::v8f32, V1, V2, Mask, Subtarget, DAG))
- return Insertion;
-
if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v8f32, V1, V2, Mask,
Subtarget, DAG))
return Blend;
@@ -9811,6 +9867,18 @@ static SDValue lower256BitVectorShuffle(SDValue Op, SDValue V1, SDValue V2,
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
ArrayRef<int> Mask = SVOp->getMask();
+ // If we have a single input to the zero element, insert that into V1 if we
+ // can do so cheaply.
+ int NumElts = VT.getVectorNumElements();
+ int NumV2Elements = std::count_if(Mask.begin(), Mask.end(), [NumElts](int M) {
+ return M >= NumElts;
+ });
+
+ if (NumV2Elements == 1 && Mask[0] >= NumElts)
+ if (SDValue Insertion = lowerVectorShuffleAsElementInsertion(
+ DL, VT, V1, V2, Mask, Subtarget, DAG))
+ return Insertion;
+
// There is a really nice hard cut-over between AVX1 and AVX2 that means we can
// check for those subtargets here and avoid much of the subtarget querying in
// the per-vector-type lowering routines. With AVX1 we have essentially *zero*
@@ -11903,7 +11971,7 @@ static SDValue LowerZERO_EXTEND_AVX512(SDValue Op,
// Now we have only mask extension
assert(InVT.getVectorElementType() == MVT::i1);
SDValue Cst = DAG.getTargetConstant(1, ExtVT.getScalarType());
- const Constant *C = (dyn_cast<ConstantSDNode>(Cst))->getConstantIntValue();
+ const Constant *C = cast<ConstantSDNode>(Cst)->getConstantIntValue();
SDValue CP = DAG.getConstantPool(C, TLI.getPointerTy());
unsigned Alignment = cast<ConstantPoolSDNode>(CP)->getAlignment();
SDValue Ld = DAG.getLoad(Cst.getValueType(), DL, DAG.getEntryNode(), CP,
@@ -11979,7 +12047,7 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
}
SDValue Cst = DAG.getTargetConstant(1, InVT.getVectorElementType());
- const Constant *C = (dyn_cast<ConstantSDNode>(Cst))->getConstantIntValue();
+ const Constant *C = cast<ConstantSDNode>(Cst)->getConstantIntValue();
SDValue CP = DAG.getConstantPool(C, getPointerTy());
unsigned Alignment = cast<ConstantPoolSDNode>(CP)->getAlignment();
SDValue Ld = DAG.getLoad(Cst.getValueType(), DL, DAG.getEntryNode(), CP,
@@ -12750,6 +12818,16 @@ SDValue X86TargetLowering::getRecipEstimate(SDValue Op,
return SDValue();
}
+/// If we have at least two divisions that use the same divisor, convert to
+/// multiplication by a reciprocal. This may need to be adjusted for a given
+/// CPU if a division's cost is not at least twice the cost of a multiplication.
+/// This is because we still need one division to calculate the reciprocal and
+/// then we need two multiplies by that reciprocal as replacements for the
+/// original divisions.
+bool X86TargetLowering::combineRepeatedFPDivisors(unsigned NumUsers) const {
+ return NumUsers > 1;
+}
+
static bool isAllOnes(SDValue V) {
ConstantSDNode *C = dyn_cast<ConstantSDNode>(V);
return C && C->isAllOnesValue();
@@ -14427,7 +14505,7 @@ static SDValue LowerVACOPY(SDValue Op, const X86Subtarget *Subtarget,
return DAG.getMemcpy(Chain, DL, DstPtr, SrcPtr,
DAG.getIntPtrConstant(24), 8, /*isVolatile*/false,
- false,
+ false, false,
MachinePointerInfo(DstSV), MachinePointerInfo(SrcSV));
}
@@ -15220,10 +15298,8 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget *Subtarget,
}
case PREFETCH: {
SDValue Hint = Op.getOperand(6);
- unsigned HintVal;
- if (dyn_cast<ConstantSDNode> (Hint) == nullptr ||
- (HintVal = dyn_cast<ConstantSDNode> (Hint)->getZExtValue()) > 1)
- llvm_unreachable("Wrong prefetch hint in intrinsic: should be 0 or 1");
+ unsigned HintVal = cast<ConstantSDNode>(Hint)->getZExtValue();
+ assert(HintVal < 2 && "Wrong prefetch hint in intrinsic: should be 0 or 1");
unsigned Opcode = (HintVal ? IntrData->Opc1 : IntrData->Opc0);
SDValue Chain = Op.getOperand(0);
SDValue Mask = Op.getOperand(2);
@@ -24175,7 +24251,7 @@ TargetLowering::ConstraintWeight
break;
case 'G':
case 'C':
- if (dyn_cast<ConstantFP>(CallOperandVal)) {
+ if (isa<ConstantFP>(CallOperandVal)) {
weight = CW_Constant;
}
break;
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index dd20ec2..5130c37 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -1072,6 +1072,9 @@ namespace llvm {
/// Use rcp* to speed up fdiv calculations.
SDValue getRecipEstimate(SDValue Operand, DAGCombinerInfo &DCI,
unsigned &RefinementSteps) const override;
+
+ /// Reassociate floating point divisions into multiply by reciprocal.
+ bool combineRepeatedFPDivisors(unsigned NumUsers) const override;
};
namespace X86 {
diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td
index 509602f..0959162 100644
--- a/lib/Target/X86/X86InstrAVX512.td
+++ b/lib/Target/X86/X86InstrAVX512.td
@@ -2971,60 +2971,36 @@ multiclass avx512_binop_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
itins, HasBWI, IsCommutable>;
}
-multiclass avx512_binop_rm2<bits<8> opc, string OpcodeStr, ValueType DstVT,
- ValueType SrcVT, RegisterClass KRC, RegisterClass RC,
- PatFrag memop_frag, X86MemOperand x86memop,
- PatFrag scalar_mfrag, X86MemOperand x86scalar_mop,
- string BrdcstStr, OpndItins itins, bit IsCommutable = 0> {
- let isCommutable = IsCommutable in
- {
- def rr : AVX512BI<opc, MRMSrcReg, (outs RC:$dst),
- (ins RC:$src1, RC:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- []>, EVEX_4V;
- def rrk : AVX512BI<opc, MRMSrcReg, (outs RC:$dst),
- (ins KRC:$mask, RC:$src1, RC:$src2),
- !strconcat(OpcodeStr,
- "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"),
- [], itins.rr>, EVEX_4V, EVEX_K;
- def rrkz : AVX512BI<opc, MRMSrcReg, (outs RC:$dst),
- (ins KRC:$mask, RC:$src1, RC:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}} {z}" ,
- "|$dst {${mask}} {z}, $src1, $src2}"),
- [], itins.rr>, EVEX_4V, EVEX_KZ;
- }
+multiclass avx512_binop_rm2<bits<8> opc, string OpcodeStr, OpndItins itins,
+ SDNode OpNode,X86VectorVTInfo _Src,
+ X86VectorVTInfo _Dst, bit IsCommutable = 0> {
+ defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst),
+ (ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr,
+ "$src2, $src1","$src1, $src2",
+ (_Dst.VT (OpNode
+ (_Src.VT _Src.RC:$src1),
+ (_Src.VT _Src.RC:$src2))),
+ "",itins.rr, IsCommutable>,
+ AVX512BIBase, EVEX_4V;
let mayLoad = 1 in {
- def rm : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
- (ins RC:$src1, x86memop:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- []>, EVEX_4V;
- def rmk : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
- (ins KRC:$mask, RC:$src1, x86memop:$src2),
- !strconcat(OpcodeStr,
- "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}"),
- [], itins.rm>, EVEX_4V, EVEX_K;
- def rmkz : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
- (ins KRC:$mask, RC:$src1, x86memop:$src2),
- !strconcat(OpcodeStr,
- "\t{$src2, $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, $src2}"),
- [], itins.rm>, EVEX_4V, EVEX_KZ;
- def rmb : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
- (ins RC:$src1, x86scalar_mop:$src2),
- !strconcat(OpcodeStr, "\t{${src2}", BrdcstStr,
- ", $src1, $dst|$dst, $src1, ${src2}", BrdcstStr, "}"),
- [], itins.rm>, EVEX_4V, EVEX_B;
- def rmbk : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
- (ins KRC:$mask, RC:$src1, x86scalar_mop:$src2),
- !strconcat(OpcodeStr, "\t{${src2}", BrdcstStr,
- ", $src1, $dst {${mask}}|$dst {${mask}}, $src1, ${src2}",
- BrdcstStr, "}"),
- [], itins.rm>, EVEX_4V, EVEX_B, EVEX_K;
- def rmbkz : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
- (ins KRC:$mask, RC:$src1, x86scalar_mop:$src2),
- !strconcat(OpcodeStr, "\t{${src2}", BrdcstStr,
- ", $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, ${src2}",
- BrdcstStr, "}"),
- [], itins.rm>, EVEX_4V, EVEX_B, EVEX_KZ;
+ defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
+ (ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr,
+ "$src2, $src1", "$src1, $src2",
+ (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1),
+ (bitconvert (_Src.LdFrag addr:$src2)))),
+ "", itins.rm>,
+ AVX512BIBase, EVEX_4V;
+
+ defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
+ (ins _Src.RC:$src1, _Dst.ScalarMemOp:$src2),
+ OpcodeStr,
+ "${src2}"##_Dst.BroadcastStr##", $src1",
+ "$src1, ${src2}"##_Dst.BroadcastStr,
+ (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bc_v16i32
+ (_Dst.VT (X86VBroadcast
+ (_Dst.ScalarLdFrag addr:$src2)))))),
+ "", itins.rm>,
+ AVX512BIBase, EVEX_4V, EVEX_B;
}
}
@@ -3039,24 +3015,13 @@ defm VPMULLW : avx512_binop_rm_vl_w<0xD5, "vpmull", mul,
defm VPMULLQ : avx512_binop_rm_vl_q<0x40, "vpmull", mul,
SSE_INTALU_ITINS_P, HasDQI, 1>, T8PD;
-defm VPMULDQZ : avx512_binop_rm2<0x28, "vpmuldq", v8i64, v16i32, VK8WM, VR512,
- loadv8i64, i512mem, loadi64, i64mem, "{1to8}",
- SSE_INTALU_ITINS_P, 1>, T8PD, EVEX_V512,
- EVEX_CD8<64, CD8VF>, VEX_W;
-
-defm VPMULUDQZ : avx512_binop_rm2<0xF4, "vpmuludq", v8i64, v16i32, VK8WM, VR512,
- loadv8i64, i512mem, loadi64, i64mem, "{1to8}",
- SSE_INTMUL_ITINS_P, 1>, EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_W;
+defm VPMULDQZ : avx512_binop_rm2<0x28, "vpmuldq", SSE_INTALU_ITINS_P,
+ X86pmuldq, v16i32_info, v8i64_info, 1>,
+ T8PD, EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_W;
-def : Pat<(v8i64 (X86pmuludq (v16i32 VR512:$src1), (v16i32 VR512:$src2))),
- (VPMULUDQZrr VR512:$src1, VR512:$src2)>;
-
-def : Pat<(v8i64 (int_x86_avx512_mask_pmulu_dq_512 (v16i32 VR512:$src1),
- (v16i32 VR512:$src2), (bc_v8i64 (v16i32 immAllZerosV)), (i8 -1))),
- (VPMULUDQZrr VR512:$src1, VR512:$src2)>;
-def : Pat<(v8i64 (int_x86_avx512_mask_pmul_dq_512 (v16i32 VR512:$src1),
- (v16i32 VR512:$src2), (bc_v8i64 (v16i32 immAllZerosV)), (i8 -1))),
- (VPMULDQZrr VR512:$src1, VR512:$src2)>;
+defm VPMULUDQZ : avx512_binop_rm2<0xF4, "vpmuludq", SSE_INTMUL_ITINS_P,
+ X86pmuludq, v16i32_info, v8i64_info, 1>,
+ EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_W;
defm VPMAXSB : avx512_binop_rm_vl_b<0x3C, "vpmaxs", X86smax,
SSE_INTALU_ITINS_P, HasBWI, 1>, T8PD;
@@ -3208,7 +3173,7 @@ defm VPOR : avx512_binop_rm_vl_dq<0xEB, 0xEB, "vpor", or,
defm VPXOR : avx512_binop_rm_vl_dq<0xEF, 0xEF, "vpxor", xor,
SSE_INTALU_ITINS_P, HasAVX512, 1>;
defm VPANDN : avx512_binop_rm_vl_dq<0xDF, 0xDF, "vpandn", X86andnp,
- SSE_INTALU_ITINS_P, HasAVX512, 1>;
+ SSE_INTALU_ITINS_P, HasAVX512, 0>;
//===----------------------------------------------------------------------===//
// AVX-512 FP arithmetic
@@ -3743,16 +3708,19 @@ multiclass avx512_fma3p_rm<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.ScalarMemOp:$src3),
- OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"), !strconcat("$src2, ${src3}", _.BroadcastStr ),
- (OpNode _.RC:$src1, _.RC:$src2,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))))>,
+ OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"),
+ !strconcat("$src2, ${src3}", _.BroadcastStr ),
+ (OpNode _.RC:$src1,
+ _.RC:$src2,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))))>,
AVX512FMA3Base, EVEX_B;
}
} // Constraints = "$src1 = $dst"
let Constraints = "$src1 = $dst" in {
// Omitting the parameter OpNode (= null_frag) disables ISel pattern matching.
-multiclass avx512_fma3_round_rrb<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
- SDPatternOperator OpNode> {
+multiclass avx512_fma3_round_rrb<bits<8> opc, string OpcodeStr,
+ X86VectorVTInfo _,
+ SDPatternOperator OpNode> {
defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
@@ -3772,7 +3740,6 @@ multiclass avx512_fma3p_forms<bits<8> opc213, bits<8> opc231,
SDPatternOperator OpNode> {
defm v213r : avx512_fma3p_rm<opc213, !strconcat(OpcodeStr, "213", VTI.Suffix),
VTI, OpNode>, EVEX_CD8<VTI.EltSize, CD8VF>;
-
defm v231r : avx512_fma3p_rm<opc231, !strconcat(OpcodeStr, "231", VTI.Suffix),
VTI>, EVEX_CD8<VTI.EltSize, CD8VF>;
}
@@ -3794,12 +3761,14 @@ let ExeDomain = SSEPackedSingle in {
let ExeDomain = SSEPackedDouble in {
defm NAME##PDZ : avx512_fma3p_forms<opc213, opc231, OpcodeStr,
v8f64_info, OpNode>,
- avx512_fma3_round_forms<opc213, OpcodeStr,
- v8f64_info, OpNodeRnd>, EVEX_V512, VEX_W;
+ avx512_fma3_round_forms<opc213, OpcodeStr, v8f64_info,
+ OpNodeRnd>, EVEX_V512, VEX_W;
defm NAME##PDZ256 : avx512_fma3p_forms<opc213, opc231, OpcodeStr,
- v4f64x_info, OpNode>, EVEX_V256, VEX_W;
+ v4f64x_info, OpNode>,
+ EVEX_V256, VEX_W;
defm NAME##PDZ128 : avx512_fma3p_forms<opc213, opc231, OpcodeStr,
- v2f64x_info, OpNode>, EVEX_V128, VEX_W;
+ v2f64x_info, OpNode>,
+ EVEX_V128, VEX_W;
}
}
@@ -3830,26 +3799,29 @@ multiclass avx512_fma3p_m132<bits<8> opc, string OpcodeStr, SDNode OpNode,
}
} // Constraints = "$src1 = $dst"
-
-multiclass avx512_fma3p_m132_f<bits<8> opc,
- string OpcodeStr,
- SDNode OpNode> {
+multiclass avx512_fma3p_m132_f<bits<8> opc, string OpcodeStr, SDNode OpNode> {
let ExeDomain = SSEPackedSingle in {
defm NAME##PSZ : avx512_fma3p_m132<opc, OpcodeStr##ps,
- OpNode,v16f32_info>, EVEX_V512, EVEX_CD8<32, CD8VF>;
+ OpNode,v16f32_info>, EVEX_V512,
+ EVEX_CD8<32, CD8VF>;
defm NAME##PSZ256 : avx512_fma3p_m132<opc, OpcodeStr##ps,
- OpNode, v8f32x_info>, EVEX_V256, EVEX_CD8<32, CD8VF>;
+ OpNode, v8f32x_info>, EVEX_V256,
+ EVEX_CD8<32, CD8VF>;
defm NAME##PSZ128 : avx512_fma3p_m132<opc, OpcodeStr##ps,
- OpNode, v4f32x_info>, EVEX_V128, EVEX_CD8<32, CD8VF>;
+ OpNode, v4f32x_info>, EVEX_V128,
+ EVEX_CD8<32, CD8VF>;
}
let ExeDomain = SSEPackedDouble in {
defm NAME##PDZ : avx512_fma3p_m132<opc, OpcodeStr##pd,
- OpNode, v8f64_info>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VF>;
+ OpNode, v8f64_info>, EVEX_V512,
+ VEX_W, EVEX_CD8<32, CD8VF>;
defm NAME##PDZ256 : avx512_fma3p_m132<opc, OpcodeStr##pd,
- OpNode, v4f64x_info>, EVEX_V256, VEX_W, EVEX_CD8<32, CD8VF>;
+ OpNode, v4f64x_info>, EVEX_V256,
+ VEX_W, EVEX_CD8<32, CD8VF>;
defm NAME##PDZ128 : avx512_fma3p_m132<opc, OpcodeStr##pd,
- OpNode, v2f64x_info>, EVEX_V128, VEX_W, EVEX_CD8<32, CD8VF>;
+ OpNode, v2f64x_info>, EVEX_V128,
+ VEX_W, EVEX_CD8<32, CD8VF>;
}
}
@@ -3860,7 +3832,6 @@ defm VFMSUBADD132 : avx512_fma3p_m132_f<0x97, "vfmsubadd132", X86Fmsubadd>;
defm VFNMADD132 : avx512_fma3p_m132_f<0x9C, "vfnmadd132", X86Fnmadd>;
defm VFNMSUB132 : avx512_fma3p_m132_f<0x9E, "vfnmsub132", X86Fnmsub>;
-
// Scalar FMA
let Constraints = "$src1 = $dst" in {
multiclass avx512_fma3s_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
@@ -3883,7 +3854,6 @@ multiclass avx512_fma3s_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
(OpVT (OpNode RC:$src2, RC:$src1,
(mem_frag addr:$src3))))]>;
}
-
} // Constraints = "$src1 = $dst"
defm VFMADDSSZ : avx512_fma3s_rm<0xA9, "vfmadd213ss", X86Fmadd, FR32X,
@@ -3920,6 +3890,7 @@ let hasSideEffects = 0 in {
EVEX_4V;
} // hasSideEffects = 0
}
+
let Predicates = [HasAVX512] in {
defm VCVTSI2SSZ : avx512_vcvtsi<0x2A, GR32, FR32X, i32mem, "cvtsi2ss{l}">,
XS, VEX_LIG, EVEX_CD8<32, CD8VT1>;
diff --git a/lib/Target/X86/X86InstrArithmetic.td b/lib/Target/X86/X86InstrArithmetic.td
index 78efc4d..5e19ad4 100644
--- a/lib/Target/X86/X86InstrArithmetic.td
+++ b/lib/Target/X86/X86InstrArithmetic.td
@@ -1216,10 +1216,10 @@ def X86testpat : PatFrag<(ops node:$lhs, node:$rhs),
let isCompare = 1 in {
let Defs = [EFLAGS] in {
let isCommutable = 1 in {
- def TEST8rr : BinOpRR_F<0x84, "test", Xi8 , X86testpat, MRMSrcReg>;
- def TEST16rr : BinOpRR_F<0x84, "test", Xi16, X86testpat, MRMSrcReg>;
- def TEST32rr : BinOpRR_F<0x84, "test", Xi32, X86testpat, MRMSrcReg>;
- def TEST64rr : BinOpRR_F<0x84, "test", Xi64, X86testpat, MRMSrcReg>;
+ def TEST8rr : BinOpRR_F<0x84, "test", Xi8 , X86testpat>;
+ def TEST16rr : BinOpRR_F<0x84, "test", Xi16, X86testpat>;
+ def TEST32rr : BinOpRR_F<0x84, "test", Xi32, X86testpat>;
+ def TEST64rr : BinOpRR_F<0x84, "test", Xi64, X86testpat>;
} // isCommutable
def TEST8rm : BinOpRM_F<0x84, "test", Xi8 , X86testpat>;
diff --git a/lib/Target/X86/X86InstrCompiler.td b/lib/Target/X86/X86InstrCompiler.td
index 18bbe5d..45e6d0a 100644
--- a/lib/Target/X86/X86InstrCompiler.td
+++ b/lib/Target/X86/X86InstrCompiler.td
@@ -1232,7 +1232,11 @@ def : Pat<(store (add (loadi64 addr:$dst), 0x00000000800000000), addr:$dst),
// with implicit zero-extension instead of a 64-bit and if the immediate has at
// least 32 bits of leading zeros. If in addition the last 32 bits can be
// represented with a sign extension of a 8 bit constant, use that.
+// This can also reduce instruction size by eliminating the need for the REX
+// prefix.
+// AddedComplexity is needed to give priority over i64immSExt8 and i64immSExt32.
+let AddedComplexity = 1 in {
def : Pat<(and GR64:$src, i64immZExt32SExt8:$imm),
(SUBREG_TO_REG
(i64 0),
@@ -1248,8 +1252,13 @@ def : Pat<(and GR64:$src, i64immZExt32:$imm),
(EXTRACT_SUBREG GR64:$src, sub_32bit),
(i32 (GetLo32XForm imm:$imm))),
sub_32bit)>;
+} // AddedComplexity = 1
+// AddedComplexity is needed due to the increased complexity on the
+// i64immZExt32SExt8 and i64immZExt32 patterns above. Applying this to all
+// the MOVZX patterns keeps them together in DAGIsel tables.
+let AddedComplexity = 1 in {
// r & (2^16-1) ==> movz
def : Pat<(and GR32:$src1, 0xffff),
(MOVZX32rr16 (EXTRACT_SUBREG GR32:$src1, sub_16bit))>;
@@ -1272,6 +1281,7 @@ def : Pat<(and GR64:$src, 0x00000000FFFFFFFF),
(MOV32rr (EXTRACT_SUBREG GR64:$src, sub_32bit)),
sub_32bit)>;
// r & (2^16-1) ==> movz
+let AddedComplexity = 1 in // Give priority over i64immZExt32.
def : Pat<(and GR64:$src, 0xffff),
(SUBREG_TO_REG (i64 0),
(MOVZX32rr16 (i16 (EXTRACT_SUBREG GR64:$src, sub_16bit))),
@@ -1290,6 +1300,7 @@ def : Pat<(and GR16:$src1, 0xff),
(EXTRACT_SUBREG (MOVZX32rr8 (i8
(EXTRACT_SUBREG GR16:$src1, sub_8bit))), sub_16bit)>,
Requires<[In64BitMode]>;
+} // AddedComplexity = 1
// sext_inreg patterns
diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td
index 0bdabdf..b75a9f4 100644
--- a/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -631,53 +631,53 @@ def vinsert256_insert : PatFrag<(ops node:$bigvec, node:$smallvec,
def masked_load_aligned128 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
(masked_load node:$src1, node:$src2, node:$src3), [{
- if (dyn_cast<MaskedLoadSDNode>(N))
- return cast<MaskedLoadSDNode>(N)->getAlignment() >= 16;
+ if (auto *Load = dyn_cast<MaskedLoadSDNode>(N))
+ return Load->getAlignment() >= 16;
return false;
}]>;
def masked_load_aligned256 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
(masked_load node:$src1, node:$src2, node:$src3), [{
- if (dyn_cast<MaskedLoadSDNode>(N))
- return cast<MaskedLoadSDNode>(N)->getAlignment() >= 32;
+ if (auto *Load = dyn_cast<MaskedLoadSDNode>(N))
+ return Load->getAlignment() >= 32;
return false;
}]>;
def masked_load_aligned512 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
(masked_load node:$src1, node:$src2, node:$src3), [{
- if (dyn_cast<MaskedLoadSDNode>(N))
- return cast<MaskedLoadSDNode>(N)->getAlignment() >= 64;
+ if (auto *Load = dyn_cast<MaskedLoadSDNode>(N))
+ return Load->getAlignment() >= 64;
return false;
}]>;
def masked_load_unaligned : PatFrag<(ops node:$src1, node:$src2, node:$src3),
(masked_load node:$src1, node:$src2, node:$src3), [{
- return (dyn_cast<MaskedLoadSDNode>(N) != 0);
+ return isa<MaskedLoadSDNode>(N);
}]>;
def masked_store_aligned128 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
(masked_store node:$src1, node:$src2, node:$src3), [{
- if (dyn_cast<MaskedStoreSDNode>(N))
- return cast<MaskedStoreSDNode>(N)->getAlignment() >= 16;
+ if (auto *Store = dyn_cast<MaskedStoreSDNode>(N))
+ return Store->getAlignment() >= 16;
return false;
}]>;
def masked_store_aligned256 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
(masked_store node:$src1, node:$src2, node:$src3), [{
- if (dyn_cast<MaskedStoreSDNode>(N))
- return cast<MaskedStoreSDNode>(N)->getAlignment() >= 32;
+ if (auto *Store = dyn_cast<MaskedStoreSDNode>(N))
+ return Store->getAlignment() >= 32;
return false;
}]>;
def masked_store_aligned512 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
(masked_store node:$src1, node:$src2, node:$src3), [{
- if (dyn_cast<MaskedStoreSDNode>(N))
- return cast<MaskedStoreSDNode>(N)->getAlignment() >= 64;
+ if (auto *Store = dyn_cast<MaskedStoreSDNode>(N))
+ return Store->getAlignment() >= 64;
return false;
}]>;
def masked_store_unaligned : PatFrag<(ops node:$src1, node:$src2, node:$src3),
(masked_store node:$src1, node:$src2, node:$src3), [{
- return (dyn_cast<MaskedStoreSDNode>(N) != 0);
+ return isa<MaskedStoreSDNode>(N);
}]>;
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index 538ec1c..fbfd868 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -559,6 +559,15 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::MMX_PABSWrr64, X86::MMX_PABSWrm64, 0 },
{ X86::MMX_PSHUFWri, X86::MMX_PSHUFWmi, 0 },
+ // 3DNow! version of foldable instructions
+ { X86::PF2IDrr, X86::PF2IDrm, 0 },
+ { X86::PF2IWrr, X86::PF2IWrm, 0 },
+ { X86::PFRCPrr, X86::PFRCPrm, 0 },
+ { X86::PFRSQRTrr, X86::PFRSQRTrm, 0 },
+ { X86::PI2FDrr, X86::PI2FDrm, 0 },
+ { X86::PI2FWrr, X86::PI2FWrm, 0 },
+ { X86::PSWAPDrr, X86::PSWAPDrm, 0 },
+
// AVX 128-bit versions of foldable instructions
{ X86::Int_VCOMISDrr, X86::Int_VCOMISDrm, 0 },
{ X86::Int_VCOMISSrr, X86::Int_VCOMISSrm, 0 },
@@ -943,6 +952,8 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::CMPPSrri, X86::CMPPSrmi, TB_ALIGN_16 },
{ X86::CMPSDrr, X86::CMPSDrm, 0 },
{ X86::CMPSSrr, X86::CMPSSrm, 0 },
+ { X86::CRC32r32r32, X86::CRC32r32m32, 0 },
+ { X86::CRC32r64r64, X86::CRC32r64m64, 0 },
{ X86::DIVPDrr, X86::DIVPDrm, TB_ALIGN_16 },
{ X86::DIVPSrr, X86::DIVPSrm, TB_ALIGN_16 },
{ X86::DIVSDrr, X86::DIVSDrm, 0 },
@@ -1201,6 +1212,25 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::MMX_PUNPCKLWDirr, X86::MMX_PUNPCKLWDirm, 0 },
{ X86::MMX_PXORirr, X86::MMX_PXORirm, 0 },
+ // 3DNow! version of foldable instructions
+ { X86::PAVGUSBrr, X86::PAVGUSBrm, 0 },
+ { X86::PFACCrr, X86::PFACCrm, 0 },
+ { X86::PFADDrr, X86::PFADDrm, 0 },
+ { X86::PFCMPEQrr, X86::PFCMPEQrm, 0 },
+ { X86::PFCMPGErr, X86::PFCMPGErm, 0 },
+ { X86::PFCMPGTrr, X86::PFCMPGTrm, 0 },
+ { X86::PFMAXrr, X86::PFMAXrm, 0 },
+ { X86::PFMINrr, X86::PFMINrm, 0 },
+ { X86::PFMULrr, X86::PFMULrm, 0 },
+ { X86::PFNACCrr, X86::PFNACCrm, 0 },
+ { X86::PFPNACCrr, X86::PFPNACCrm, 0 },
+ { X86::PFRCPIT1rr, X86::PFRCPIT1rm, 0 },
+ { X86::PFRCPIT2rr, X86::PFRCPIT2rm, 0 },
+ { X86::PFRSQIT1rr, X86::PFRSQIT1rm, 0 },
+ { X86::PFSUBrr, X86::PFSUBrm, 0 },
+ { X86::PFSUBRrr, X86::PFSUBRrm, 0 },
+ { X86::PMULHRWrr, X86::PMULHRWrm, 0 },
+
// AVX 128-bit versions of foldable instructions
{ X86::VCVTSD2SSrr, X86::VCVTSD2SSrm, 0 },
{ X86::Int_VCVTSD2SSrr, X86::Int_VCVTSD2SSrm, 0 },
@@ -5969,6 +5999,7 @@ static const uint16_t ReplaceableInstrs[][3] = {
{ X86::MOVAPSrr, X86::MOVAPDrr, X86::MOVDQArr },
{ X86::MOVUPSmr, X86::MOVUPDmr, X86::MOVDQUmr },
{ X86::MOVUPSrm, X86::MOVUPDrm, X86::MOVDQUrm },
+ { X86::MOVLPSmr, X86::MOVLPDmr, X86::MOVPQI2QImr },
{ X86::MOVNTPSmr, X86::MOVNTPDmr, X86::MOVNTDQmr },
{ X86::ANDNPSrm, X86::ANDNPDrm, X86::PANDNrm },
{ X86::ANDNPSrr, X86::ANDNPDrr, X86::PANDNrr },
@@ -5984,6 +6015,7 @@ static const uint16_t ReplaceableInstrs[][3] = {
{ X86::VMOVAPSrr, X86::VMOVAPDrr, X86::VMOVDQArr },
{ X86::VMOVUPSmr, X86::VMOVUPDmr, X86::VMOVDQUmr },
{ X86::VMOVUPSrm, X86::VMOVUPDrm, X86::VMOVDQUrm },
+ // TODO: Add the AVX versions of MOVLPSmr
{ X86::VMOVNTPSmr, X86::VMOVNTPDmr, X86::VMOVNTDQmr },
{ X86::VANDNPSrm, X86::VANDNPDrm, X86::VPANDNrm },
{ X86::VANDNPSrr, X86::VANDNPDrr, X86::VPANDNrr },
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index ccdbf0e..65b155c 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -643,9 +643,6 @@ let Predicates = [UseAVX] in {
// Represent the same patterns above but in the form they appear for
// 256-bit types
- def : Pat<(v8i32 (X86vzmovl (insert_subvector undef,
- (v4i32 (scalar_to_vector (loadi32 addr:$src))), (iPTR 0)))),
- (SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_xmm)>;
def : Pat<(v8f32 (X86vzmovl (insert_subvector undef,
(v4f32 (scalar_to_vector (loadf32 addr:$src))), (iPTR 0)))),
(SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_xmm)>;
@@ -653,9 +650,6 @@ let Predicates = [UseAVX] in {
(v2f64 (scalar_to_vector (loadf64 addr:$src))), (iPTR 0)))),
(SUBREG_TO_REG (i32 0), (VMOVSDrm addr:$src), sub_xmm)>;
}
- def : Pat<(v4i64 (X86vzmovl (insert_subvector undef,
- (v2i64 (scalar_to_vector (loadi64 addr:$src))), (iPTR 0)))),
- (SUBREG_TO_REG (i64 0), (VMOVSDrm addr:$src), sub_xmm)>;
// Extract and store.
def : Pat<(store (f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
@@ -793,7 +787,7 @@ let Predicates = [UseSSE2] in {
(MOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>;
// FIXME: Instead of a X86Movlps there should be a X86Movsd here, the problem
- // is during lowering, where it's not possible to recognize the fold cause
+ // is during lowering, where it's not possible to recognize the fold because
// it has two uses through a bitcast. One use disappears at isel time and the
// fold opportunity reappears.
def : Pat<(v2f64 (X86Movlpd VR128:$src1, VR128:$src2)),
@@ -3678,13 +3672,30 @@ def MOVNTI_64mr : RI<0xC3, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
PS, Requires<[HasSSE2]>;
} // SchedRW = [WriteStore]
+let Predicates = [HasAVX2, NoVLX] in {
+ def : Pat<(alignednontemporalstore (v8i32 VR256:$src), addr:$dst),
+ (VMOVNTDQYmr addr:$dst, VR256:$src)>;
+ def : Pat<(alignednontemporalstore (v16i16 VR256:$src), addr:$dst),
+ (VMOVNTDQYmr addr:$dst, VR256:$src)>;
+ def : Pat<(alignednontemporalstore (v32i8 VR256:$src), addr:$dst),
+ (VMOVNTDQYmr addr:$dst, VR256:$src)>;
+}
+
let Predicates = [HasAVX, NoVLX] in {
def : Pat<(alignednontemporalstore (v4i32 VR128:$src), addr:$dst),
- (VMOVNTPSmr addr:$dst, VR128:$src)>;
+ (VMOVNTDQmr addr:$dst, VR128:$src)>;
+ def : Pat<(alignednontemporalstore (v8i16 VR128:$src), addr:$dst),
+ (VMOVNTDQmr addr:$dst, VR128:$src)>;
+ def : Pat<(alignednontemporalstore (v16i8 VR128:$src), addr:$dst),
+ (VMOVNTDQmr addr:$dst, VR128:$src)>;
}
def : Pat<(alignednontemporalstore (v4i32 VR128:$src), addr:$dst),
- (MOVNTPSmr addr:$dst, VR128:$src)>;
+ (MOVNTDQmr addr:$dst, VR128:$src)>;
+def : Pat<(alignednontemporalstore (v8i16 VR128:$src), addr:$dst),
+ (MOVNTDQmr addr:$dst, VR128:$src)>;
+def : Pat<(alignednontemporalstore (v16i8 VR128:$src), addr:$dst),
+ (MOVNTDQmr addr:$dst, VR128:$src)>;
} // AddedComplexity
@@ -4890,7 +4901,8 @@ let Predicates = [UseAVX] in {
def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))),
(VMOVDI2PDIrr GR32:$src)>;
- // AVX 128-bit movd/movq instruction write zeros in the high 128-bit part.
+ // AVX 128-bit movd/movq instructions write zeros in the high 128-bit part.
+ // These instructions also write zeros in the high part of a 256-bit register.
let AddedComplexity = 20 in {
def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector (loadi32 addr:$src))))),
(VMOVDI2PDIrm addr:$src)>;
@@ -4898,6 +4910,9 @@ let Predicates = [UseAVX] in {
(VMOVDI2PDIrm addr:$src)>;
def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))),
(VMOVDI2PDIrm addr:$src)>;
+ def : Pat<(v8i32 (X86vzmovl (insert_subvector undef,
+ (v4i32 (scalar_to_vector (loadi32 addr:$src))), (iPTR 0)))),
+ (SUBREG_TO_REG (i32 0), (VMOVDI2PDIrm addr:$src), sub_xmm)>;
}
// Use regular 128-bit instructions to match 256-bit scalar_to_vec+zext.
def : Pat<(v8i32 (X86vzmovl (insert_subvector undef,
@@ -5016,6 +5031,9 @@ let Predicates = [UseAVX], AddedComplexity = 20 in {
(VMOVZQI2PQIrm addr:$src)>;
def : Pat<(v2i64 (X86vzload addr:$src)),
(VMOVZQI2PQIrm addr:$src)>;
+ def : Pat<(v4i64 (X86vzmovl (insert_subvector undef,
+ (v2i64 (scalar_to_vector (loadi64 addr:$src))), (iPTR 0)))),
+ (SUBREG_TO_REG (i64 0), (VMOVZQI2PQIrm addr:$src), sub_xmm)>;
}
let Predicates = [UseSSE2], AddedComplexity = 20 in {
@@ -7150,6 +7168,10 @@ let Predicates = [HasAVX2] in {
}
// Patterns
+// FIXME: Prefer a movss or movsd over a blendps when optimizing for size or
+// on targets where they have equal performance. These were changed to use
+// blends because blends have better throughput on SandyBridge and Haswell, but
+// movs[s/d] are 1-2 byte shorter instructions.
let Predicates = [UseAVX] in {
let AddedComplexity = 15 in {
// Move scalar to XMM zero-extended, zeroing a VR128 then do a
@@ -7166,8 +7188,10 @@ let Predicates = [UseAVX] in {
// Move low f32 and clear high bits.
def : Pat<(v8f32 (X86vzmovl (v8f32 VR256:$src))),
(VBLENDPSYrri (v8f32 (AVX_SET0)), VR256:$src, (i8 1))>;
- def : Pat<(v8i32 (X86vzmovl (v8i32 VR256:$src))),
- (VBLENDPSYrri (v8i32 (AVX_SET0)), VR256:$src, (i8 1))>;
+
+ // Move low f64 and clear high bits.
+ def : Pat<(v4f64 (X86vzmovl (v4f64 VR256:$src))),
+ (VBLENDPDYrri (v4f64 (AVX_SET0)), VR256:$src, (i8 1))>;
}
def : Pat<(v8f32 (X86vzmovl (insert_subvector undef,
@@ -7181,14 +7205,19 @@ let Predicates = [UseAVX] in {
(v2f64 (VMOVSDrr (v2f64 (V_SET0)), FR64:$src)),
sub_xmm)>;
- // Move low f64 and clear high bits.
- def : Pat<(v4f64 (X86vzmovl (v4f64 VR256:$src))),
- (VBLENDPDYrri (v4f64 (AVX_SET0)), VR256:$src, (i8 1))>;
-
+ // These will incur an FP/int domain crossing penalty, but it may be the only
+ // way without AVX2. Do not add any complexity because we may be able to match
+ // more optimal patterns defined earlier in this file.
+ def : Pat<(v8i32 (X86vzmovl (v8i32 VR256:$src))),
+ (VBLENDPSYrri (v8i32 (AVX_SET0)), VR256:$src, (i8 1))>;
def : Pat<(v4i64 (X86vzmovl (v4i64 VR256:$src))),
(VBLENDPDYrri (v4i64 (AVX_SET0)), VR256:$src, (i8 1))>;
}
+// FIXME: Prefer a movss or movsd over a blendps when optimizing for size or
+// on targets where they have equal performance. These were changed to use
+// blends because blends have better throughput on SandyBridge and Haswell, but
+// movs[s/d] are 1-2 byte shorter instructions.
let Predicates = [UseSSE41] in {
// With SSE41 we can use blends for these patterns.
def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))),
@@ -8341,7 +8370,7 @@ let Predicates = [HasAVX2] in {
def : Pat<(v4f64 (X86VBroadcast (v2f64 VR128:$src))),
(VBROADCASTSDYrr VR128:$src)>;
- // Provide aliases for broadcast from the same regitser class that
+ // Provide aliases for broadcast from the same register class that
// automatically does the extract.
def : Pat<(v32i8 (X86VBroadcast (v32i8 VR256:$src))),
(VPBROADCASTBYrr (v16i8 (EXTRACT_SUBREG (v32i8 VR256:$src),
diff --git a/lib/Target/X86/X86IntrinsicsInfo.h b/lib/Target/X86/X86IntrinsicsInfo.h
index 42256b2..28a3b7b 100644
--- a/lib/Target/X86/X86IntrinsicsInfo.h
+++ b/lib/Target/X86/X86IntrinsicsInfo.h
@@ -334,6 +334,10 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86ISD::FMUL_RND),
X86_INTRINSIC_DATA(avx512_mask_mul_ps_512, INTR_TYPE_2OP_MASK, ISD::FMUL,
X86ISD::FMUL_RND),
+ X86_INTRINSIC_DATA(avx512_mask_padd_d_512, INTR_TYPE_2OP_MASK, ISD::ADD, 0),
+ X86_INTRINSIC_DATA(avx512_mask_padd_q_512, INTR_TYPE_2OP_MASK, ISD::ADD, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pand_d_512, INTR_TYPE_2OP_MASK, ISD::AND, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pand_q_512, INTR_TYPE_2OP_MASK, ISD::AND, 0),
X86_INTRINSIC_DATA(avx512_mask_pcmpeq_b_128, CMP_MASK, X86ISD::PCMPEQM, 0),
X86_INTRINSIC_DATA(avx512_mask_pcmpeq_b_256, CMP_MASK, X86ISD::PCMPEQM, 0),
X86_INTRINSIC_DATA(avx512_mask_pcmpeq_b_512, CMP_MASK, X86ISD::PCMPEQM, 0),
@@ -358,6 +362,12 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_mask_pcmpgt_w_128, CMP_MASK, X86ISD::PCMPGTM, 0),
X86_INTRINSIC_DATA(avx512_mask_pcmpgt_w_256, CMP_MASK, X86ISD::PCMPGTM, 0),
X86_INTRINSIC_DATA(avx512_mask_pcmpgt_w_512, CMP_MASK, X86ISD::PCMPGTM, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmul_dq_512, INTR_TYPE_2OP_MASK,
+ X86ISD::PMULDQ, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmulu_dq_512, INTR_TYPE_2OP_MASK,
+ X86ISD::PMULUDQ, 0),
+ X86_INTRINSIC_DATA(avx512_mask_por_d_512, INTR_TYPE_2OP_MASK, ISD::OR, 0),
+ X86_INTRINSIC_DATA(avx512_mask_por_q_512, INTR_TYPE_2OP_MASK, ISD::OR, 0),
X86_INTRINSIC_DATA(avx512_mask_psll_d, INTR_TYPE_2OP_MASK, X86ISD::VSHL, 0),
X86_INTRINSIC_DATA(avx512_mask_psll_q, INTR_TYPE_2OP_MASK, X86ISD::VSHL, 0),
X86_INTRINSIC_DATA(avx512_mask_pslli_d, VSHIFT_MASK, X86ISD::VSHLI, 0),
@@ -376,6 +386,10 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_mask_psrli_q, VSHIFT_MASK, X86ISD::VSRLI, 0),
X86_INTRINSIC_DATA(avx512_mask_psrlv_d, INTR_TYPE_2OP_MASK, ISD::SRL, 0),
X86_INTRINSIC_DATA(avx512_mask_psrlv_q, INTR_TYPE_2OP_MASK, ISD::SRL, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psub_d_512, INTR_TYPE_2OP_MASK, ISD::SUB, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psub_q_512, INTR_TYPE_2OP_MASK, ISD::SUB, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pxor_d_512, INTR_TYPE_2OP_MASK, ISD::XOR, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pxor_q_512, INTR_TYPE_2OP_MASK, ISD::XOR, 0),
X86_INTRINSIC_DATA(avx512_mask_rndscale_sd, INTR_TYPE_SCALAR_MASK_RM,
X86ISD::RNDSCALE, 0),
X86_INTRINSIC_DATA(avx512_mask_rndscale_ss, INTR_TYPE_SCALAR_MASK_RM,
diff --git a/lib/Target/X86/X86SelectionDAGInfo.cpp b/lib/Target/X86/X86SelectionDAGInfo.cpp
index ca8fc9c..4bfc7f9 100644
--- a/lib/Target/X86/X86SelectionDAGInfo.cpp
+++ b/lib/Target/X86/X86SelectionDAGInfo.cpp
@@ -193,7 +193,8 @@ X86SelectionDAGInfo::EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl,
DAG.getConstant(Offset, AddrVT)),
Src,
DAG.getConstant(BytesLeft, SizeVT),
- Align, isVolatile, DstPtrInfo.getWithOffset(Offset));
+ Align, isVolatile, false,
+ DstPtrInfo.getWithOffset(Offset));
}
// TODO: Use a Tokenfactor, as in memcpy, instead of a single chain.
@@ -282,7 +283,7 @@ SDValue X86SelectionDAGInfo::EmitTargetCodeForMemcpy(
DAG.getNode(ISD::ADD, dl, SrcVT, Src,
DAG.getConstant(Offset, SrcVT)),
DAG.getConstant(BytesLeft, SizeVT),
- Align, isVolatile, AlwaysInline,
+ Align, isVolatile, AlwaysInline, false,
DstPtrInfo.getWithOffset(Offset),
SrcPtrInfo.getWithOffset(Offset)));
}
diff --git a/lib/Target/XCore/InstPrinter/XCoreInstPrinter.cpp b/lib/Target/XCore/InstPrinter/XCoreInstPrinter.cpp
index 215fe89..36b3b02 100644
--- a/lib/Target/XCore/InstPrinter/XCoreInstPrinter.cpp
+++ b/lib/Target/XCore/InstPrinter/XCoreInstPrinter.cpp
@@ -30,7 +30,7 @@ void XCoreInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
}
void XCoreInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
- StringRef Annot) {
+ StringRef Annot, const MCSubtargetInfo &STI) {
printInstruction(MI, O);
printAnnotation(O, Annot);
}
diff --git a/lib/Target/XCore/InstPrinter/XCoreInstPrinter.h b/lib/Target/XCore/InstPrinter/XCoreInstPrinter.h
index 78521fd..6fd2dec 100644
--- a/lib/Target/XCore/InstPrinter/XCoreInstPrinter.h
+++ b/lib/Target/XCore/InstPrinter/XCoreInstPrinter.h
@@ -32,7 +32,8 @@ public:
static const char *getRegisterName(unsigned RegNo);
void printRegName(raw_ostream &OS, unsigned RegNo) const override;
- void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot) override;
+ void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot,
+ const MCSubtargetInfo &STI) override;
private:
void printInlineJT(const MCInst *MI, int opNum, raw_ostream &O);
void printInlineJT32(const MCInst *MI, int opNum, raw_ostream &O);
diff --git a/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp b/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp
index d0a09b2..4a790c8 100644
--- a/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp
+++ b/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp
@@ -81,12 +81,11 @@ static MCCodeGenInfo *createXCoreMCCodeGenInfo(StringRef TT, Reloc::Model RM,
return X;
}
-static MCInstPrinter *createXCoreMCInstPrinter(const Target &T,
+static MCInstPrinter *createXCoreMCInstPrinter(const Triple &T,
unsigned SyntaxVariant,
const MCAsmInfo &MAI,
const MCInstrInfo &MII,
- const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI) {
+ const MCRegisterInfo &MRI) {
return new XCoreInstPrinter(MAI, MII, MRI);
}
diff --git a/lib/Target/XCore/XCoreISelLowering.cpp b/lib/Target/XCore/XCoreISelLowering.cpp
index 6e8a95a..c4e3bb8 100644
--- a/lib/Target/XCore/XCoreISelLowering.cpp
+++ b/lib/Target/XCore/XCoreISelLowering.cpp
@@ -308,7 +308,8 @@ LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const
Constant *GA = ConstantExpr::getBitCast(const_cast<GlobalValue*>(GV), Ty);
Ty = Type::getInt32Ty(*DAG.getContext());
Constant *Idx = ConstantInt::get(Ty, Offset);
- Constant *GAI = ConstantExpr::getGetElementPtr(GA, Idx);
+ Constant *GAI = ConstantExpr::getGetElementPtr(
+ Type::getInt8Ty(*DAG.getContext()), GA, Idx);
SDValue CP = DAG.getConstantPool(GAI, MVT::i32);
return DAG.getLoad(getPointerTy(), DL, DAG.getEntryNode(), CP,
MachinePointerInfo(), false, false, false, 0);
@@ -1422,7 +1423,7 @@ XCoreTargetLowering::LowerCCCArguments(SDValue Chain,
InVals.push_back(FIN);
MemOps.push_back(DAG.getMemcpy(Chain, dl, FIN, ArgDI->SDV,
DAG.getConstant(Size, MVT::i32),
- Align, false, false,
+ Align, false, false, false,
MachinePointerInfo(),
MachinePointerInfo()));
} else {
@@ -1833,10 +1834,11 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N,
LD->getAlignment() == Alignment &&
!LD->isVolatile() && !LD->isIndexed() &&
Chain.reachesChainWithoutSideEffects(SDValue(LD, 1))) {
+ bool isTail = isInTailCallPosition(DAG, ST, Chain);
return DAG.getMemmove(Chain, dl, ST->getBasePtr(),
LD->getBasePtr(),
DAG.getConstant(StoreBits/8, MVT::i32),
- Alignment, false, ST->getPointerInfo(),
+ Alignment, false, isTail, ST->getPointerInfo(),
LD->getPointerInfo());
}
}
diff --git a/lib/Target/XCore/XCoreLowerThreadLocal.cpp b/lib/Target/XCore/XCoreLowerThreadLocal.cpp
index b4c6a50..9fb63e9 100644
--- a/lib/Target/XCore/XCoreLowerThreadLocal.cpp
+++ b/lib/Target/XCore/XCoreLowerThreadLocal.cpp
@@ -82,8 +82,9 @@ createReplacementInstr(ConstantExpr *CE, Instruction *Instr) {
case Instruction::GetElementPtr: {
SmallVector<Value *,4> CEOpVec(CE->op_begin(), CE->op_end());
ArrayRef<Value *> CEOps(CEOpVec);
- return dyn_cast<Instruction>(Builder.CreateInBoundsGEP(CEOps[0],
- CEOps.slice(1)));
+ return dyn_cast<Instruction>(Builder.CreateInBoundsGEP(
+ cast<GEPOperator>(CE)->getSourceElementType(), CEOps[0],
+ CEOps.slice(1)));
}
case Instruction::Add:
case Instruction::Sub:
@@ -212,7 +213,8 @@ bool XCoreLowerThreadLocal::lowerGlobal(GlobalVariable *GV) {
SmallVector<Value *, 2> Indices;
Indices.push_back(Constant::getNullValue(Type::getInt64Ty(Ctx)));
Indices.push_back(ThreadID);
- Value *Addr = Builder.CreateInBoundsGEP(NewGV, Indices);
+ Value *Addr =
+ Builder.CreateInBoundsGEP(NewGV->getValueType(), NewGV, Indices);
U->replaceUsesOfWith(GV, Addr);
}
diff --git a/lib/Target/XCore/XCoreTargetStreamer.h b/lib/Target/XCore/XCoreTargetStreamer.h
index 48bf0fa..3563dbc 100644
--- a/lib/Target/XCore/XCoreTargetStreamer.h
+++ b/lib/Target/XCore/XCoreTargetStreamer.h
@@ -16,7 +16,7 @@ namespace llvm {
class XCoreTargetStreamer : public MCTargetStreamer {
public:
XCoreTargetStreamer(MCStreamer &S);
- virtual ~XCoreTargetStreamer();
+ ~XCoreTargetStreamer() override;
virtual void emitCCTopData(StringRef Name) = 0;
virtual void emitCCTopFunction(StringRef Name) = 0;
virtual void emitCCBottomData(StringRef Name) = 0;